program main !*****************************************************************************80 ! !! MAIN is the main program for POISSON_NONBLOCK_MPI. ! ! Discussion: ! ! POISSON_NONBLOCK_MPI solves the Poisson problem using nonblocking MPI. ! ! This program differs from POISSON_MPI only in the routine EXCHANGE_BAND. ! There, instead of calling MPI_RECV and MPI_SEND, the routine calls ! MPI_IRECV and MPI_ISEND, which are nonblocking data transfer routines. ! ! Theoretically, this could make the program run twice as fast. ! ! This program runs in parallel. Its output should be compared with that ! of POISSON_SERIAL. ! ! The Poisson equation ! ! DEL^2 U(X,Y) = F(X,Y) ! ! is solved on the unit square [0,1] x [0,1] using a grid of [0,NX+1] by ! [0,NY+1] evenly spaced points. ! ! The boundary conditions and F are set so that the exact solution is ! ! U(X,Y) = sin ( X * Y ) ! ! The Jacobi iteration is repeatedly applied until convergence is detected. ! ! For parallel execution, the domain is divided up into parallel horizontal ! strips. Each strip is assigned to a process. A process must communicate ! with the processes "above" and "below" it to get information about the ! solution value along their common interface. ! ! Licensing: ! ! This code is distributed under the GNU LGPL license. ! ! Modified: ! ! 08 October 2002 ! ! Author: ! ! John Burkardt ! ! Reference: ! ! William Gropp, Ewing Lusk, Anthony Skjellum, ! Using MPI: Portable Parallel Programming with the ! Message-Passing Interface, ! Second Edition, ! MIT Press, 1999, ! ISBN: 0262571323. ! use mpi integer, parameter :: ndim = 1 integer, parameter :: nx = 9 integer, parameter :: ny = 9 real, allocatable, dimension ( :, : ) :: a real anorm real, allocatable, dimension ( :, : ) :: b real bnorm integer bot integer cartesian_comm integer cartesian_id integer, parameter :: cartesian_master = 0 logical converged real diff real diff_part integer dim integer dims(ndim) real dx real dy integer e integer error real, allocatable, dimension ( :, : ) :: f real fnorm integer i integer it integer, parameter :: it_max = 100 integer j integer, parameter :: master = 0 integer num_procs logical periodic(ndim) logical reorder integer s integer shift integer top real, parameter :: tolerance = 0.001E+00 integer world_id ! ! Initialize MPI. ! call MPI_Init ( error ) ! ! Get the number of processes. ! call MPI_Comm_size ( MPI_COMM_WORLD, num_procs, error ) ! ! Get the individual process ID. ! call MPI_Comm_rank ( MPI_COMM_WORLD, world_id, error ) ! ! Print a message. ! if ( world_id == master ) then call timestamp ( ) write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'POISSON_NONBLOCK_MPI:' write ( *, '(a)' ) ' FORTRAN90 version' write ( *, '(a)' ) ' ' write ( *, '(a)' ) ' An MPI example program to solve the Poisson equation.' write ( *, '(a)' ) ' ' write ( *, '(a)' ) ' This version used NONBLOCKING communication' write ( *, '(a)' ) ' with MPI_Isend and MPI_Irecv.' write ( *, '(a)' ) ' ' write ( *, '(a,i8)' ) ' The number of interior X grid lines is ', nx write ( *, '(a,i8)' ) ' The number of interior Y grid lines is ', ny write ( *, '(a)' ) ' ' write ( *, '(a,i8)' ) ' The number of processes is ', num_procs write ( *, '(a)' ) ' ' end if write ( *, '(a,i8,a)' ) ' Process ', world_id, ' is active' ! ! Get a communicator for the Cartesian decomposition of the domain. ! dims(1) = num_procs periodic(1) = .false. reorder = .true. call MPI_Cart_create ( MPI_COMM_WORLD, ndim, dims, periodic, reorder, & cartesian_comm, error ) ! ! Get this processor's rank within the Cartesian communicator. ! call MPI_Comm_rank ( cartesian_comm, cartesian_id, error ) ! ! Determine the bottom and top neighbors of each processor. ! ! Note that MPI_Cart_shift counts dimensions using 0-based arithmetic! ! We only have one dimension, but to inquire about a shift in dimension 1 ! we actually ask about dimension 0! ! dim = 0 shift = 1 call MPI_Cart_shift ( cartesian_comm, dim, shift, bot, top, error ) ! ! Compute the rows "S" through "E" of data that are to be assigned to this ! process. ! ! (There's an MPE routine MPE_DECOMP1D that does this, but I don't know where ! MPE is, so I wrote my own!) ! call decomp_band ( ny, num_procs, cartesian_id, s, e ) ! ! Once we have the sizes S and E, we can allocate A, B, and F. ! allocate ( a(0:nx+1,s-1:e+1) ) allocate ( b(0:nx+1,s-1:e+1) ) allocate ( f(0:nx+1,s-1:e+1) ) ! ! Initialize the right hand side and initial solution guess. ! call init_band ( nx, ny, s, e, cartesian_id, cartesian_comm, num_procs, & dx, dy, f, a, b ) call get_norm ( nx, s, e, f, cartesian_comm, cartesian_master, fnorm ) if ( cartesian_id == cartesian_master ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'POISSON_NONBLOCK_MPI - Master Cartesian process:' write ( *, '(a,g14.6)' ) & ' Max norm of right hand side F at interior nodes = ', fnorm end if ! ! Do the iteration. ! We do two steps of iteration each time. ! converged = .false. if ( cartesian_id == cartesian_master ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'Step ||U|| ||Unew|| ||Unew-U||' write ( *, '(a)' ) ' ' end if do it = 1, it_max ! ! Share with neighbor processes the value of A at "ghost" points. ! call exchange_band ( nx, s, e, cartesian_comm, bot, top, a ) ! ! Perform a Jacobi sweep, computing B from A. ! call sweep_band ( nx, s, e, dx, dy, f, a, b ) call get_norm ( nx, s, e, b, cartesian_comm, cartesian_master, bnorm ) ! ! Share with neighbor processes the value of B at "ghost" points. ! call exchange_band ( nx, s, e, cartesian_comm, bot, top, b ) ! ! Perform a Jacobi sweep, computing A from B. ! call sweep_band ( nx, s, e, dx, dy, f, b, a ) call get_norm ( nx, s, e, a, cartesian_comm, cartesian_master, anorm ) ! ! Check for convergence. ! diff_part = 0.0E+00 do j = s, e do i = 1, nx diff_part = max ( diff_part, abs ( a(i,j) - b(i,j) ) ) end do end do call MPI_Allreduce ( diff_part, diff, 1, MPI_REAL, MPI_MAX, & cartesian_comm, error ) if ( cartesian_id == cartesian_master ) then write ( *, '(i8,3g14.6)' ) it, bnorm, anorm, diff end if if ( diff <= tolerance ) then converged = .true. exit end if end do if ( world_id == master ) then if ( converged ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'POISSON_NONBLOCK_MPI - Master process:' write ( *, '(a)' ) ' The iteration has converged' else write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'POISSON_NONBLOCK_MPI - Master process:' write ( *, '(a)' ) ' The iteration has NOT converged' end if end if ! ! Free memory. ! deallocate ( a ) deallocate ( b ) deallocate ( f ) ! ! Terminate MPI. ! call MPI_Finalize ( error ) ! ! Terminate. ! if ( world_id == master ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'POISSON_NONBLOCK_MPI - Master process:' write ( *, '(a)' ) ' Normal end of execution.' write ( *, '(a)' ) ' ' call timestamp ( ) end if stop end subroutine get_norm ( nx, s, e, a, cartesian_comm, cartesian_master, anorm ) !*****************************************************************************80 ! !! GET_NORM gets the max norm of a shared quantity, over the grid interior. ! ! Licensing: ! ! This code is distributed under the GNU LGPL license. ! ! Modified: ! ! 24 September 2002 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer NX, the X grid dimension. ! ! Input, integer S, E, indicates the first and last indices of Y data ! to be controlled by this process. ! ! Input, real A(0:NX+1,S-1:E+1), the quantity whose max norm is desired. ! ! Input, integer CARTESIAN_COMM, the id of the Cartesian communicator. ! ! Input, integer CARTESIAN_MASTER, the id of the master Cartesian process. ! ! Output, real ANORM, the maximum absolute value of A, over the whole ! interior grid. ! use mpi integer e integer nx integer s real a(0:nx+1,s-1:e+1) real anorm real anorm_part integer cartesian_comm integer cartesian_master integer error integer i integer j anorm_part = 0.0E+00 do j = s, e do i = 1, nx anorm_part = max ( anorm_part, abs ( a(i,j) ) ) end do end do call MPI_Reduce ( anorm_part, anorm, 1, MPI_REAL, MPI_MAX, cartesian_master, & cartesian_comm, error ) return end subroutine decomp_band ( m, n, r, s, e ) !*****************************************************************************80 ! !! DECOMP_BAND determines the part of the work to be done by this process. ! ! Discussion: ! ! This routine is a replacement for the routine MPE_Decomp1d, and ! is an informed guess as to how it works. ! ! Given M things in a row, divide them up in consecutive chunks among ! N processes so that the number assigned to each process is nearly ! equal. ! ! If M is exactly divisible by N, then each process gets M/N consecutive ! tasks. ! ! Otherwise, if there is a remainder of P, then the first P processes get ! M/N+1 tasks, and the last (N-P) processes get M/N tasks. ! ! Example: ! ! M = 27, N = 10 ! ! R S E ! -- -- -- ! 0: 1 3 ! 1: 4 6 ! 2: 7 9 ! 3: 10 12 ! 4: 13 15 ! 5: 16 18 ! 6: 19 21 ! 7: 22 23 ! 8: 24 25 ! 9: 26 27 ! ! Licensing: ! ! This code is distributed under the GNU LGPL license. ! ! Modified: ! ! 19 September 2002 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer M, the number of tasks to be divided among processes. ! ! Input, integer N, the number of processes. ! ! Input, integer R, the rank of a particular process. Because of ! peculiarities of MPI, R will be between 0 and N-1. ! ! Output, integer S, E, the first and last tasks to be given to process R. ! S will be 1 or greater, E will be M or less. ! implicit none integer e integer m integer n integer part integer r integer s integer whole whole = m / n part = m - whole * n if ( r + 1 <= part ) then s = r * ( whole + 1 ) + 1 e = ( r + 1 ) * ( whole + 1 ) else s = r * whole + 1 + part e = ( r + 1 ) * whole + part end if return end subroutine init_band ( nx, ny, s, e, cartesian_id, cartesian_comm, num_procs, & dx, dy, f, a, b ) !*****************************************************************************80 ! !! INIT_BAND initializes the arrays. ! ! Licensing: ! ! This code is distributed under the GNU LGPL license. ! ! Modified: ! ! 19 September 2002 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer NX, NY, the X and Y grid dimensions. ! ! Input, integer S, E, indicates the first and last indices of Y data ! to be controlled by this process. ! ! Input, integer CARTESIAN_ID, the rank of this process. ! ! Input, integer CARTESIAN_COMM, the id of the Cartesian communicator. ! ! Input, integer NUM_PROCS, the number of processes. ! ! Output, real DX, DY, the spacing between grid points. ! ! Output, real F(0:NX+1,S-1:E+1), the right hand side data. ! ! Output, real A(0:NX+1,S-1:E+1), B(0:NX+1,S-1:E+1), the initial ! solution estimates. ! use mpi integer e integer nx integer s real a(0:nx+1,s-1:e+1) real b(0:nx+1,s-1:e+1) integer cartesian_id integer cartesian_comm integer, parameter :: cartesian_master = 0 real dx real dy integer error real f(0:nx+1,s-1:e+1) real fnorm real fnorm_part integer i integer j integer num_procs integer ny real x real y dx = 1.0E+00 / real ( nx + 1 ) dy = 1.0E+00 / real ( ny + 1 ) if ( cartesian_id == cartesian_master ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'INIT_BAND - Master Cartesian process:' write ( *, '(a,g14.6)' ) ' The X grid spacing is ', dx write ( *, '(a,g14.6)' ) ' The Y grid spacing is ', dy end if ! ! Set the initial guesses to zero. ! a(0:nx+1,s-1:e+1) = 0.0E+00 b(0:nx+1,s-1:e+1) = 0.0E+00 ! ! The "boundary" entries of F will store the boundary values of the solution. ! fnorm_part = 0.0E+00 x = 0.0E+00 do j = s, e y = real ( j ) / real ( ny + 1 ) f(0,j) = sin ( x * y ) a(0,j) = f(0,j) b(0,j) = f(0,j) fnorm_part = max ( fnorm_part, abs ( f(0,j) ) ) end do x = 1.0E+00 do j = s, e y = real ( j ) / real ( ny + 1 ) f(nx+1,j) = sin ( x * y ) a(nx+1,j) = f(nx+1,j) b(nx+1,j) = f(nx+1,j) fnorm_part = max ( fnorm_part, abs ( f(nx+1,j) ) ) end do if ( cartesian_id == 0 ) then y = 0.0E+00 do i = 0, nx+1 x = real ( i ) / real ( nx + 1 ) f(i,0) = sin ( x * y ) a(i,0) = f(i,0) b(i,0) = f(i,0) fnorm_part = max ( fnorm_part, abs ( f(i,0) ) ) end do end if if ( cartesian_id == num_procs-1 ) then y = 1.0E+00 do i = 0, nx+1 x = real ( i ) / real ( nx + 1 ) f(i,ny+1) = sin ( x * y ) a(i,ny+1) = f(i,ny+1) b(i,ny+1) = f(i,ny+1) fnorm_part = max ( fnorm_part, abs ( f(i,ny+1) ) ) end do end if call MPI_Reduce ( fnorm_part, fnorm, 1, MPI_REAL, MPI_MAX, cartesian_master, & cartesian_comm, error ) if ( cartesian_id == cartesian_master ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'INIT_BAND - Master Cartesian process:' write ( *, '(a,g14.6)' ) ' Max norm of boundary values = ', fnorm end if ! ! The "interior" entries of F will store the right hand sides of the ! Poisson equation. ! do j = s, e y = real ( j ) / real ( ny + 1 ) do i = 1, nx x = real ( i ) / real ( nx + 1 ) f(i,j) = - ( x**2 + y**2 ) * sin ( x * y ) end do end do return end subroutine exchange_band ( nx, s, e, cartesian_comm, bot, top, a ) !*****************************************************************************80 ! !! EXCHANGE_BAND swaps interface information between neighboring processes. ! ! Discussion: ! ! Nonblocking MPI send and receive routines are used. Control returns ! immediately, once the requests have been initiated. In order to ! guarantee that the requests have been COMPLETED, it is necessary to ! call a special routine such as MPI_WAIT_ALL. ! ! Theoretically, this routine could run twice as fast as one which ! used blocking communication. ! ! Licensing: ! ! This code is distributed under the GNU LGPL license. ! ! Modified: ! ! 08 October 2002 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer NX, the number of (interior) grid lines in the X direction. ! ! Input, integer S, E, indicates the first and last indices of Y data ! to be controlled by this process. ! ! Input, integer CARTESIAN_COMM, the identifier for the Cartesian ! communicator. ! ! Input, integer BOT, TOP, the identifiers of the processes "below" and ! "above" this process, with which it must exchange information. ! ! Input/output, real A(0:NX+1,S-1:E+1), an array of data. Column ! S must be sent to the "bottom" process and "E" to the top process. ! Values for S-1 and E+1 will be received from the bottom and top ! processes. ! use mpi integer e integer nx integer s real a(0:nx+1,s-1:e+1) integer bot integer cartesian_comm integer error integer request(4) integer status_array(MPI_STATUS_SIZE,4) integer tag integer top tag = 0 call MPI_Irecv ( a(1,s-1), nx, MPI_REAL, bot, tag, cartesian_comm, & request(1), error ) tag = 1 call MPI_Irecv ( a(1,e+1), nx, MPI_REAL, top, tag, cartesian_comm, & request(2), error ) tag = 0 call MPI_Isend ( a(1,e), nx, MPI_REAL, top, tag, cartesian_comm, & request(3), error ) tag = 1 call MPI_Isend ( a(1,s), nx, MPI_REAL, bot, tag, cartesian_comm, & request(4), error ) ! ! Wait for all the communication to complete before proceeding. ! call MPI_Waitall ( 4, request, status_array, error ) return end subroutine sweep_band ( nx, s, e, dx, dy, f, u, unew ) !*****************************************************************************80 ! !! SWEEP_BAND carries out one step of the Jacobi iteration. ! ! Licensing: ! ! This code is distributed under the GNU LGPL license. ! ! Modified: ! ! 16 September 2002 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer NX, the X grid dimension. ! ! Input, integer S, E, indicates the first and last indices of Y data ! to be controlled by this process. ! ! Input, real DX, DY, the spacing between grid points. ! ! Input, real F(0:NX+1,S-1:E+1), the right hand side data. ! ! Input, real U(0:NX+1,S-1:E+1), the previous solution estimate. ! ! Output, real UNEW(0:NX+1,S-1:E+1), the updated solution estimate. ! implicit none integer e integer nx integer s real dx real dy real f(0:nx+1,s-1:e+1) integer i integer j real u(0:nx+1,s-1:e+1) real unew(0:nx+1,s-1:e+1) do j = s, e do i = 1, nx unew(i,j) = ( u(i-1,j) + u(i,j+1) + u(i,j-1) + u(i+1,j) ) / 4.0E+00 & - f(i,j) * dx * dy end do end do return end subroutine timestamp ( ) !*****************************************************************************80 ! !! TIMESTAMP prints the current YMDHMS date as a time stamp. ! ! Example: ! ! 31 May 2001 9:45:54.872 AM ! ! Licensing: ! ! This code is distributed under the GNU LGPL license. ! ! Modified: ! ! 18 May 2013 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! None ! implicit none character ( len = 8 ) ampm integer ( kind = 4 ) d integer ( kind = 4 ) h integer ( kind = 4 ) m integer ( kind = 4 ) mm character ( len = 9 ), parameter, dimension(12) :: month = (/ & 'January ', 'February ', 'March ', 'April ', & 'May ', 'June ', 'July ', 'August ', & 'September', 'October ', 'November ', 'December ' /) integer ( kind = 4 ) n integer ( kind = 4 ) s integer ( kind = 4 ) values(8) integer ( kind = 4 ) y call date_and_time ( values = values ) y = values(1) m = values(2) d = values(3) h = values(5) n = values(6) s = values(7) mm = values(8) if ( h < 12 ) then ampm = 'AM' else if ( h == 12 ) then if ( n == 0 .and. s == 0 ) then ampm = 'Noon' else ampm = 'PM' end if else h = h - 12 if ( h < 12 ) then ampm = 'PM' else if ( h == 12 ) then if ( n == 0 .and. s == 0 ) then ampm = 'Midnight' else ampm = 'AM' end if end if end if write ( *, '(i2.2,1x,a,1x,i4,2x,i2,a1,i2.2,a1,i2.2,a1,i3.3,1x,a)' ) & d, trim ( month(m) ), y, h, ':', n, ':', s, '.', mm, trim ( ampm ) return end