! ! ! ! N A S P A R A L L E L B E N C H M A R K S 3.3 ! ! ! ! S E R I A L V E R S I O N ! ! ! ! B T ! ! ! ! ! ! ! This benchmark is a serial version of the NPB BT code. ! ! Refer to NAS Technical Reports 95-020 and 99-011 for details. ! ! ! ! Permission to use, copy, distribute and modify this software ! ! for any purpose with or without fee is hereby granted. We ! ! request, however, that all derived work reference the NAS ! ! Parallel Benchmarks 3.3. This software is provided "as is" ! ! without express or implied warranty. ! ! ! ! Information on NPB 3.3, including the technical report, the ! ! original specifications, source code, results and information ! ! on how to submit new results, is available at: ! ! ! ! http://www.nas.nasa.gov/Software/NPB/ ! ! ! ! Send comments or suggestions to npb@nas.nasa.gov ! ! ! ! NAS Parallel Benchmarks Group ! ! NASA Ames Research Center ! ! Mail Stop: T27A-1 ! ! Moffett Field, CA 94035-1000 ! ! ! ! E-mail: npb@nas.nasa.gov ! ! Fax: (650) 604-3957 ! ! ! ! ! ! ! Authors: R. Van der Wijngaart ! T. Harris ! M. Yarrow ! H. Jin ! ! program main !*****************************************************************************80 ! !! MAIN is the main program for the BT_SERIAL benchmark. ! include 'header.h' integer i, niter, step, fstatus double precision navg, mflops, n3 external timer_read double precision tmax, timer_read, t, trecs(t_last) logical verified character class character t_names(t_last)*8 ! ! Root node reads input file (if it exists) else takes ! defaults from parameters ! open (unit=2,file='timer.flag',status='old', iostat=fstatus) if (fstatus == 0) then timeron = .true. t_names(t_total) = 'total' t_names(t_rhsx) = 'rhsx' t_names(t_rhsy) = 'rhsy' t_names(t_rhsz) = 'rhsz' t_names(t_rhs) = 'rhs' t_names(t_xsolve) = 'xsolve' t_names(t_ysolve) = 'ysolve' t_names(t_zsolve) = 'zsolve' t_names(t_rdis1) = 'redist1' t_names(t_rdis2) = 'redist2' t_names(t_add) = 'add' close(2) else timeron = .false. end if write(*, 1000) open (unit=2,file='inputbt.data',status='old', iostat=fstatus) if (fstatus == 0) then write(*,233) 233 format(' Reading from input file inputbt.data') read (2,*) niter read (2,*) dt read (2,*) grid_points(1), grid_points(2), grid_points(3) close(2) else write(*,234) niter = niter_default dt = dt_default grid_points(1) = problem_size grid_points(2) = problem_size grid_points(3) = problem_size end if 234 format(' No input file inputbt.data. Using compiled defaults') write(*, 1001) grid_points(1), grid_points(2), grid_points(3) write(*, 1002) niter, dt write(*, *) 1000 format(//, ' NAS Parallel Benchmarks (NPB3.3-SER)', & ' - BT Benchmark',/) 1001 format(' Size: ', i4, 'x', i4, 'x', i4) 1002 format(' Iterations: ', i4, ' dt: ', F10.6) if ( (grid_points(1) .gt. IMAX) .or. & (grid_points(2) .gt. JMAX) .or. & (grid_points(3) .gt. KMAX) ) then print *, (grid_points(i),i=1,3) print *,' Problem size too big for compiled array sizes' goto 999 end if call set_constants call initialize do i = 1, t_last call timer_clear(i) end do ! call lhsinit call exact_rhs ! ! do one time step to touch all code, and reinitialize ! call adi call initialize do i = 1, t_last call timer_clear(i) end do call timer_start(1) do step = 1, niter if (mod(step, 20) == 0 .or. step == 1) then write(*, 200) step 200 format(' Time step ', i4) end if call adi end do call timer_stop(1) tmax = timer_read(1) call verify(niter, class, verified) n3 = 1.0d0*grid_points(1)*grid_points(2)*grid_points(3) navg = (grid_points(1)+grid_points(2)+grid_points(3))/3.0 if( tmax .ne. 0. ) then mflops = 1.0e-6*float(niter)* & (3478.8*n3-17655.7*navg**2+28023.7*navg) & / tmax else mflops = 0.0 end if call print_results('BT', class, grid_points(1), & grid_points(2), grid_points(3), niter, & tmax, mflops, ' floating point', & verified, npbversion,compiletime, cs1, cs2, cs3, cs4, cs5, & cs6, '(none)') ! ! More timers ! if (.not.timeron) goto 999 do i=1, t_last trecs(i) = timer_read(i) end do if (tmax == 0.0) tmax = 1.0 write(*,800) 800 format(' SECTION Time (secs)') do i=1, t_last write(*,810) t_names(i), trecs(i), trecs(i)*100./tmax if (i==t_rhs) then t = trecs(t_rhsx) + trecs(t_rhsy) + trecs(t_rhsz) write(*,820) 'sub-rhs', t, t*100./tmax t = trecs(t_rhs) - t write(*,820) 'rest-rhs', t, t*100./tmax elseif (i==t_zsolve) then t = trecs(t_zsolve) - trecs(t_rdis1) - trecs(t_rdis2) write(*,820) 'sub-zsol', t, t*100./tmax elseif (i==t_rdis2) then t = trecs(t_rdis1) + trecs(t_rdis2) write(*,820) 'redist', t, t*100./tmax end if 810 format(2x,a8,':',f9.3,' (',f6.2,'%)') 820 format(' --> total ',a8,':',f9.3,' (',f6.2,'%)') end do 999 continue end subroutine add !*****************************************************************************80 ! !! ADD updates the vector U. ! include 'header.h' integer i, j, k, m if (timeron) call timer_start(t_add) do k = 1, grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 u(m,i,j,k) = u(m,i,j,k) + rhs(m,i,j,k) end do end do end do end do if (timeron) call timer_stop(t_add) return end subroutine adi !*****************************************************************************80 ! !! ADI carries out one step of the alternating direction implicit method. ! call compute_rhs ( ) call x_solve ( ) call y_solve ( ) call z_solve ( ) call add ( ) return end subroutine error_norm(rms) !*****************************************************************************80 ! !! ERROR_NORM computes the norm of the error. ! ! Discussion: ! ! The error is the difference between the computed solution and the exact ! solutions ! include 'header.h' integer i, j, k, m, d double precision xi, eta, zeta, u_exact(5), rms(5), add do m = 1, 5 rms(m) = 0.0d0 end do do k = 0, grid_points(3)-1 zeta = dble(k) * dnzm1 do j = 0, grid_points(2)-1 eta = dble(j) * dnym1 do i = 0, grid_points(1)-1 xi = dble(i) * dnxm1 call exact_solution(xi, eta, zeta, u_exact) do m = 1, 5 add = u(m,i,j,k)-u_exact(m) rms(m) = rms(m) + add*add end do end do end do end do do m = 1, 5 do d = 1, 3 rms(m) = rms(m) / dble(grid_points(d)-2) end do rms(m) = dsqrt(rms(m)) end do return end subroutine rhs_norm(rms) !*****************************************************************************80 ! !! RHS_NORM computes the norm of the right hand side. ! include 'header.h' integer i, j, k, d, m double precision rms(5), add do m = 1, 5 rms(m) = 0.0d0 end do do k = 1, grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 add = rhs(m,i,j,k) rms(m) = rms(m) + add*add end do end do end do end do do m = 1, 5 do d = 1, 3 rms(m) = rms(m) / dble(grid_points(d)-2) end do rms(m) = dsqrt(rms(m)) end do return end subroutine exact_rhs ( ) !*****************************************************************************80 ! !! EXACT_RHS computes the right hand side based on the exact solution. ! include 'header.h' double precision dtemp(5), xi, eta, zeta, dtpp integer m, i, j, k, ip1, im1, jp1, jm1, km1, kp1 ! ! initialize ! do k= 0, grid_points(3)-1 do j = 0, grid_points(2)-1 do i = 0, grid_points(1)-1 do m = 1, 5 forcing(m,i,j,k) = 0.0d0 end do end do end do end do ! ! xi-direction flux differences ! do k = 1, grid_points(3)-2 zeta = dble(k) * dnzm1 do j = 1, grid_points(2)-2 eta = dble(j) * dnym1 do i=0, grid_points(1)-1 xi = dble(i) * dnxm1 call exact_solution(xi, eta, zeta, dtemp) do m = 1, 5 ue(i,m) = dtemp(m) end do dtpp = 1.0d0 / dtemp(1) do m = 2, 5 buf(i,m) = dtpp * dtemp(m) end do cuf(i) = buf(i,2) * buf(i,2) buf(i,1) = cuf(i) + buf(i,3) * buf(i,3) + & buf(i,4) * buf(i,4) q(i) = 0.5d0*(buf(i,2)*ue(i,2) + buf(i,3)*ue(i,3) + & buf(i,4)*ue(i,4)) end do do i = 1, grid_points(1)-2 im1 = i-1 ip1 = i+1 forcing(1,i,j,k) = forcing(1,i,j,k) - & tx2*( ue(ip1,2)-ue(im1,2) )+ & dx1tx1*(ue(ip1,1)-2.0d0*ue(i,1)+ue(im1,1)) forcing(2,i,j,k) = forcing(2,i,j,k) - tx2 * ( & (ue(ip1,2)*buf(ip1,2)+c2*(ue(ip1,5)-q(ip1)))- & (ue(im1,2)*buf(im1,2)+c2*(ue(im1,5)-q(im1))))+ & xxcon1*(buf(ip1,2)-2.0d0*buf(i,2)+buf(im1,2))+ & dx2tx1*( ue(ip1,2)-2.0d0* ue(i,2)+ue(im1,2)) forcing(3,i,j,k) = forcing(3,i,j,k) - tx2 * ( & ue(ip1,3)*buf(ip1,2)-ue(im1,3)*buf(im1,2))+ & xxcon2*(buf(ip1,3)-2.0d0*buf(i,3)+buf(im1,3))+ & dx3tx1*( ue(ip1,3)-2.0d0*ue(i,3) +ue(im1,3)) forcing(4,i,j,k) = forcing(4,i,j,k) - tx2*( & ue(ip1,4)*buf(ip1,2)-ue(im1,4)*buf(im1,2))+ & xxcon2*(buf(ip1,4)-2.0d0*buf(i,4)+buf(im1,4))+ & dx4tx1*( ue(ip1,4)-2.0d0* ue(i,4)+ ue(im1,4)) forcing(5,i,j,k) = forcing(5,i,j,k) - tx2*( & buf(ip1,2)*(c1*ue(ip1,5)-c2*q(ip1))- & buf(im1,2)*(c1*ue(im1,5)-c2*q(im1)))+ & 0.5d0*xxcon3*(buf(ip1,1)-2.0d0*buf(i,1)+ & buf(im1,1))+ & xxcon4*(cuf(ip1)-2.0d0*cuf(i)+cuf(im1))+ & xxcon5*(buf(ip1,5)-2.0d0*buf(i,5)+buf(im1,5))+ & dx5tx1*( ue(ip1,5)-2.0d0* ue(i,5)+ ue(im1,5)) end do ! ! Fourth-order dissipation ! do m = 1, 5 i = 1 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (5.0d0*ue(i,m) - 4.0d0*ue(i+1,m) +ue(i+2,m)) i = 2 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (-4.0d0*ue(i-1,m) + 6.0d0*ue(i,m) - & 4.0d0*ue(i+1,m) + ue(i+2,m)) end do do i = 3, grid_points(1)-4 do m = 1, 5 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp* & (ue(i-2,m) - 4.0d0*ue(i-1,m) + & 6.0d0*ue(i,m) - 4.0d0*ue(i+1,m) + ue(i+2,m)) end do end do do m = 1, 5 i = grid_points(1)-3 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (ue(i-2,m) - 4.0d0*ue(i-1,m) + & 6.0d0*ue(i,m) - 4.0d0*ue(i+1,m)) i = grid_points(1)-2 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (ue(i-2,m) - 4.0d0*ue(i-1,m) + 5.0d0*ue(i,m)) end do end do end do ! ! eta-direction flux differences ! do k = 1, grid_points(3)-2 zeta = dble(k) * dnzm1 do i=1, grid_points(1)-2 xi = dble(i) * dnxm1 do j=0, grid_points(2)-1 eta = dble(j) * dnym1 call exact_solution(xi, eta, zeta, dtemp) do m = 1, 5 ue(j,m) = dtemp(m) end do dtpp = 1.0d0/dtemp(1) do m = 2, 5 buf(j,m) = dtpp * dtemp(m) end do cuf(j) = buf(j,3) * buf(j,3) buf(j,1) = cuf(j) + buf(j,2) * buf(j,2) + & buf(j,4) * buf(j,4) q(j) = 0.5d0*(buf(j,2)*ue(j,2) + buf(j,3)*ue(j,3) + & buf(j,4)*ue(j,4)) end do do j = 1, grid_points(2)-2 jm1 = j-1 jp1 = j+1 forcing(1,i,j,k) = forcing(1,i,j,k) - & ty2*( ue(jp1,3)-ue(jm1,3) )+ & dy1ty1*(ue(jp1,1)-2.0d0*ue(j,1)+ue(jm1,1)) forcing(2,i,j,k) = forcing(2,i,j,k) - ty2*( & ue(jp1,2)*buf(jp1,3)-ue(jm1,2)*buf(jm1,3))+ & yycon2*(buf(jp1,2)-2.0d0*buf(j,2)+buf(jm1,2))+ & dy2ty1*( ue(jp1,2)-2.0* ue(j,2)+ ue(jm1,2)) forcing(3,i,j,k) = forcing(3,i,j,k) - ty2*( & (ue(jp1,3)*buf(jp1,3)+c2*(ue(jp1,5)-q(jp1)))- & (ue(jm1,3)*buf(jm1,3)+c2*(ue(jm1,5)-q(jm1))))+ & yycon1*(buf(jp1,3)-2.0d0*buf(j,3)+buf(jm1,3))+ & dy3ty1*( ue(jp1,3)-2.0d0*ue(j,3) +ue(jm1,3)) forcing(4,i,j,k) = forcing(4,i,j,k) - ty2*( & ue(jp1,4)*buf(jp1,3)-ue(jm1,4)*buf(jm1,3))+ & yycon2*(buf(jp1,4)-2.0d0*buf(j,4)+buf(jm1,4))+ & dy4ty1*( ue(jp1,4)-2.0d0*ue(j,4)+ ue(jm1,4)) forcing(5,i,j,k) = forcing(5,i,j,k) - ty2*( & buf(jp1,3)*(c1*ue(jp1,5)-c2*q(jp1))- & buf(jm1,3)*(c1*ue(jm1,5)-c2*q(jm1)))+ & 0.5d0*yycon3*(buf(jp1,1)-2.0d0*buf(j,1)+ & buf(jm1,1))+ & yycon4*(cuf(jp1)-2.0d0*cuf(j)+cuf(jm1))+ & yycon5*(buf(jp1,5)-2.0d0*buf(j,5)+buf(jm1,5))+ & dy5ty1*(ue(jp1,5)-2.0d0*ue(j,5)+ue(jm1,5)) end do ! ! Fourth-order dissipation ! do m = 1, 5 j = 1 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (5.0d0*ue(j,m) - 4.0d0*ue(j+1,m) +ue(j+2,m)) j = 2 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (-4.0d0*ue(j-1,m) + 6.0d0*ue(j,m) - & 4.0d0*ue(j+1,m) + ue(j+2,m)) end do do j = 3, grid_points(2)-4 do m = 1, 5 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp* & (ue(j-2,m) - 4.0d0*ue(j-1,m) + & 6.0d0*ue(j,m) - 4.0d0*ue(j+1,m) + ue(j+2,m)) end do end do do m = 1, 5 j = grid_points(2)-3 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (ue(j-2,m) - 4.0d0*ue(j-1,m) + & 6.0d0*ue(j,m) - 4.0d0*ue(j+1,m)) j = grid_points(2)-2 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (ue(j-2,m) - 4.0d0*ue(j-1,m) + 5.0d0*ue(j,m)) end do end do end do ! ! zeta-direction flux differences ! do j=1, grid_points(2)-2 eta = dble(j) * dnym1 do i = 1, grid_points(1)-2 xi = dble(i) * dnxm1 do k=0, grid_points(3)-1 zeta = dble(k) * dnzm1 call exact_solution(xi, eta, zeta, dtemp) do m = 1, 5 ue(k,m) = dtemp(m) end do dtpp = 1.0d0/dtemp(1) do m = 2, 5 buf(k,m) = dtpp * dtemp(m) end do cuf(k) = buf(k,4) * buf(k,4) buf(k,1) = cuf(k) + buf(k,2) * buf(k,2) + & buf(k,3) * buf(k,3) q(k) = 0.5d0*(buf(k,2)*ue(k,2) + buf(k,3)*ue(k,3) + & buf(k,4)*ue(k,4)) end do do k=1, grid_points(3)-2 km1 = k-1 kp1 = k+1 forcing(1,i,j,k) = forcing(1,i,j,k) - & tz2*( ue(kp1,4)-ue(km1,4) )+ & dz1tz1*(ue(kp1,1)-2.0d0*ue(k,1)+ue(km1,1)) forcing(2,i,j,k) = forcing(2,i,j,k) - tz2 * ( & ue(kp1,2)*buf(kp1,4)-ue(km1,2)*buf(km1,4))+ & zzcon2*(buf(kp1,2)-2.0d0*buf(k,2)+buf(km1,2))+ & dz2tz1*( ue(kp1,2)-2.0d0* ue(k,2)+ ue(km1,2)) forcing(3,i,j,k) = forcing(3,i,j,k) - tz2 * ( & ue(kp1,3)*buf(kp1,4)-ue(km1,3)*buf(km1,4))+ & zzcon2*(buf(kp1,3)-2.0d0*buf(k,3)+buf(km1,3))+ & dz3tz1*(ue(kp1,3)-2.0d0*ue(k,3)+ue(km1,3)) forcing(4,i,j,k) = forcing(4,i,j,k) - tz2 * ( & (ue(kp1,4)*buf(kp1,4)+c2*(ue(kp1,5)-q(kp1)))- & (ue(km1,4)*buf(km1,4)+c2*(ue(km1,5)-q(km1))))+ & zzcon1*(buf(kp1,4)-2.0d0*buf(k,4)+buf(km1,4))+ & dz4tz1*( ue(kp1,4)-2.0d0*ue(k,4) +ue(km1,4)) forcing(5,i,j,k) = forcing(5,i,j,k) - tz2 * ( & buf(kp1,4)*(c1*ue(kp1,5)-c2*q(kp1))- & buf(km1,4)*(c1*ue(km1,5)-c2*q(km1)))+ & 0.5d0*zzcon3*(buf(kp1,1)-2.0d0*buf(k,1) & +buf(km1,1))+ & zzcon4*(cuf(kp1)-2.0d0*cuf(k)+cuf(km1))+ & zzcon5*(buf(kp1,5)-2.0d0*buf(k,5)+buf(km1,5))+ & dz5tz1*( ue(kp1,5)-2.0d0*ue(k,5)+ ue(km1,5)) end do ! ! Fourth-order dissipation ! do m = 1, 5 k = 1 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (5.0d0*ue(k,m) - 4.0d0*ue(k+1,m) +ue(k+2,m)) k = 2 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (-4.0d0*ue(k-1,m) + 6.0d0*ue(k,m) - & 4.0d0*ue(k+1,m) + ue(k+2,m)) end do do k = 3, grid_points(3)-4 do m = 1, 5 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp* & (ue(k-2,m) - 4.0d0*ue(k-1,m) + & 6.0d0*ue(k,m) - 4.0d0*ue(k+1,m) + ue(k+2,m)) end do end do do m = 1, 5 k = grid_points(3)-3 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (ue(k-2,m) - 4.0d0*ue(k-1,m) + & 6.0d0*ue(k,m) - 4.0d0*ue(k+1,m)) k = grid_points(3)-2 forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * & (ue(k-2,m) - 4.0d0*ue(k-1,m) + 5.0d0*ue(k,m)) end do end do end do ! ! Now change the sign of the forcing function, ! do k = 1, grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 forcing(m,i,j,k) = -1.d0 * forcing(m,i,j,k) end do end do end do end do return end subroutine exact_solution ( xi, eta, zeta, dtemp ) !*****************************************************************************80 ! !! EXACT_SOLUTION returns the exact solution at point (xi, eta, zeta). ! include 'header.h' double precision dtemp(5) double precision eta integer m double precision xi double precision zeta do m = 1, 5 dtemp(m) = ce(m,1) + & xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) + & eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+ & zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + & zeta*ce(m,13)))) end do return end subroutine initialize ( ) !*****************************************************************************80 ! !! INITIALIZE initializes the field variable u. ! ! Discussion: ! ! The routine uses tri-linear transfinite interpolation of the ! boundary values. ! include 'header.h' integer i, j, k, m, ix, iy, iz double precision xi, eta, zeta, Pface(5,3,2), Pxi, Peta, & Pzeta, temp(5) ! ! Later (in compute_rhs) we compute 1/u for every element. A few of ! the corner elements are not used, but it convenient (and faster) ! to compute the whole thing with a simple loop. Make sure those ! values are nonzero by initializing the whole thing here. ! do k = 0, grid_points(3)-1 do j = 0, grid_points(2)-1 do i = 0, grid_points(1)-1 do m = 1, 5 u(m,i,j,k) = 1.0 end do end do end do end do ! ! first store the "interpolated" values everywhere on the grid ! do k = 0, grid_points(3)-1 zeta = dble(k) * dnzm1 do j = 0, grid_points(2)-1 eta = dble(j) * dnym1 do i = 0, grid_points(1)-1 xi = dble(i) * dnxm1 do ix = 1, 2 call exact_solution(dble(ix-1), eta, zeta, & Pface(1,1,ix)) end do do iy = 1, 2 call exact_solution(xi, dble(iy-1) , zeta, & Pface(1,2,iy)) end do do iz = 1, 2 call exact_solution(xi, eta, dble(iz-1), & Pface(1,3,iz)) end do do m = 1, 5 Pxi = xi * Pface(m,1,2) + & (1.0d0-xi) * Pface(m,1,1) Peta = eta * Pface(m,2,2) + & (1.0d0-eta) * Pface(m,2,1) Pzeta = zeta * Pface(m,3,2) + & (1.0d0-zeta) * Pface(m,3,1) u(m,i,j,k) = Pxi + Peta + Pzeta - & Pxi*Peta - Pxi*Pzeta - Peta*Pzeta + & Pxi*Peta*Pzeta end do end do end do end do ! ! now store the exact values on the boundaries ! ! ! west face ! i = 0 xi = 0.0d0 do k = 0, grid_points(3)-1 zeta = dble(k) * dnzm1 do j = 0, grid_points(2)-1 eta = dble(j) * dnym1 call exact_solution(xi, eta, zeta, temp) do m = 1, 5 u(m,i,j,k) = temp(m) end do end do end do ! ! east face ! i = grid_points(1)-1 xi = 1.0d0 do k = 0, grid_points(3)-1 zeta = dble(k) * dnzm1 do j = 0, grid_points(2)-1 eta = dble(j) * dnym1 call exact_solution(xi, eta, zeta, temp) do m = 1, 5 u(m,i,j,k) = temp(m) end do end do end do ! ! south face ! j = 0 eta = 0.0d0 do k = 0, grid_points(3)-1 zeta = dble(k) * dnzm1 do i = 0, grid_points(1)-1 xi = dble(i) * dnxm1 call exact_solution(xi, eta, zeta, temp) do m = 1, 5 u(m,i,j,k) = temp(m) end do end do end do ! ! north face ! j = grid_points(2)-1 eta = 1.0d0 do k = 0, grid_points(3)-1 zeta = dble(k) * dnzm1 do i = 0, grid_points(1)-1 xi = dble(i) * dnxm1 call exact_solution(xi, eta, zeta, temp) do m = 1, 5 u(m,i,j,k) = temp(m) end do end do end do ! ! bottom face ! k = 0 zeta = 0.0d0 do j = 0, grid_points(2)-1 eta = dble(j) * dnym1 do i =0, grid_points(1)-1 xi = dble(i) *dnxm1 call exact_solution(xi, eta, zeta, temp) do m = 1, 5 u(m,i,j,k) = temp(m) end do end do end do ! ! top face ! k = grid_points(3)-1 zeta = 1.0d0 do j = 0, grid_points(2)-1 eta = dble(j) * dnym1 do i =0, grid_points(1)-1 xi = dble(i) * dnxm1 call exact_solution(xi, eta, zeta, temp) do m = 1, 5 u(m,i,j,k) = temp(m) end do end do end do return end subroutine lhsinit(lhs, size) !*****************************************************************************80 ! !! LHSINIT initializes items on the left hand side of the equations. ! implicit none integer size double precision lhs(5,5,3,0:size) integer i, m, n i = size ! ! zero the whole left hand side for starters ! do m = 1, 5 do n = 1, 5 lhs(m,n,1,0) = 0.0d0 lhs(m,n,2,0) = 0.0d0 lhs(m,n,3,0) = 0.0d0 lhs(m,n,1,i) = 0.0d0 lhs(m,n,2,i) = 0.0d0 lhs(m,n,3,i) = 0.0d0 end do end do ! ! next, set all diagonal values to 1. This is overkill, but convenient ! do m = 1, 5 lhs(m,m,2,0) = 1.0d0 lhs(m,m,2,i) = 1.0d0 end do return end subroutine print_results(name, class, n1, n2, n3, niter, & t, mops, optype, verified, npbversion, & compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) !*****************************************************************************80 ! !! PRINT_RESULTS prints the results. ! implicit none character name*(*) character class*1 integer n1, n2, n3, niter, j double precision t, mops character optype*24, size*15 logical verified character*(*) npbversion, compiletime, & cs1, cs2, cs3, cs4, cs5, cs6, cs7 write (*, 2) name 2 format(//, ' ', A, ' Benchmark Completed.') write (*, 3) Class 3 format(' Class = ', 12x, a12) ! ! If this is not a grid-based problem (EP, FT, CG), then ! we only print n1, which contains some measure of the ! problem size. In that case, n2 and n3 are both zero. ! Otherwise, we print the grid size n1xn2xn3 ! if ((n2 == 0) .and. (n3 == 0)) then if (name(1:2) == 'EP') then write(size, '(f15.0)' ) 2.d0**n1 j = 15 if (size(j:j) == '.') then size(j:j) = ' ' j = j - 1 end if write (*,42) size(1:j) 42 format(' Size = ',9x, a15) else write (*,44) n1 44 format(' Size = ',12x, i12) end if else write (*, 4) n1,n2,n3 4 format(' Size = ',9x, i4,'x',i4,'x',i4) end if write (*, 5) niter 5 format(' Iterations = ', 12x, i12) write (*, 6) t 6 format(' Time in seconds = ',12x, f12.2) write (*,9) mops 9 format(' Mop/s total = ',12x, f12.2) write(*, 11) optype 11 format(' Operation type = ', a24) if (verified) then write(*,12) ' SUCCESSFUL' else write(*,12) 'UNSUCCESSFUL' end if 12 format(' Verification = ', 12x, a) write(*,13) npbversion 13 format(' Version = ', 12x, a12) write(*,14) compiletime 14 format(' Compile date = ', 12x, a12) write (*,121) cs1 121 format(/, ' Compile options:', /, & ' F77 = ', A) write (*,122) cs2 122 format(' FLINK = ', A) write (*,123) cs3 123 format(' F_LIB = ', A) write (*,124) cs4 124 format(' F_INC = ', A) write (*,125) cs5 125 format(' FFLAGS = ', A) write (*,126) cs6 126 format(' FLINKFLAGS = ', A) write(*, 127) cs7 127 format(' RAND = ', A) write (*,130) 130 format(//' Please send all errors/feedbacks to:'// & ' NPB Development Team'/ & ' npb@nas.nasa.gov'//) return end subroutine compute_rhs ( ) !*****************************************************************************80 ! !! COMPUTE_RHS computes the right hand side. ! include 'header.h' integer i, j, k, m double precision rho_inv, uijk, up1, um1, vijk, vp1, vm1, & wijk, wp1, wm1 if (timeron) call timer_start(t_rhs) ! ! compute the reciprocal of density, and the kinetic energy, ! and the speed of sound. ! do k = 0, grid_points(3)-1 do j = 0, grid_points(2)-1 do i = 0, grid_points(1)-1 rho_inv = 1.0d0/u(1,i,j,k) rho_i(i,j,k) = rho_inv us(i,j,k) = u(2,i,j,k) * rho_inv vs(i,j,k) = u(3,i,j,k) * rho_inv ws(i,j,k) = u(4,i,j,k) * rho_inv square(i,j,k) = 0.5d0* ( & u(2,i,j,k)*u(2,i,j,k) + & u(3,i,j,k)*u(3,i,j,k) + & u(4,i,j,k)*u(4,i,j,k) ) * rho_inv qs(i,j,k) = square(i,j,k) * rho_inv end do end do end do ! ! copy the exact forcing term to the right hand side; because ! this forcing term is known, we can store it on the whole grid ! including the boundary ! do k = 0, grid_points(3)-1 do j = 0, grid_points(2)-1 do i = 0, grid_points(1)-1 do m = 1, 5 rhs(m,i,j,k) = forcing(m,i,j,k) end do end do end do end do if (timeron) call timer_start(t_rhsx) ! ! compute xi-direction fluxes ! do k = 1, grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 uijk = us(i,j,k) up1 = us(i+1,j,k) um1 = us(i-1,j,k) rhs(1,i,j,k) = rhs(1,i,j,k) + dx1tx1 * & (u(1,i+1,j,k) - 2.0d0*u(1,i,j,k) + & u(1,i-1,j,k)) - & tx2 * (u(2,i+1,j,k) - u(2,i-1,j,k)) rhs(2,i,j,k) = rhs(2,i,j,k) + dx2tx1 * & (u(2,i+1,j,k) - 2.0d0*u(2,i,j,k) + & u(2,i-1,j,k)) + & xxcon2*con43 * (up1 - 2.0d0*uijk + um1) - & tx2 * (u(2,i+1,j,k)*up1 - & u(2,i-1,j,k)*um1 + & (u(5,i+1,j,k)- square(i+1,j,k)- & u(5,i-1,j,k)+ square(i-1,j,k))* & c2) rhs(3,i,j,k) = rhs(3,i,j,k) + dx3tx1 * & (u(3,i+1,j,k) - 2.0d0*u(3,i,j,k) + & u(3,i-1,j,k)) + & xxcon2 * (vs(i+1,j,k) - 2.0d0*vs(i,j,k) + & vs(i-1,j,k)) - & tx2 * (u(3,i+1,j,k)*up1 - & u(3,i-1,j,k)*um1) rhs(4,i,j,k) = rhs(4,i,j,k) + dx4tx1 * & (u(4,i+1,j,k) - 2.0d0*u(4,i,j,k) + & u(4,i-1,j,k)) + & xxcon2 * (ws(i+1,j,k) - 2.0d0*ws(i,j,k) + & ws(i-1,j,k)) - & tx2 * (u(4,i+1,j,k)*up1 - & u(4,i-1,j,k)*um1) rhs(5,i,j,k) = rhs(5,i,j,k) + dx5tx1 * & (u(5,i+1,j,k) - 2.0d0*u(5,i,j,k) + & u(5,i-1,j,k)) + & xxcon3 * (qs(i+1,j,k) - 2.0d0*qs(i,j,k) + & qs(i-1,j,k)) + & xxcon4 * (up1*up1 - 2.0d0*uijk*uijk + & um1*um1) + & xxcon5 * (u(5,i+1,j,k)*rho_i(i+1,j,k) - & 2.0d0*u(5,i,j,k)*rho_i(i,j,k) + & u(5,i-1,j,k)*rho_i(i-1,j,k)) - & tx2 * ( (c1*u(5,i+1,j,k) - & c2*square(i+1,j,k))*up1 - & (c1*u(5,i-1,j,k) - & c2*square(i-1,j,k))*um1 ) end do end do ! ! add fourth order xi-direction dissipation ! do j = 1, grid_points(2)-2 i = 1 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k)- dssp * & ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) + & u(m,i+2,j,k)) end do i = 2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & (-4.0d0*u(m,i-1,j,k) + 6.0d0*u(m,i,j,k) - & 4.0d0*u(m,i+1,j,k) + u(m,i+2,j,k)) end do end do do j = 1, grid_points(2)-2 do i = 3,grid_points(1)-4 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i-2,j,k) - 4.0d0*u(m,i-1,j,k) + & 6.0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) + & u(m,i+2,j,k) ) end do end do end do do j = 1, grid_points(2)-2 i = grid_points(1)-3 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i-2,j,k) - 4.0d0*u(m,i-1,j,k) + & 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) ) end do i = grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i-2,j,k) - 4.d0*u(m,i-1,j,k) + & 5.d0*u(m,i,j,k) ) end do end do end do if (timeron) call timer_stop(t_rhsx) if (timeron) call timer_start(t_rhsy) ! ! compute eta-direction fluxes ! do k = 1, grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 vijk = vs(i,j,k) vp1 = vs(i,j+1,k) vm1 = vs(i,j-1,k) rhs(1,i,j,k) = rhs(1,i,j,k) + dy1ty1 * & (u(1,i,j+1,k) - 2.0d0*u(1,i,j,k) + & u(1,i,j-1,k)) - & ty2 * (u(3,i,j+1,k) - u(3,i,j-1,k)) rhs(2,i,j,k) = rhs(2,i,j,k) + dy2ty1 * & (u(2,i,j+1,k) - 2.0d0*u(2,i,j,k) + & u(2,i,j-1,k)) + & yycon2 * (us(i,j+1,k) - 2.0d0*us(i,j,k) + & us(i,j-1,k)) - & ty2 * (u(2,i,j+1,k)*vp1 - & u(2,i,j-1,k)*vm1) rhs(3,i,j,k) = rhs(3,i,j,k) + dy3ty1 * & (u(3,i,j+1,k) - 2.0d0*u(3,i,j,k) + & u(3,i,j-1,k)) + & yycon2*con43 * (vp1 - 2.0d0*vijk + vm1) - & ty2 * (u(3,i,j+1,k)*vp1 - & u(3,i,j-1,k)*vm1 + & (u(5,i,j+1,k) - square(i,j+1,k) - & u(5,i,j-1,k) + square(i,j-1,k)) & *c2) rhs(4,i,j,k) = rhs(4,i,j,k) + dy4ty1 * & (u(4,i,j+1,k) - 2.0d0*u(4,i,j,k) + & u(4,i,j-1,k)) + & yycon2 * (ws(i,j+1,k) - 2.0d0*ws(i,j,k) + & ws(i,j-1,k)) - & ty2 * (u(4,i,j+1,k)*vp1 - & u(4,i,j-1,k)*vm1) rhs(5,i,j,k) = rhs(5,i,j,k) + dy5ty1 * & (u(5,i,j+1,k) - 2.0d0*u(5,i,j,k) + & u(5,i,j-1,k)) + & yycon3 * (qs(i,j+1,k) - 2.0d0*qs(i,j,k) + & qs(i,j-1,k)) + & yycon4 * (vp1*vp1 - 2.0d0*vijk*vijk + & vm1*vm1) + & yycon5 * (u(5,i,j+1,k)*rho_i(i,j+1,k) - & 2.0d0*u(5,i,j,k)*rho_i(i,j,k) + & u(5,i,j-1,k)*rho_i(i,j-1,k)) - & ty2 * ((c1*u(5,i,j+1,k) - & c2*square(i,j+1,k)) * vp1 - & (c1*u(5,i,j-1,k) - & c2*square(i,j-1,k)) * vm1) end do end do ! ! add fourth order eta-direction dissipation ! j = 1 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k)- dssp * & ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) + & u(m,i,j+2,k)) end do end do j = 2 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & (-4.0d0*u(m,i,j-1,k) + 6.0d0*u(m,i,j,k) - & 4.0d0*u(m,i,j+1,k) + u(m,i,j+2,k)) end do end do do j = 3, grid_points(2)-4 do i = 1,grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i,j-2,k) - 4.0d0*u(m,i,j-1,k) + & 6.0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) + & u(m,i,j+2,k) ) end do end do end do j = grid_points(2)-3 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i,j-2,k) - 4.0d0*u(m,i,j-1,k) + & 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) ) end do end do j = grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i,j-2,k) - 4.d0*u(m,i,j-1,k) + & 5.d0*u(m,i,j,k) ) end do end do end do if (timeron) call timer_stop(t_rhsy) if (timeron) call timer_start(t_rhsz) ! ! compute zeta-direction fluxes ! do k = 1, grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 wijk = ws(i,j,k) wp1 = ws(i,j,k+1) wm1 = ws(i,j,k-1) rhs(1,i,j,k) = rhs(1,i,j,k) + dz1tz1 * & (u(1,i,j,k+1) - 2.0d0*u(1,i,j,k) + & u(1,i,j,k-1)) - & tz2 * (u(4,i,j,k+1) - u(4,i,j,k-1)) rhs(2,i,j,k) = rhs(2,i,j,k) + dz2tz1 * & (u(2,i,j,k+1) - 2.0d0*u(2,i,j,k) + & u(2,i,j,k-1)) + & zzcon2 * (us(i,j,k+1) - 2.0d0*us(i,j,k) + & us(i,j,k-1)) - & tz2 * (u(2,i,j,k+1)*wp1 - & u(2,i,j,k-1)*wm1) rhs(3,i,j,k) = rhs(3,i,j,k) + dz3tz1 * & (u(3,i,j,k+1) - 2.0d0*u(3,i,j,k) + & u(3,i,j,k-1)) + & zzcon2 * (vs(i,j,k+1) - 2.0d0*vs(i,j,k) + & vs(i,j,k-1)) - & tz2 * (u(3,i,j,k+1)*wp1 - & u(3,i,j,k-1)*wm1) rhs(4,i,j,k) = rhs(4,i,j,k) + dz4tz1 * & (u(4,i,j,k+1) - 2.0d0*u(4,i,j,k) + & u(4,i,j,k-1)) + & zzcon2*con43 * (wp1 - 2.0d0*wijk + wm1) - & tz2 * (u(4,i,j,k+1)*wp1 - & u(4,i,j,k-1)*wm1 + & (u(5,i,j,k+1) - square(i,j,k+1) - & u(5,i,j,k-1) + square(i,j,k-1)) & *c2) rhs(5,i,j,k) = rhs(5,i,j,k) + dz5tz1 * & (u(5,i,j,k+1) - 2.0d0*u(5,i,j,k) + & u(5,i,j,k-1)) + & zzcon3 * (qs(i,j,k+1) - 2.0d0*qs(i,j,k) + & qs(i,j,k-1)) + & zzcon4 * (wp1*wp1 - 2.0d0*wijk*wijk + & wm1*wm1) + & zzcon5 * (u(5,i,j,k+1)*rho_i(i,j,k+1) - & 2.0d0*u(5,i,j,k)*rho_i(i,j,k) + & u(5,i,j,k-1)*rho_i(i,j,k-1)) - & tz2 * ( (c1*u(5,i,j,k+1) - & c2*square(i,j,k+1))*wp1 - & (c1*u(5,i,j,k-1) - & c2*square(i,j,k-1))*wm1) end do end do end do ! ! add fourth order zeta-direction dissipation ! k = 1 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k)- dssp * & ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) + & u(m,i,j,k+2)) end do end do end do k = 2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & (-4.0d0*u(m,i,j,k-1) + 6.0d0*u(m,i,j,k) - & 4.0d0*u(m,i,j,k+1) + u(m,i,j,k+2)) end do end do end do do k = 3, grid_points(3)-4 do j = 1, grid_points(2)-2 do i = 1,grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i,j,k-2) - 4.0d0*u(m,i,j,k-1) + & 6.0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) + & u(m,i,j,k+2) ) end do end do end do end do k = grid_points(3)-3 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i,j,k-2) - 4.0d0*u(m,i,j,k-1) + & 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) ) end do end do end do k = grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) - dssp * & ( u(m,i,j,k-2) - 4.d0*u(m,i,j,k-1) + & 5.d0*u(m,i,j,k) ) end do end do end do if (timeron) call timer_stop(t_rhsz) do k = 1, grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do m = 1, 5 rhs(m,i,j,k) = rhs(m,i,j,k) * dt end do end do end do end do if (timeron) call timer_stop(t_rhs) return end subroutine set_constants ( ) !*****************************************************************************80 ! !! SET_CONSTANTS sets problem constants. ! include 'header.h' ce(1,1) = 2.0d0 ce(1,2) = 0.0d0 ce(1,3) = 0.0d0 ce(1,4) = 4.0d0 ce(1,5) = 5.0d0 ce(1,6) = 3.0d0 ce(1,7) = 0.5d0 ce(1,8) = 0.02d0 ce(1,9) = 0.01d0 ce(1,10) = 0.03d0 ce(1,11) = 0.5d0 ce(1,12) = 0.4d0 ce(1,13) = 0.3d0 ce(2,1) = 1.0d0 ce(2,2) = 0.0d0 ce(2,3) = 0.0d0 ce(2,4) = 0.0d0 ce(2,5) = 1.0d0 ce(2,6) = 2.0d0 ce(2,7) = 3.0d0 ce(2,8) = 0.01d0 ce(2,9) = 0.03d0 ce(2,10) = 0.02d0 ce(2,11) = 0.4d0 ce(2,12) = 0.3d0 ce(2,13) = 0.5d0 ce(3,1) = 2.0d0 ce(3,2) = 2.0d0 ce(3,3) = 0.0d0 ce(3,4) = 0.0d0 ce(3,5) = 0.0d0 ce(3,6) = 2.0d0 ce(3,7) = 3.0d0 ce(3,8) = 0.04d0 ce(3,9) = 0.03d0 ce(3,10) = 0.05d0 ce(3,11) = 0.3d0 ce(3,12) = 0.5d0 ce(3,13) = 0.4d0 ce(4,1) = 2.0d0 ce(4,2) = 2.0d0 ce(4,3) = 0.0d0 ce(4,4) = 0.0d0 ce(4,5) = 0.0d0 ce(4,6) = 2.0d0 ce(4,7) = 3.0d0 ce(4,8) = 0.03d0 ce(4,9) = 0.05d0 ce(4,10) = 0.04d0 ce(4,11) = 0.2d0 ce(4,12) = 0.1d0 ce(4,13) = 0.3d0 ce(5,1) = 5.0d0 ce(5,2) = 4.0d0 ce(5,3) = 3.0d0 ce(5,4) = 2.0d0 ce(5,5) = 0.1d0 ce(5,6) = 0.4d0 ce(5,7) = 0.3d0 ce(5,8) = 0.05d0 ce(5,9) = 0.04d0 ce(5,10) = 0.03d0 ce(5,11) = 0.1d0 ce(5,12) = 0.3d0 ce(5,13) = 0.2d0 c1 = 1.4d0 c2 = 0.4d0 c3 = 0.1d0 c4 = 1.0d0 c5 = 1.4d0 dnxm1 = 1.0d0 / dble(grid_points(1)-1) dnym1 = 1.0d0 / dble(grid_points(2)-1) dnzm1 = 1.0d0 / dble(grid_points(3)-1) c1c2 = c1 * c2 c1c5 = c1 * c5 c3c4 = c3 * c4 c1345 = c1c5 * c3c4 conz1 = (1.0d0-c1c5) tx1 = 1.0d0 / (dnxm1 * dnxm1) tx2 = 1.0d0 / (2.0d0 * dnxm1) tx3 = 1.0d0 / dnxm1 ty1 = 1.0d0 / (dnym1 * dnym1) ty2 = 1.0d0 / (2.0d0 * dnym1) ty3 = 1.0d0 / dnym1 tz1 = 1.0d0 / (dnzm1 * dnzm1) tz2 = 1.0d0 / (2.0d0 * dnzm1) tz3 = 1.0d0 / dnzm1 dx1 = 0.75d0 dx2 = 0.75d0 dx3 = 0.75d0 dx4 = 0.75d0 dx5 = 0.75d0 dy1 = 0.75d0 dy2 = 0.75d0 dy3 = 0.75d0 dy4 = 0.75d0 dy5 = 0.75d0 dz1 = 1.0d0 dz2 = 1.0d0 dz3 = 1.0d0 dz4 = 1.0d0 dz5 = 1.0d0 dxmax = dmax1(dx3, dx4) dymax = dmax1(dy2, dy4) dzmax = dmax1(dz2, dz3) dssp = 0.25d0 * dmax1(dx1, dmax1(dy1, dz1) ) c4dssp = 4.0d0 * dssp c5dssp = 5.0d0 * dssp dttx1 = dt*tx1 dttx2 = dt*tx2 dtty1 = dt*ty1 dtty2 = dt*ty2 dttz1 = dt*tz1 dttz2 = dt*tz2 c2dttx1 = 2.0d0*dttx1 c2dtty1 = 2.0d0*dtty1 c2dttz1 = 2.0d0*dttz1 dtdssp = dt*dssp comz1 = dtdssp comz4 = 4.0d0*dtdssp comz5 = 5.0d0*dtdssp comz6 = 6.0d0*dtdssp c3c4tx3 = c3c4*tx3 c3c4ty3 = c3c4*ty3 c3c4tz3 = c3c4*tz3 dx1tx1 = dx1*tx1 dx2tx1 = dx2*tx1 dx3tx1 = dx3*tx1 dx4tx1 = dx4*tx1 dx5tx1 = dx5*tx1 dy1ty1 = dy1*ty1 dy2ty1 = dy2*ty1 dy3ty1 = dy3*ty1 dy4ty1 = dy4*ty1 dy5ty1 = dy5*ty1 dz1tz1 = dz1*tz1 dz2tz1 = dz2*tz1 dz3tz1 = dz3*tz1 dz4tz1 = dz4*tz1 dz5tz1 = dz5*tz1 c2iv = 2.5d0 con43 = 4.0d0/3.0d0 con16 = 1.0d0/6.0d0 xxcon1 = c3c4tx3*con43*tx3 xxcon2 = c3c4tx3*tx3 xxcon3 = c3c4tx3*conz1*tx3 xxcon4 = c3c4tx3*con16*tx3 xxcon5 = c3c4tx3*c1c5*tx3 yycon1 = c3c4ty3*con43*ty3 yycon2 = c3c4ty3*ty3 yycon3 = c3c4ty3*conz1*ty3 yycon4 = c3c4ty3*con16*ty3 yycon5 = c3c4ty3*c1c5*ty3 zzcon1 = c3c4tz3*con43*tz3 zzcon2 = c3c4tz3*tz3 zzcon3 = c3c4tz3*conz1*tz3 zzcon4 = c3c4tz3*con16*tz3 zzcon5 = c3c4tz3*c1c5*tz3 return end subroutine matvec_sub(ablock,avec,bvec) !*****************************************************************************80 ! !! MATVEC_SUB computes bvec := bvec - ablock * avec ! implicit none double precision ablock,avec,bvec dimension ablock(5,5),avec(5),bvec(5) bvec(1) = bvec(1) - ablock(1,1)*avec(1) & - ablock(1,2)*avec(2) & - ablock(1,3)*avec(3) & - ablock(1,4)*avec(4) & - ablock(1,5)*avec(5) bvec(2) = bvec(2) - ablock(2,1)*avec(1) & - ablock(2,2)*avec(2) & - ablock(2,3)*avec(3) & - ablock(2,4)*avec(4) & - ablock(2,5)*avec(5) bvec(3) = bvec(3) - ablock(3,1)*avec(1) & - ablock(3,2)*avec(2) & - ablock(3,3)*avec(3) & - ablock(3,4)*avec(4) & - ablock(3,5)*avec(5) bvec(4) = bvec(4) - ablock(4,1)*avec(1) & - ablock(4,2)*avec(2) & - ablock(4,3)*avec(3) & - ablock(4,4)*avec(4) & - ablock(4,5)*avec(5) bvec(5) = bvec(5) - ablock(5,1)*avec(1) & - ablock(5,2)*avec(2) & - ablock(5,3)*avec(3) & - ablock(5,4)*avec(4) & - ablock(5,5)*avec(5) return end subroutine matmul_sub(ablock, bblock, cblock) !*****************************************************************************80 ! !! MATMUL_SUB subtracts a(i,j,k) X b(i,j,k) from c(i,j,k) ! implicit none double precision ablock, bblock, cblock dimension ablock(5,5), bblock(5,5), cblock(5,5) cblock(1,1) = cblock(1,1) - ablock(1,1)*bblock(1,1) & - ablock(1,2)*bblock(2,1) & - ablock(1,3)*bblock(3,1) & - ablock(1,4)*bblock(4,1) & - ablock(1,5)*bblock(5,1) cblock(2,1) = cblock(2,1) - ablock(2,1)*bblock(1,1) & - ablock(2,2)*bblock(2,1) & - ablock(2,3)*bblock(3,1) & - ablock(2,4)*bblock(4,1) & - ablock(2,5)*bblock(5,1) cblock(3,1) = cblock(3,1) - ablock(3,1)*bblock(1,1) & - ablock(3,2)*bblock(2,1) & - ablock(3,3)*bblock(3,1) & - ablock(3,4)*bblock(4,1) & - ablock(3,5)*bblock(5,1) cblock(4,1) = cblock(4,1) - ablock(4,1)*bblock(1,1) & - ablock(4,2)*bblock(2,1) & - ablock(4,3)*bblock(3,1) & - ablock(4,4)*bblock(4,1) & - ablock(4,5)*bblock(5,1) cblock(5,1) = cblock(5,1) - ablock(5,1)*bblock(1,1) & - ablock(5,2)*bblock(2,1) & - ablock(5,3)*bblock(3,1) & - ablock(5,4)*bblock(4,1) & - ablock(5,5)*bblock(5,1) cblock(1,2) = cblock(1,2) - ablock(1,1)*bblock(1,2) & - ablock(1,2)*bblock(2,2) & - ablock(1,3)*bblock(3,2) & - ablock(1,4)*bblock(4,2) & - ablock(1,5)*bblock(5,2) cblock(2,2) = cblock(2,2) - ablock(2,1)*bblock(1,2) & - ablock(2,2)*bblock(2,2) & - ablock(2,3)*bblock(3,2) & - ablock(2,4)*bblock(4,2) & - ablock(2,5)*bblock(5,2) cblock(3,2) = cblock(3,2) - ablock(3,1)*bblock(1,2) & - ablock(3,2)*bblock(2,2) & - ablock(3,3)*bblock(3,2) & - ablock(3,4)*bblock(4,2) & - ablock(3,5)*bblock(5,2) cblock(4,2) = cblock(4,2) - ablock(4,1)*bblock(1,2) & - ablock(4,2)*bblock(2,2) & - ablock(4,3)*bblock(3,2) & - ablock(4,4)*bblock(4,2) & - ablock(4,5)*bblock(5,2) cblock(5,2) = cblock(5,2) - ablock(5,1)*bblock(1,2) & - ablock(5,2)*bblock(2,2) & - ablock(5,3)*bblock(3,2) & - ablock(5,4)*bblock(4,2) & - ablock(5,5)*bblock(5,2) cblock(1,3) = cblock(1,3) - ablock(1,1)*bblock(1,3) & - ablock(1,2)*bblock(2,3) & - ablock(1,3)*bblock(3,3) & - ablock(1,4)*bblock(4,3) & - ablock(1,5)*bblock(5,3) cblock(2,3) = cblock(2,3) - ablock(2,1)*bblock(1,3) & - ablock(2,2)*bblock(2,3) & - ablock(2,3)*bblock(3,3) & - ablock(2,4)*bblock(4,3) & - ablock(2,5)*bblock(5,3) cblock(3,3) = cblock(3,3) - ablock(3,1)*bblock(1,3) & - ablock(3,2)*bblock(2,3) & - ablock(3,3)*bblock(3,3) & - ablock(3,4)*bblock(4,3) & - ablock(3,5)*bblock(5,3) cblock(4,3) = cblock(4,3) - ablock(4,1)*bblock(1,3) & - ablock(4,2)*bblock(2,3) & - ablock(4,3)*bblock(3,3) & - ablock(4,4)*bblock(4,3) & - ablock(4,5)*bblock(5,3) cblock(5,3) = cblock(5,3) - ablock(5,1)*bblock(1,3) & - ablock(5,2)*bblock(2,3) & - ablock(5,3)*bblock(3,3) & - ablock(5,4)*bblock(4,3) & - ablock(5,5)*bblock(5,3) cblock(1,4) = cblock(1,4) - ablock(1,1)*bblock(1,4) & - ablock(1,2)*bblock(2,4) & - ablock(1,3)*bblock(3,4) & - ablock(1,4)*bblock(4,4) & - ablock(1,5)*bblock(5,4) cblock(2,4) = cblock(2,4) - ablock(2,1)*bblock(1,4) & - ablock(2,2)*bblock(2,4) & - ablock(2,3)*bblock(3,4) & - ablock(2,4)*bblock(4,4) & - ablock(2,5)*bblock(5,4) cblock(3,4) = cblock(3,4) - ablock(3,1)*bblock(1,4) & - ablock(3,2)*bblock(2,4) & - ablock(3,3)*bblock(3,4) & - ablock(3,4)*bblock(4,4) & - ablock(3,5)*bblock(5,4) cblock(4,4) = cblock(4,4) - ablock(4,1)*bblock(1,4) & - ablock(4,2)*bblock(2,4) & - ablock(4,3)*bblock(3,4) & - ablock(4,4)*bblock(4,4) & - ablock(4,5)*bblock(5,4) cblock(5,4) = cblock(5,4) - ablock(5,1)*bblock(1,4) & - ablock(5,2)*bblock(2,4) & - ablock(5,3)*bblock(3,4) & - ablock(5,4)*bblock(4,4) & - ablock(5,5)*bblock(5,4) cblock(1,5) = cblock(1,5) - ablock(1,1)*bblock(1,5) & - ablock(1,2)*bblock(2,5) & - ablock(1,3)*bblock(3,5) & - ablock(1,4)*bblock(4,5) & - ablock(1,5)*bblock(5,5) cblock(2,5) = cblock(2,5) - ablock(2,1)*bblock(1,5) & - ablock(2,2)*bblock(2,5) & - ablock(2,3)*bblock(3,5) & - ablock(2,4)*bblock(4,5) & - ablock(2,5)*bblock(5,5) cblock(3,5) = cblock(3,5) - ablock(3,1)*bblock(1,5) & - ablock(3,2)*bblock(2,5) & - ablock(3,3)*bblock(3,5) & - ablock(3,4)*bblock(4,5) & - ablock(3,5)*bblock(5,5) cblock(4,5) = cblock(4,5) - ablock(4,1)*bblock(1,5) & - ablock(4,2)*bblock(2,5) & - ablock(4,3)*bblock(3,5) & - ablock(4,4)*bblock(4,5) & - ablock(4,5)*bblock(5,5) cblock(5,5) = cblock(5,5) - ablock(5,1)*bblock(1,5) & - ablock(5,2)*bblock(2,5) & - ablock(5,3)*bblock(3,5) & - ablock(5,4)*bblock(4,5) & - ablock(5,5)*bblock(5,5) return end subroutine binvcrhs( lhs,c,r ) !*****************************************************************************80 ! !! BINVCRHS ! implicit none double precision pivot, coeff, lhs dimension lhs(5,5) double precision c(5,5), r(5) pivot = 1.00d0/lhs(1,1) lhs(1,2) = lhs(1,2)*pivot lhs(1,3) = lhs(1,3)*pivot lhs(1,4) = lhs(1,4)*pivot lhs(1,5) = lhs(1,5)*pivot c(1,1) = c(1,1)*pivot c(1,2) = c(1,2)*pivot c(1,3) = c(1,3)*pivot c(1,4) = c(1,4)*pivot c(1,5) = c(1,5)*pivot r(1) = r(1) *pivot coeff = lhs(2,1) lhs(2,2)= lhs(2,2) - coeff*lhs(1,2) lhs(2,3)= lhs(2,3) - coeff*lhs(1,3) lhs(2,4)= lhs(2,4) - coeff*lhs(1,4) lhs(2,5)= lhs(2,5) - coeff*lhs(1,5) c(2,1) = c(2,1) - coeff*c(1,1) c(2,2) = c(2,2) - coeff*c(1,2) c(2,3) = c(2,3) - coeff*c(1,3) c(2,4) = c(2,4) - coeff*c(1,4) c(2,5) = c(2,5) - coeff*c(1,5) r(2) = r(2) - coeff*r(1) coeff = lhs(3,1) lhs(3,2)= lhs(3,2) - coeff*lhs(1,2) lhs(3,3)= lhs(3,3) - coeff*lhs(1,3) lhs(3,4)= lhs(3,4) - coeff*lhs(1,4) lhs(3,5)= lhs(3,5) - coeff*lhs(1,5) c(3,1) = c(3,1) - coeff*c(1,1) c(3,2) = c(3,2) - coeff*c(1,2) c(3,3) = c(3,3) - coeff*c(1,3) c(3,4) = c(3,4) - coeff*c(1,4) c(3,5) = c(3,5) - coeff*c(1,5) r(3) = r(3) - coeff*r(1) coeff = lhs(4,1) lhs(4,2)= lhs(4,2) - coeff*lhs(1,2) lhs(4,3)= lhs(4,3) - coeff*lhs(1,3) lhs(4,4)= lhs(4,4) - coeff*lhs(1,4) lhs(4,5)= lhs(4,5) - coeff*lhs(1,5) c(4,1) = c(4,1) - coeff*c(1,1) c(4,2) = c(4,2) - coeff*c(1,2) c(4,3) = c(4,3) - coeff*c(1,3) c(4,4) = c(4,4) - coeff*c(1,4) c(4,5) = c(4,5) - coeff*c(1,5) r(4) = r(4) - coeff*r(1) coeff = lhs(5,1) lhs(5,2)= lhs(5,2) - coeff*lhs(1,2) lhs(5,3)= lhs(5,3) - coeff*lhs(1,3) lhs(5,4)= lhs(5,4) - coeff*lhs(1,4) lhs(5,5)= lhs(5,5) - coeff*lhs(1,5) c(5,1) = c(5,1) - coeff*c(1,1) c(5,2) = c(5,2) - coeff*c(1,2) c(5,3) = c(5,3) - coeff*c(1,3) c(5,4) = c(5,4) - coeff*c(1,4) c(5,5) = c(5,5) - coeff*c(1,5) r(5) = r(5) - coeff*r(1) pivot = 1.00d0/lhs(2,2) lhs(2,3) = lhs(2,3)*pivot lhs(2,4) = lhs(2,4)*pivot lhs(2,5) = lhs(2,5)*pivot c(2,1) = c(2,1)*pivot c(2,2) = c(2,2)*pivot c(2,3) = c(2,3)*pivot c(2,4) = c(2,4)*pivot c(2,5) = c(2,5)*pivot r(2) = r(2) *pivot coeff = lhs(1,2) lhs(1,3)= lhs(1,3) - coeff*lhs(2,3) lhs(1,4)= lhs(1,4) - coeff*lhs(2,4) lhs(1,5)= lhs(1,5) - coeff*lhs(2,5) c(1,1) = c(1,1) - coeff*c(2,1) c(1,2) = c(1,2) - coeff*c(2,2) c(1,3) = c(1,3) - coeff*c(2,3) c(1,4) = c(1,4) - coeff*c(2,4) c(1,5) = c(1,5) - coeff*c(2,5) r(1) = r(1) - coeff*r(2) coeff = lhs(3,2) lhs(3,3)= lhs(3,3) - coeff*lhs(2,3) lhs(3,4)= lhs(3,4) - coeff*lhs(2,4) lhs(3,5)= lhs(3,5) - coeff*lhs(2,5) c(3,1) = c(3,1) - coeff*c(2,1) c(3,2) = c(3,2) - coeff*c(2,2) c(3,3) = c(3,3) - coeff*c(2,3) c(3,4) = c(3,4) - coeff*c(2,4) c(3,5) = c(3,5) - coeff*c(2,5) r(3) = r(3) - coeff*r(2) coeff = lhs(4,2) lhs(4,3)= lhs(4,3) - coeff*lhs(2,3) lhs(4,4)= lhs(4,4) - coeff*lhs(2,4) lhs(4,5)= lhs(4,5) - coeff*lhs(2,5) c(4,1) = c(4,1) - coeff*c(2,1) c(4,2) = c(4,2) - coeff*c(2,2) c(4,3) = c(4,3) - coeff*c(2,3) c(4,4) = c(4,4) - coeff*c(2,4) c(4,5) = c(4,5) - coeff*c(2,5) r(4) = r(4) - coeff*r(2) coeff = lhs(5,2) lhs(5,3)= lhs(5,3) - coeff*lhs(2,3) lhs(5,4)= lhs(5,4) - coeff*lhs(2,4) lhs(5,5)= lhs(5,5) - coeff*lhs(2,5) c(5,1) = c(5,1) - coeff*c(2,1) c(5,2) = c(5,2) - coeff*c(2,2) c(5,3) = c(5,3) - coeff*c(2,3) c(5,4) = c(5,4) - coeff*c(2,4) c(5,5) = c(5,5) - coeff*c(2,5) r(5) = r(5) - coeff*r(2) pivot = 1.00d0/lhs(3,3) lhs(3,4) = lhs(3,4)*pivot lhs(3,5) = lhs(3,5)*pivot c(3,1) = c(3,1)*pivot c(3,2) = c(3,2)*pivot c(3,3) = c(3,3)*pivot c(3,4) = c(3,4)*pivot c(3,5) = c(3,5)*pivot r(3) = r(3) *pivot coeff = lhs(1,3) lhs(1,4)= lhs(1,4) - coeff*lhs(3,4) lhs(1,5)= lhs(1,5) - coeff*lhs(3,5) c(1,1) = c(1,1) - coeff*c(3,1) c(1,2) = c(1,2) - coeff*c(3,2) c(1,3) = c(1,3) - coeff*c(3,3) c(1,4) = c(1,4) - coeff*c(3,4) c(1,5) = c(1,5) - coeff*c(3,5) r(1) = r(1) - coeff*r(3) coeff = lhs(2,3) lhs(2,4)= lhs(2,4) - coeff*lhs(3,4) lhs(2,5)= lhs(2,5) - coeff*lhs(3,5) c(2,1) = c(2,1) - coeff*c(3,1) c(2,2) = c(2,2) - coeff*c(3,2) c(2,3) = c(2,3) - coeff*c(3,3) c(2,4) = c(2,4) - coeff*c(3,4) c(2,5) = c(2,5) - coeff*c(3,5) r(2) = r(2) - coeff*r(3) coeff = lhs(4,3) lhs(4,4)= lhs(4,4) - coeff*lhs(3,4) lhs(4,5)= lhs(4,5) - coeff*lhs(3,5) c(4,1) = c(4,1) - coeff*c(3,1) c(4,2) = c(4,2) - coeff*c(3,2) c(4,3) = c(4,3) - coeff*c(3,3) c(4,4) = c(4,4) - coeff*c(3,4) c(4,5) = c(4,5) - coeff*c(3,5) r(4) = r(4) - coeff*r(3) coeff = lhs(5,3) lhs(5,4)= lhs(5,4) - coeff*lhs(3,4) lhs(5,5)= lhs(5,5) - coeff*lhs(3,5) c(5,1) = c(5,1) - coeff*c(3,1) c(5,2) = c(5,2) - coeff*c(3,2) c(5,3) = c(5,3) - coeff*c(3,3) c(5,4) = c(5,4) - coeff*c(3,4) c(5,5) = c(5,5) - coeff*c(3,5) r(5) = r(5) - coeff*r(3) pivot = 1.00d0/lhs(4,4) lhs(4,5) = lhs(4,5)*pivot c(4,1) = c(4,1)*pivot c(4,2) = c(4,2)*pivot c(4,3) = c(4,3)*pivot c(4,4) = c(4,4)*pivot c(4,5) = c(4,5)*pivot r(4) = r(4) *pivot coeff = lhs(1,4) lhs(1,5)= lhs(1,5) - coeff*lhs(4,5) c(1,1) = c(1,1) - coeff*c(4,1) c(1,2) = c(1,2) - coeff*c(4,2) c(1,3) = c(1,3) - coeff*c(4,3) c(1,4) = c(1,4) - coeff*c(4,4) c(1,5) = c(1,5) - coeff*c(4,5) r(1) = r(1) - coeff*r(4) coeff = lhs(2,4) lhs(2,5)= lhs(2,5) - coeff*lhs(4,5) c(2,1) = c(2,1) - coeff*c(4,1) c(2,2) = c(2,2) - coeff*c(4,2) c(2,3) = c(2,3) - coeff*c(4,3) c(2,4) = c(2,4) - coeff*c(4,4) c(2,5) = c(2,5) - coeff*c(4,5) r(2) = r(2) - coeff*r(4) coeff = lhs(3,4) lhs(3,5)= lhs(3,5) - coeff*lhs(4,5) c(3,1) = c(3,1) - coeff*c(4,1) c(3,2) = c(3,2) - coeff*c(4,2) c(3,3) = c(3,3) - coeff*c(4,3) c(3,4) = c(3,4) - coeff*c(4,4) c(3,5) = c(3,5) - coeff*c(4,5) r(3) = r(3) - coeff*r(4) coeff = lhs(5,4) lhs(5,5)= lhs(5,5) - coeff*lhs(4,5) c(5,1) = c(5,1) - coeff*c(4,1) c(5,2) = c(5,2) - coeff*c(4,2) c(5,3) = c(5,3) - coeff*c(4,3) c(5,4) = c(5,4) - coeff*c(4,4) c(5,5) = c(5,5) - coeff*c(4,5) r(5) = r(5) - coeff*r(4) pivot = 1.00d0/lhs(5,5) c(5,1) = c(5,1)*pivot c(5,2) = c(5,2)*pivot c(5,3) = c(5,3)*pivot c(5,4) = c(5,4)*pivot c(5,5) = c(5,5)*pivot r(5) = r(5) *pivot coeff = lhs(1,5) c(1,1) = c(1,1) - coeff*c(5,1) c(1,2) = c(1,2) - coeff*c(5,2) c(1,3) = c(1,3) - coeff*c(5,3) c(1,4) = c(1,4) - coeff*c(5,4) c(1,5) = c(1,5) - coeff*c(5,5) r(1) = r(1) - coeff*r(5) coeff = lhs(2,5) c(2,1) = c(2,1) - coeff*c(5,1) c(2,2) = c(2,2) - coeff*c(5,2) c(2,3) = c(2,3) - coeff*c(5,3) c(2,4) = c(2,4) - coeff*c(5,4) c(2,5) = c(2,5) - coeff*c(5,5) r(2) = r(2) - coeff*r(5) coeff = lhs(3,5) c(3,1) = c(3,1) - coeff*c(5,1) c(3,2) = c(3,2) - coeff*c(5,2) c(3,3) = c(3,3) - coeff*c(5,3) c(3,4) = c(3,4) - coeff*c(5,4) c(3,5) = c(3,5) - coeff*c(5,5) r(3) = r(3) - coeff*r(5) coeff = lhs(4,5) c(4,1) = c(4,1) - coeff*c(5,1) c(4,2) = c(4,2) - coeff*c(5,2) c(4,3) = c(4,3) - coeff*c(5,3) c(4,4) = c(4,4) - coeff*c(5,4) c(4,5) = c(4,5) - coeff*c(5,5) r(4) = r(4) - coeff*r(5) return end subroutine binvrhs( lhs,r ) !*****************************************************************************80 ! !! BINVRHS ! implicit none double precision pivot, coeff, lhs dimension lhs(5,5) double precision r(5) pivot = 1.00d0/lhs(1,1) lhs(1,2) = lhs(1,2)*pivot lhs(1,3) = lhs(1,3)*pivot lhs(1,4) = lhs(1,4)*pivot lhs(1,5) = lhs(1,5)*pivot r(1) = r(1) *pivot coeff = lhs(2,1) lhs(2,2)= lhs(2,2) - coeff*lhs(1,2) lhs(2,3)= lhs(2,3) - coeff*lhs(1,3) lhs(2,4)= lhs(2,4) - coeff*lhs(1,4) lhs(2,5)= lhs(2,5) - coeff*lhs(1,5) r(2) = r(2) - coeff*r(1) coeff = lhs(3,1) lhs(3,2)= lhs(3,2) - coeff*lhs(1,2) lhs(3,3)= lhs(3,3) - coeff*lhs(1,3) lhs(3,4)= lhs(3,4) - coeff*lhs(1,4) lhs(3,5)= lhs(3,5) - coeff*lhs(1,5) r(3) = r(3) - coeff*r(1) coeff = lhs(4,1) lhs(4,2)= lhs(4,2) - coeff*lhs(1,2) lhs(4,3)= lhs(4,3) - coeff*lhs(1,3) lhs(4,4)= lhs(4,4) - coeff*lhs(1,4) lhs(4,5)= lhs(4,5) - coeff*lhs(1,5) r(4) = r(4) - coeff*r(1) coeff = lhs(5,1) lhs(5,2)= lhs(5,2) - coeff*lhs(1,2) lhs(5,3)= lhs(5,3) - coeff*lhs(1,3) lhs(5,4)= lhs(5,4) - coeff*lhs(1,4) lhs(5,5)= lhs(5,5) - coeff*lhs(1,5) r(5) = r(5) - coeff*r(1) pivot = 1.00d0/lhs(2,2) lhs(2,3) = lhs(2,3)*pivot lhs(2,4) = lhs(2,4)*pivot lhs(2,5) = lhs(2,5)*pivot r(2) = r(2) *pivot coeff = lhs(1,2) lhs(1,3)= lhs(1,3) - coeff*lhs(2,3) lhs(1,4)= lhs(1,4) - coeff*lhs(2,4) lhs(1,5)= lhs(1,5) - coeff*lhs(2,5) r(1) = r(1) - coeff*r(2) coeff = lhs(3,2) lhs(3,3)= lhs(3,3) - coeff*lhs(2,3) lhs(3,4)= lhs(3,4) - coeff*lhs(2,4) lhs(3,5)= lhs(3,5) - coeff*lhs(2,5) r(3) = r(3) - coeff*r(2) coeff = lhs(4,2) lhs(4,3)= lhs(4,3) - coeff*lhs(2,3) lhs(4,4)= lhs(4,4) - coeff*lhs(2,4) lhs(4,5)= lhs(4,5) - coeff*lhs(2,5) r(4) = r(4) - coeff*r(2) coeff = lhs(5,2) lhs(5,3)= lhs(5,3) - coeff*lhs(2,3) lhs(5,4)= lhs(5,4) - coeff*lhs(2,4) lhs(5,5)= lhs(5,5) - coeff*lhs(2,5) r(5) = r(5) - coeff*r(2) pivot = 1.00d0/lhs(3,3) lhs(3,4) = lhs(3,4)*pivot lhs(3,5) = lhs(3,5)*pivot r(3) = r(3) *pivot coeff = lhs(1,3) lhs(1,4)= lhs(1,4) - coeff*lhs(3,4) lhs(1,5)= lhs(1,5) - coeff*lhs(3,5) r(1) = r(1) - coeff*r(3) coeff = lhs(2,3) lhs(2,4)= lhs(2,4) - coeff*lhs(3,4) lhs(2,5)= lhs(2,5) - coeff*lhs(3,5) r(2) = r(2) - coeff*r(3) coeff = lhs(4,3) lhs(4,4)= lhs(4,4) - coeff*lhs(3,4) lhs(4,5)= lhs(4,5) - coeff*lhs(3,5) r(4) = r(4) - coeff*r(3) coeff = lhs(5,3) lhs(5,4)= lhs(5,4) - coeff*lhs(3,4) lhs(5,5)= lhs(5,5) - coeff*lhs(3,5) r(5) = r(5) - coeff*r(3) pivot = 1.00d0/lhs(4,4) lhs(4,5) = lhs(4,5)*pivot r(4) = r(4) *pivot coeff = lhs(1,4) lhs(1,5)= lhs(1,5) - coeff*lhs(4,5) r(1) = r(1) - coeff*r(4) coeff = lhs(2,4) lhs(2,5)= lhs(2,5) - coeff*lhs(4,5) r(2) = r(2) - coeff*r(4) coeff = lhs(3,4) lhs(3,5)= lhs(3,5) - coeff*lhs(4,5) r(3) = r(3) - coeff*r(4) coeff = lhs(5,4) lhs(5,5)= lhs(5,5) - coeff*lhs(4,5) r(5) = r(5) - coeff*r(4) pivot = 1.00d0/lhs(5,5) r(5) = r(5) *pivot coeff = lhs(1,5) r(1) = r(1) - coeff*r(5) coeff = lhs(2,5) r(2) = r(2) - coeff*r(5) coeff = lhs(3,5) r(3) = r(3) - coeff*r(5) coeff = lhs(4,5) r(4) = r(4) - coeff*r(5) return end subroutine timer_clear(n) !*****************************************************************************80 ! !! TIMER_CLEAR clears timer information. ! implicit none integer n double precision start(64), elapsed(64) common /tt/ start, elapsed elapsed(n) = 0.0 return end subroutine timer_start(n) !*****************************************************************************80 ! !! TIMER_START starts the timer. ! implicit none external elapsed_time double precision elapsed_time integer n double precision start(64), elapsed(64) common /tt/ start, elapsed start(n) = elapsed_time() return end subroutine timer_stop(n) !*****************************************************************************80 ! !! TIMER_STOP stops the timer. ! implicit none external elapsed_time double precision elapsed_time integer n double precision start(64), elapsed(64) common /tt/ start, elapsed double precision t, now now = elapsed_time() t = now - start(n) elapsed(n) = elapsed(n) + t return end double precision function timer_read(n) !*****************************************************************************80 ! !! TIMER_READ reads the timer. ! implicit none integer n double precision start(64), elapsed(64) common /tt/ start, elapsed timer_read = elapsed(n) return end double precision function elapsed_time() !*****************************************************************************80 ! !! ELAPSED_TIME returns current wall clock time. ! implicit none double precision t ! This function must measure wall clock time, not CPU time. ! Since there is no portable timer in Fortran (77) ! we call a routine compiled in C (though the C source may have ! to be tweaked). call wtime(t) ! The following is not ok for "official" results because it reports ! CPU time not wall clock time. It may be useful for developing/testing ! on timeshared Crays, though. ! call second(t) elapsed_time = t return end subroutine verify(no_time_steps, class, verified) !*****************************************************************************80 ! !! VERIFY is the verification routine ! include 'header.h' double precision xcrref(5),xceref(5),xcrdif(5),xcedif(5), & epsilon, xce(5), xcr(5), dtref integer m, no_time_steps character class logical verified ! ! tolerance level ! epsilon = 1.0d-08 ! ! compute the error norm and the residual norm, and exit if not printing ! call error_norm(xce) call compute_rhs call rhs_norm(xcr) do m = 1, 5 xcr(m) = xcr(m) / dt end do class = 'U' verified = .true. do m = 1,5 xcrref(m) = 1.0 xceref(m) = 1.0 end do ! ! reference data for 12X12X12 grids after 60 time steps, with DT = 1.0d-02 ! if ( (grid_points(1) == 12 ) .and. & (grid_points(2) == 12 ) .and. & (grid_points(3) == 12 ) .and. & (no_time_steps == 60 )) then class = 'S' dtref = 1.0d-2 ! ! Reference values of RMS-norms of residual. ! xcrref(1) = 1.7034283709541311d-01 xcrref(2) = 1.2975252070034097d-02 xcrref(3) = 3.2527926989486055d-02 xcrref(4) = 2.6436421275166801d-02 xcrref(5) = 1.9211784131744430d-01 ! ! Reference values of RMS-norms of solution error. ! xceref(1) = 4.9976913345811579d-04 xceref(2) = 4.5195666782961927d-05 xceref(3) = 7.3973765172921357d-05 xceref(4) = 7.3821238632439731d-05 xceref(5) = 8.9269630987491446d-04 ! ! reference data for 24X24X24 grids after 200 time steps, with DT = 0.8d-3 ! elseif ( (grid_points(1) == 24) .and. & (grid_points(2) == 24) .and. & (grid_points(3) == 24) .and. & (no_time_steps == 200) ) then class = 'W' dtref = 0.8d-3 ! ! Reference values of RMS-norms of residual. ! xcrref(1) = 0.1125590409344d+03 xcrref(2) = 0.1180007595731d+02 xcrref(3) = 0.2710329767846d+02 xcrref(4) = 0.2469174937669d+02 xcrref(5) = 0.2638427874317d+03 ! ! Reference values of RMS-norms of solution error. ! xceref(1) = 0.4419655736008d+01 xceref(2) = 0.4638531260002d+00 xceref(3) = 0.1011551749967d+01 xceref(4) = 0.9235878729944d+00 xceref(5) = 0.1018045837718d+02 ! ! reference data for 64X64X64 grids after 200 time steps, with DT = 0.8d-3 ! elseif ( (grid_points(1) == 64) .and. & (grid_points(2) == 64) .and. & (grid_points(3) == 64) .and. & (no_time_steps == 200) ) then class = 'A' dtref = 0.8d-3 ! ! Reference values of RMS-norms of residual. ! xcrref(1) = 1.0806346714637264d+02 xcrref(2) = 1.1319730901220813d+01 xcrref(3) = 2.5974354511582465d+01 xcrref(4) = 2.3665622544678910d+01 xcrref(5) = 2.5278963211748344d+02 ! ! Reference values of RMS-norms of solution error. ! xceref(1) = 4.2348416040525025d+00 xceref(2) = 4.4390282496995698d-01 xceref(3) = 9.6692480136345650d-01 xceref(4) = 8.8302063039765474d-01 xceref(5) = 9.7379901770829278d+00 ! ! reference data for 102X102X102 grids after 200 time steps, ! with DT = 3.0d-04 ! elseif ( (grid_points(1) == 102) .and. & (grid_points(2) == 102) .and. & (grid_points(3) == 102) .and. & (no_time_steps == 200) ) then class = 'B' dtref = 3.0d-4 ! ! Reference values of RMS-norms of residual. ! xcrref(1) = 1.4233597229287254d+03 xcrref(2) = 9.9330522590150238d+01 xcrref(3) = 3.5646025644535285d+02 xcrref(4) = 3.2485447959084092d+02 xcrref(5) = 3.2707541254659363d+03 ! ! Reference values of RMS-norms of solution error. ! xceref(1) = 5.2969847140936856d+01 xceref(2) = 4.4632896115670668d+00 xceref(3) = 1.3122573342210174d+01 xceref(4) = 1.2006925323559144d+01 xceref(5) = 1.2459576151035986d+02 ! ! reference data for 162X162X162 grids after 200 time steps, ! with DT = 1.0d-04 ! elseif ( (grid_points(1) == 162) .and. & (grid_points(2) == 162) .and. & (grid_points(3) == 162) .and. & (no_time_steps == 200) ) then class = 'C' dtref = 1.0d-4 ! ! Reference values of RMS-norms of residual. ! xcrref(1) = 0.62398116551764615d+04 xcrref(2) = 0.50793239190423964d+03 xcrref(3) = 0.15423530093013596d+04 xcrref(4) = 0.13302387929291190d+04 xcrref(5) = 0.11604087428436455d+05 ! ! Reference values of RMS-norms of solution error. ! xceref(1) = 0.16462008369091265d+03 xceref(2) = 0.11497107903824313d+02 xceref(3) = 0.41207446207461508d+02 xceref(4) = 0.37087651059694167d+02 xceref(5) = 0.36211053051841265d+03 ! ! reference data for 408x408x408 grids after 250 time steps, ! with DT = 0.2d-04 ! elseif ( (grid_points(1) == 408) .and. & (grid_points(2) == 408) .and. & (grid_points(3) == 408) .and. & (no_time_steps == 250) ) then class = 'D' dtref = 0.2d-4 ! ! Reference values of RMS-norms of residual. ! xcrref(1) = 0.2533188551738d+05 xcrref(2) = 0.2346393716980d+04 xcrref(3) = 0.6294554366904d+04 xcrref(4) = 0.5352565376030d+04 xcrref(5) = 0.3905864038618d+05 ! ! Reference values of RMS-norms of solution error. ! xceref(1) = 0.3100009377557d+03 xceref(2) = 0.2424086324913d+02 xceref(3) = 0.7782212022645d+02 xceref(4) = 0.6835623860116d+02 xceref(5) = 0.6065737200368d+03 ! ! reference data for 1020x1020x1020 grids after 250 time steps, ! with DT = 0.4d-05 ! elseif ( (grid_points(1) == 1020) .and. & (grid_points(2) == 1020) .and. & (grid_points(3) == 1020) .and. & (no_time_steps == 250) ) then class = 'E' dtref = 0.4d-5 ! ! Reference values of RMS-norms of residual. ! xcrref(1) = 0.9795372484517d+05 xcrref(2) = 0.9739814511521d+04 xcrref(3) = 0.2467606342965d+05 xcrref(4) = 0.2092419572860d+05 xcrref(5) = 0.1392138856939d+06 ! ! Reference values of RMS-norms of solution error. ! xceref(1) = 0.4327562208414d+03 xceref(2) = 0.3699051964887d+02 xceref(3) = 0.1089845040954d+03 xceref(4) = 0.9462517622043d+02 xceref(5) = 0.7765512765309d+03 else verified = .false. end if ! ! verification test for residuals if gridsize is one of ! the defined grid sizes above (class .ne. 'U') ! ! ! Compute the difference of solution values and the known reference values. ! do m = 1, 5 xcrdif(m) = dabs((xcr(m)-xcrref(m))/xcrref(m)) xcedif(m) = dabs((xce(m)-xceref(m))/xceref(m)) end do ! ! Output the comparison of computed results to known cases. ! if (class .ne. 'U') then write(*, 1990) class 1990 format(' Verification being performed for class ', a) write (*,2000) epsilon 2000 format(' accuracy setting for epsilon = ', E20.13) verified = (dabs(dt-dtref) .le. epsilon) if (.not.verified) then class = 'U' write (*,1000) dtref 1000 format(' DT does not match the reference value of ', & E15.8) end if else write(*, 1995) 1995 format(' Unknown class') end if if (class .ne. 'U') then write (*, 2001) else write (*, 2005) end if 2001 format(' Comparison of RMS-norms of residual') 2005 format(' RMS-norms of residual') do m = 1, 5 if (class == 'U') then write(*, 2015) m, xcr(m) else if (xcrdif(m) .le. epsilon) then write (*,2011) m,xcr(m),xcrref(m),xcrdif(m) else verified = .false. write (*,2010) m,xcr(m),xcrref(m),xcrdif(m) end if end do if (class .ne. 'U') then write (*,2002) else write (*,2006) end if 2002 format(' Comparison of RMS-norms of solution error') 2006 format(' RMS-norms of solution error') do m = 1, 5 if (class == 'U') then write(*, 2015) m, xce(m) else if (xcedif(m) .le. epsilon) then write (*,2011) m,xce(m),xceref(m),xcedif(m) else verified = .false. write (*,2010) m,xce(m),xceref(m),xcedif(m) end if end do 2010 format(' FAILURE: ', i2, E20.13, E20.13, E20.13) 2011 format(' ', i2, E20.13, E20.13, E20.13) 2015 format(' ', i2, E20.13) if (class == 'U') then write(*, 2022) write(*, 2023) 2022 format(' No reference values provided') 2023 format(' No verification performed') else if (verified) then write(*, 2020) 2020 format(' Verification Successful') else write(*, 2021) 2021 format(' Verification failed') end if return end subroutine x_solve ( ) !*****************************************************************************80 ! !! XSOLVE performs line solves in the X direction. ! ! Discussion: ! ! The routine first factors ! the block-tridiagonal matrix into an upper triangular matrix, ! and then performs back substitution to solve for the unknown ! vectors of each line. ! ! Make sure we treat elements zero to cell_size in the direction ! of the sweep. ! include 'header.h' include 'work_lhs.h' integer i,j,k,m,n,isize if (timeron) call timer_start(t_xsolve) ! ! This function computes the left hand side in the xi-direction ! isize = grid_points(1)-1 ! ! determine a (labeled f) and n jacobians ! do k = 1, grid_points(3)-2 do j = 1, grid_points(2)-2 do i = 0, isize tmp1 = rho_i(i,j,k) tmp2 = tmp1 * tmp1 tmp3 = tmp1 * tmp2 fjac(1,1,i) = 0.0d+00 fjac(1,2,i) = 1.0d+00 fjac(1,3,i) = 0.0d+00 fjac(1,4,i) = 0.0d+00 fjac(1,5,i) = 0.0d+00 fjac(2,1,i) = -(u(2,i,j,k) * tmp2 * & u(2,i,j,k)) & + c2 * qs(i,j,k) fjac(2,2,i) = ( 2.0d+00 - c2 ) & * ( u(2,i,j,k) / u(1,i,j,k) ) fjac(2,3,i) = - c2 * ( u(3,i,j,k) * tmp1 ) fjac(2,4,i) = - c2 * ( u(4,i,j,k) * tmp1 ) fjac(2,5,i) = c2 fjac(3,1,i) = - ( u(2,i,j,k)*u(3,i,j,k) ) * tmp2 fjac(3,2,i) = u(3,i,j,k) * tmp1 fjac(3,3,i) = u(2,i,j,k) * tmp1 fjac(3,4,i) = 0.0d+00 fjac(3,5,i) = 0.0d+00 fjac(4,1,i) = - ( u(2,i,j,k)*u(4,i,j,k) ) * tmp2 fjac(4,2,i) = u(4,i,j,k) * tmp1 fjac(4,3,i) = 0.0d+00 fjac(4,4,i) = u(2,i,j,k) * tmp1 fjac(4,5,i) = 0.0d+00 fjac(5,1,i) = ( c2 * 2.0d0 * square(i,j,k) & - c1 * u(5,i,j,k) ) & * ( u(2,i,j,k) * tmp2 ) fjac(5,2,i) = c1 * u(5,i,j,k) * tmp1 & - c2 & * ( u(2,i,j,k)*u(2,i,j,k) * tmp2 & + qs(i,j,k) ) fjac(5,3,i) = - c2 * ( u(3,i,j,k)*u(2,i,j,k) ) & * tmp2 fjac(5,4,i) = - c2 * ( u(4,i,j,k)*u(2,i,j,k) ) & * tmp2 fjac(5,5,i) = c1 * ( u(2,i,j,k) * tmp1 ) njac(1,1,i) = 0.0d+00 njac(1,2,i) = 0.0d+00 njac(1,3,i) = 0.0d+00 njac(1,4,i) = 0.0d+00 njac(1,5,i) = 0.0d+00 njac(2,1,i) = - con43 * c3c4 * tmp2 * u(2,i,j,k) njac(2,2,i) = con43 * c3c4 * tmp1 njac(2,3,i) = 0.0d+00 njac(2,4,i) = 0.0d+00 njac(2,5,i) = 0.0d+00 njac(3,1,i) = - c3c4 * tmp2 * u(3,i,j,k) njac(3,2,i) = 0.0d+00 njac(3,3,i) = c3c4 * tmp1 njac(3,4,i) = 0.0d+00 njac(3,5,i) = 0.0d+00 njac(4,1,i) = - c3c4 * tmp2 * u(4,i,j,k) njac(4,2,i) = 0.0d+00 njac(4,3,i) = 0.0d+00 njac(4,4,i) = c3c4 * tmp1 njac(4,5,i) = 0.0d+00 njac(5,1,i) = - ( con43 * c3c4 & - c1345 ) * tmp3 * (u(2,i,j,k)**2) & - ( c3c4 - c1345 ) * tmp3 * (u(3,i,j,k)**2) & - ( c3c4 - c1345 ) * tmp3 * (u(4,i,j,k)**2) & - c1345 * tmp2 * u(5,i,j,k) njac(5,2,i) = ( con43 * c3c4 & - c1345 ) * tmp2 * u(2,i,j,k) njac(5,3,i) = ( c3c4 - c1345 ) * tmp2 * u(3,i,j,k) njac(5,4,i) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k) njac(5,5,i) = ( c1345 ) * tmp1 end do ! ! now jacobians set, so form left hand side in x direction ! call lhsinit(lhs, isize) do i = 1, isize-1 tmp1 = dt * tx1 tmp2 = dt * tx2 lhs(1,1,aa,i) = - tmp2 * fjac(1,1,i-1) & - tmp1 * njac(1,1,i-1) & - tmp1 * dx1 lhs(1,2,aa,i) = - tmp2 * fjac(1,2,i-1) & - tmp1 * njac(1,2,i-1) lhs(1,3,aa,i) = - tmp2 * fjac(1,3,i-1) & - tmp1 * njac(1,3,i-1) lhs(1,4,aa,i) = - tmp2 * fjac(1,4,i-1) & - tmp1 * njac(1,4,i-1) lhs(1,5,aa,i) = - tmp2 * fjac(1,5,i-1) & - tmp1 * njac(1,5,i-1) lhs(2,1,aa,i) = - tmp2 * fjac(2,1,i-1) & - tmp1 * njac(2,1,i-1) lhs(2,2,aa,i) = - tmp2 * fjac(2,2,i-1) & - tmp1 * njac(2,2,i-1) & - tmp1 * dx2 lhs(2,3,aa,i) = - tmp2 * fjac(2,3,i-1) & - tmp1 * njac(2,3,i-1) lhs(2,4,aa,i) = - tmp2 * fjac(2,4,i-1) & - tmp1 * njac(2,4,i-1) lhs(2,5,aa,i) = - tmp2 * fjac(2,5,i-1) & - tmp1 * njac(2,5,i-1) lhs(3,1,aa,i) = - tmp2 * fjac(3,1,i-1) & - tmp1 * njac(3,1,i-1) lhs(3,2,aa,i) = - tmp2 * fjac(3,2,i-1) & - tmp1 * njac(3,2,i-1) lhs(3,3,aa,i) = - tmp2 * fjac(3,3,i-1) & - tmp1 * njac(3,3,i-1) & - tmp1 * dx3 lhs(3,4,aa,i) = - tmp2 * fjac(3,4,i-1) & - tmp1 * njac(3,4,i-1) lhs(3,5,aa,i) = - tmp2 * fjac(3,5,i-1) & - tmp1 * njac(3,5,i-1) lhs(4,1,aa,i) = - tmp2 * fjac(4,1,i-1) & - tmp1 * njac(4,1,i-1) lhs(4,2,aa,i) = - tmp2 * fjac(4,2,i-1) & - tmp1 * njac(4,2,i-1) lhs(4,3,aa,i) = - tmp2 * fjac(4,3,i-1) & - tmp1 * njac(4,3,i-1) lhs(4,4,aa,i) = - tmp2 * fjac(4,4,i-1) & - tmp1 * njac(4,4,i-1) & - tmp1 * dx4 lhs(4,5,aa,i) = - tmp2 * fjac(4,5,i-1) & - tmp1 * njac(4,5,i-1) lhs(5,1,aa,i) = - tmp2 * fjac(5,1,i-1) & - tmp1 * njac(5,1,i-1) lhs(5,2,aa,i) = - tmp2 * fjac(5,2,i-1) & - tmp1 * njac(5,2,i-1) lhs(5,3,aa,i) = - tmp2 * fjac(5,3,i-1) & - tmp1 * njac(5,3,i-1) lhs(5,4,aa,i) = - tmp2 * fjac(5,4,i-1) & - tmp1 * njac(5,4,i-1) lhs(5,5,aa,i) = - tmp2 * fjac(5,5,i-1) & - tmp1 * njac(5,5,i-1) & - tmp1 * dx5 lhs(1,1,bb,i) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(1,1,i) & + tmp1 * 2.0d+00 * dx1 lhs(1,2,bb,i) = tmp1 * 2.0d+00 * njac(1,2,i) lhs(1,3,bb,i) = tmp1 * 2.0d+00 * njac(1,3,i) lhs(1,4,bb,i) = tmp1 * 2.0d+00 * njac(1,4,i) lhs(1,5,bb,i) = tmp1 * 2.0d+00 * njac(1,5,i) lhs(2,1,bb,i) = tmp1 * 2.0d+00 * njac(2,1,i) lhs(2,2,bb,i) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(2,2,i) & + tmp1 * 2.0d+00 * dx2 lhs(2,3,bb,i) = tmp1 * 2.0d+00 * njac(2,3,i) lhs(2,4,bb,i) = tmp1 * 2.0d+00 * njac(2,4,i) lhs(2,5,bb,i) = tmp1 * 2.0d+00 * njac(2,5,i) lhs(3,1,bb,i) = tmp1 * 2.0d+00 * njac(3,1,i) lhs(3,2,bb,i) = tmp1 * 2.0d+00 * njac(3,2,i) lhs(3,3,bb,i) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(3,3,i) & + tmp1 * 2.0d+00 * dx3 lhs(3,4,bb,i) = tmp1 * 2.0d+00 * njac(3,4,i) lhs(3,5,bb,i) = tmp1 * 2.0d+00 * njac(3,5,i) lhs(4,1,bb,i) = tmp1 * 2.0d+00 * njac(4,1,i) lhs(4,2,bb,i) = tmp1 * 2.0d+00 * njac(4,2,i) lhs(4,3,bb,i) = tmp1 * 2.0d+00 * njac(4,3,i) lhs(4,4,bb,i) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(4,4,i) & + tmp1 * 2.0d+00 * dx4 lhs(4,5,bb,i) = tmp1 * 2.0d+00 * njac(4,5,i) lhs(5,1,bb,i) = tmp1 * 2.0d+00 * njac(5,1,i) lhs(5,2,bb,i) = tmp1 * 2.0d+00 * njac(5,2,i) lhs(5,3,bb,i) = tmp1 * 2.0d+00 * njac(5,3,i) lhs(5,4,bb,i) = tmp1 * 2.0d+00 * njac(5,4,i) lhs(5,5,bb,i) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(5,5,i) & + tmp1 * 2.0d+00 * dx5 lhs(1,1,cc,i) = tmp2 * fjac(1,1,i+1) & - tmp1 * njac(1,1,i+1) & - tmp1 * dx1 lhs(1,2,cc,i) = tmp2 * fjac(1,2,i+1) & - tmp1 * njac(1,2,i+1) lhs(1,3,cc,i) = tmp2 * fjac(1,3,i+1) & - tmp1 * njac(1,3,i+1) lhs(1,4,cc,i) = tmp2 * fjac(1,4,i+1) & - tmp1 * njac(1,4,i+1) lhs(1,5,cc,i) = tmp2 * fjac(1,5,i+1) & - tmp1 * njac(1,5,i+1) lhs(2,1,cc,i) = tmp2 * fjac(2,1,i+1) & - tmp1 * njac(2,1,i+1) lhs(2,2,cc,i) = tmp2 * fjac(2,2,i+1) & - tmp1 * njac(2,2,i+1) & - tmp1 * dx2 lhs(2,3,cc,i) = tmp2 * fjac(2,3,i+1) & - tmp1 * njac(2,3,i+1) lhs(2,4,cc,i) = tmp2 * fjac(2,4,i+1) & - tmp1 * njac(2,4,i+1) lhs(2,5,cc,i) = tmp2 * fjac(2,5,i+1) & - tmp1 * njac(2,5,i+1) lhs(3,1,cc,i) = tmp2 * fjac(3,1,i+1) & - tmp1 * njac(3,1,i+1) lhs(3,2,cc,i) = tmp2 * fjac(3,2,i+1) & - tmp1 * njac(3,2,i+1) lhs(3,3,cc,i) = tmp2 * fjac(3,3,i+1) & - tmp1 * njac(3,3,i+1) & - tmp1 * dx3 lhs(3,4,cc,i) = tmp2 * fjac(3,4,i+1) & - tmp1 * njac(3,4,i+1) lhs(3,5,cc,i) = tmp2 * fjac(3,5,i+1) & - tmp1 * njac(3,5,i+1) lhs(4,1,cc,i) = tmp2 * fjac(4,1,i+1) & - tmp1 * njac(4,1,i+1) lhs(4,2,cc,i) = tmp2 * fjac(4,2,i+1) & - tmp1 * njac(4,2,i+1) lhs(4,3,cc,i) = tmp2 * fjac(4,3,i+1) & - tmp1 * njac(4,3,i+1) lhs(4,4,cc,i) = tmp2 * fjac(4,4,i+1) & - tmp1 * njac(4,4,i+1) & - tmp1 * dx4 lhs(4,5,cc,i) = tmp2 * fjac(4,5,i+1) & - tmp1 * njac(4,5,i+1) lhs(5,1,cc,i) = tmp2 * fjac(5,1,i+1) & - tmp1 * njac(5,1,i+1) lhs(5,2,cc,i) = tmp2 * fjac(5,2,i+1) & - tmp1 * njac(5,2,i+1) lhs(5,3,cc,i) = tmp2 * fjac(5,3,i+1) & - tmp1 * njac(5,3,i+1) lhs(5,4,cc,i) = tmp2 * fjac(5,4,i+1) & - tmp1 * njac(5,4,i+1) lhs(5,5,cc,i) = tmp2 * fjac(5,5,i+1) & - tmp1 * njac(5,5,i+1) & - tmp1 * dx5 end do ! ! performs guaussian elimination on this cell. ! ! assumes that unpacking routines for non-first cells ! preload C' and rhs' from previous cell. ! ! assumed send happens outside this routine, but that ! c'(IMAX) and rhs'(IMAX) will be sent to next cell ! ! ! outer most do loops - sweeping in i direction ! ! ! multiply c(0,j,k) by b_inverse and copy back to c ! multiply rhs(0) by b_inverse(0) and copy to rhs ! call binvcrhs( lhs(1,1,bb,0), & lhs(1,1,cc,0), & rhs(1,0,j,k) ) ! ! begin inner most do loop ! do all the elements of the cell unless last ! do i=1,isize-1 ! ! rhs(i) = rhs(i) - A*rhs(i-1) ! call matvec_sub(lhs(1,1,aa,i), & rhs(1,i-1,j,k),rhs(1,i,j,k)) ! ! B(i) = B(i) - C(i-1)*A(i) ! call matmul_sub(lhs(1,1,aa,i), & lhs(1,1,cc,i-1), & lhs(1,1,bb,i)) ! ! multiply c(i,j,k) by b_inverse and copy back to c ! multiply rhs(1,j,k) by b_inverse(1,j,k) and copy to rhs ! call binvcrhs( lhs(1,1,bb,i), & lhs(1,1,cc,i), & rhs(1,i,j,k) ) end do ! ! rhs(isize) = rhs(isize) - A*rhs(isize-1) ! call matvec_sub(lhs(1,1,aa,isize), & rhs(1,isize-1,j,k),rhs(1,isize,j,k)) ! ! B(isize) = B(isize) - C(isize-1)*A(isize) ! call matmul_sub(lhs(1,1,aa,isize), & lhs(1,1,cc,isize-1), & lhs(1,1,bb,isize)) ! ! multiply rhs() by b_inverse() and copy to rhs ! call binvrhs( lhs(1,1,bb,isize), & rhs(1,isize,j,k) ) ! ! back solve: if last cell, then generate U(isize)=rhs(isize) ! else assume U(isize) is loaded in un pack backsub_info ! so just use it ! after call u(istart) will be sent to next cell ! do i=isize-1,0,-1 do m=1,BLOCK_SIZE do n=1,BLOCK_SIZE rhs(m,i,j,k) = rhs(m,i,j,k) & - lhs(m,n,cc,i)*rhs(n,i+1,j,k) end do end do end do end do end do if (timeron) call timer_stop(t_xsolve) return end subroutine y_solve ( ) !*****************************************************************************80 ! !! Y_SOLVE performs line solves in the Y direction. ! ! Discussion: ! ! The routine first factors ! the block-tridiagonal matrix into an upper triangular matrix, ! and then performs back substitution to solve for the unknown ! vectors of each line. ! ! Make sure we treat elements zero to cell_size in the direction ! of the sweep. ! include 'header.h' include 'work_lhs.h' integer i, j, k, m, n, jsize if (timeron) call timer_start(t_ysolve) ! ! ! ! This function computes the left hand side for the three y-factors ! jsize = grid_points(2)-1 ! ! Compute the indices for storing the tri-diagonal matrix; ! determine a (labeled f) and n jacobians for cell c ! do k = 1, grid_points(3)-2 do i = 1, grid_points(1)-2 do j = 0, jsize tmp1 = rho_i(i,j,k) tmp2 = tmp1 * tmp1 tmp3 = tmp1 * tmp2 fjac(1,1,j) = 0.0d+00 fjac(1,2,j) = 0.0d+00 fjac(1,3,j) = 1.0d+00 fjac(1,4,j) = 0.0d+00 fjac(1,5,j) = 0.0d+00 fjac(2,1,j) = - ( u(2,i,j,k)*u(3,i,j,k) ) & * tmp2 fjac(2,2,j) = u(3,i,j,k) * tmp1 fjac(2,3,j) = u(2,i,j,k) * tmp1 fjac(2,4,j) = 0.0d+00 fjac(2,5,j) = 0.0d+00 fjac(3,1,j) = - ( u(3,i,j,k)*u(3,i,j,k)*tmp2) & + c2 * qs(i,j,k) fjac(3,2,j) = - c2 * u(2,i,j,k) * tmp1 fjac(3,3,j) = ( 2.0d+00 - c2 ) & * u(3,i,j,k) * tmp1 fjac(3,4,j) = - c2 * u(4,i,j,k) * tmp1 fjac(3,5,j) = c2 fjac(4,1,j) = - ( u(3,i,j,k)*u(4,i,j,k) ) & * tmp2 fjac(4,2,j) = 0.0d+00 fjac(4,3,j) = u(4,i,j,k) * tmp1 fjac(4,4,j) = u(3,i,j,k) * tmp1 fjac(4,5,j) = 0.0d+00 fjac(5,1,j) = ( c2 * 2.0d0 * square(i,j,k) & - c1 * u(5,i,j,k) ) & * u(3,i,j,k) * tmp2 fjac(5,2,j) = - c2 * u(2,i,j,k)*u(3,i,j,k) & * tmp2 fjac(5,3,j) = c1 * u(5,i,j,k) * tmp1 & - c2 & * ( qs(i,j,k) & + u(3,i,j,k)*u(3,i,j,k) * tmp2 ) fjac(5,4,j) = - c2 * ( u(3,i,j,k)*u(4,i,j,k) ) & * tmp2 fjac(5,5,j) = c1 * u(3,i,j,k) * tmp1 njac(1,1,j) = 0.0d+00 njac(1,2,j) = 0.0d+00 njac(1,3,j) = 0.0d+00 njac(1,4,j) = 0.0d+00 njac(1,5,j) = 0.0d+00 njac(2,1,j) = - c3c4 * tmp2 * u(2,i,j,k) njac(2,2,j) = c3c4 * tmp1 njac(2,3,j) = 0.0d+00 njac(2,4,j) = 0.0d+00 njac(2,5,j) = 0.0d+00 njac(3,1,j) = - con43 * c3c4 * tmp2 * u(3,i,j,k) njac(3,2,j) = 0.0d+00 njac(3,3,j) = con43 * c3c4 * tmp1 njac(3,4,j) = 0.0d+00 njac(3,5,j) = 0.0d+00 njac(4,1,j) = - c3c4 * tmp2 * u(4,i,j,k) njac(4,2,j) = 0.0d+00 njac(4,3,j) = 0.0d+00 njac(4,4,j) = c3c4 * tmp1 njac(4,5,j) = 0.0d+00 njac(5,1,j) = - ( c3c4 & - c1345 ) * tmp3 * (u(2,i,j,k)**2) & - ( con43 * c3c4 & - c1345 ) * tmp3 * (u(3,i,j,k)**2) & - ( c3c4 - c1345 ) * tmp3 * (u(4,i,j,k)**2) & - c1345 * tmp2 * u(5,i,j,k) njac(5,2,j) = ( c3c4 - c1345 ) * tmp2 * u(2,i,j,k) njac(5,3,j) = ( con43 * c3c4 & - c1345 ) * tmp2 * u(3,i,j,k) njac(5,4,j) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k) njac(5,5,j) = ( c1345 ) * tmp1 end do ! ! now joacobians set, so form left hand side in y direction ! call lhsinit(lhs, jsize) do j = 1, jsize-1 tmp1 = dt * ty1 tmp2 = dt * ty2 lhs(1,1,aa,j) = - tmp2 * fjac(1,1,j-1) & - tmp1 * njac(1,1,j-1) & - tmp1 * dy1 lhs(1,2,aa,j) = - tmp2 * fjac(1,2,j-1) & - tmp1 * njac(1,2,j-1) lhs(1,3,aa,j) = - tmp2 * fjac(1,3,j-1) & - tmp1 * njac(1,3,j-1) lhs(1,4,aa,j) = - tmp2 * fjac(1,4,j-1) & - tmp1 * njac(1,4,j-1) lhs(1,5,aa,j) = - tmp2 * fjac(1,5,j-1) & - tmp1 * njac(1,5,j-1) lhs(2,1,aa,j) = - tmp2 * fjac(2,1,j-1) & - tmp1 * njac(2,1,j-1) lhs(2,2,aa,j) = - tmp2 * fjac(2,2,j-1) & - tmp1 * njac(2,2,j-1) & - tmp1 * dy2 lhs(2,3,aa,j) = - tmp2 * fjac(2,3,j-1) & - tmp1 * njac(2,3,j-1) lhs(2,4,aa,j) = - tmp2 * fjac(2,4,j-1) & - tmp1 * njac(2,4,j-1) lhs(2,5,aa,j) = - tmp2 * fjac(2,5,j-1) & - tmp1 * njac(2,5,j-1) lhs(3,1,aa,j) = - tmp2 * fjac(3,1,j-1) & - tmp1 * njac(3,1,j-1) lhs(3,2,aa,j) = - tmp2 * fjac(3,2,j-1) & - tmp1 * njac(3,2,j-1) lhs(3,3,aa,j) = - tmp2 * fjac(3,3,j-1) & - tmp1 * njac(3,3,j-1) & - tmp1 * dy3 lhs(3,4,aa,j) = - tmp2 * fjac(3,4,j-1) & - tmp1 * njac(3,4,j-1) lhs(3,5,aa,j) = - tmp2 * fjac(3,5,j-1) & - tmp1 * njac(3,5,j-1) lhs(4,1,aa,j) = - tmp2 * fjac(4,1,j-1) & - tmp1 * njac(4,1,j-1) lhs(4,2,aa,j) = - tmp2 * fjac(4,2,j-1) & - tmp1 * njac(4,2,j-1) lhs(4,3,aa,j) = - tmp2 * fjac(4,3,j-1) & - tmp1 * njac(4,3,j-1) lhs(4,4,aa,j) = - tmp2 * fjac(4,4,j-1) & - tmp1 * njac(4,4,j-1) & - tmp1 * dy4 lhs(4,5,aa,j) = - tmp2 * fjac(4,5,j-1) & - tmp1 * njac(4,5,j-1) lhs(5,1,aa,j) = - tmp2 * fjac(5,1,j-1) & - tmp1 * njac(5,1,j-1) lhs(5,2,aa,j) = - tmp2 * fjac(5,2,j-1) & - tmp1 * njac(5,2,j-1) lhs(5,3,aa,j) = - tmp2 * fjac(5,3,j-1) & - tmp1 * njac(5,3,j-1) lhs(5,4,aa,j) = - tmp2 * fjac(5,4,j-1) & - tmp1 * njac(5,4,j-1) lhs(5,5,aa,j) = - tmp2 * fjac(5,5,j-1) & - tmp1 * njac(5,5,j-1) & - tmp1 * dy5 lhs(1,1,bb,j) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(1,1,j) & + tmp1 * 2.0d+00 * dy1 lhs(1,2,bb,j) = tmp1 * 2.0d+00 * njac(1,2,j) lhs(1,3,bb,j) = tmp1 * 2.0d+00 * njac(1,3,j) lhs(1,4,bb,j) = tmp1 * 2.0d+00 * njac(1,4,j) lhs(1,5,bb,j) = tmp1 * 2.0d+00 * njac(1,5,j) lhs(2,1,bb,j) = tmp1 * 2.0d+00 * njac(2,1,j) lhs(2,2,bb,j) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(2,2,j) & + tmp1 * 2.0d+00 * dy2 lhs(2,3,bb,j) = tmp1 * 2.0d+00 * njac(2,3,j) lhs(2,4,bb,j) = tmp1 * 2.0d+00 * njac(2,4,j) lhs(2,5,bb,j) = tmp1 * 2.0d+00 * njac(2,5,j) lhs(3,1,bb,j) = tmp1 * 2.0d+00 * njac(3,1,j) lhs(3,2,bb,j) = tmp1 * 2.0d+00 * njac(3,2,j) lhs(3,3,bb,j) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(3,3,j) & + tmp1 * 2.0d+00 * dy3 lhs(3,4,bb,j) = tmp1 * 2.0d+00 * njac(3,4,j) lhs(3,5,bb,j) = tmp1 * 2.0d+00 * njac(3,5,j) lhs(4,1,bb,j) = tmp1 * 2.0d+00 * njac(4,1,j) lhs(4,2,bb,j) = tmp1 * 2.0d+00 * njac(4,2,j) lhs(4,3,bb,j) = tmp1 * 2.0d+00 * njac(4,3,j) lhs(4,4,bb,j) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(4,4,j) & + tmp1 * 2.0d+00 * dy4 lhs(4,5,bb,j) = tmp1 * 2.0d+00 * njac(4,5,j) lhs(5,1,bb,j) = tmp1 * 2.0d+00 * njac(5,1,j) lhs(5,2,bb,j) = tmp1 * 2.0d+00 * njac(5,2,j) lhs(5,3,bb,j) = tmp1 * 2.0d+00 * njac(5,3,j) lhs(5,4,bb,j) = tmp1 * 2.0d+00 * njac(5,4,j) lhs(5,5,bb,j) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(5,5,j) & + tmp1 * 2.0d+00 * dy5 lhs(1,1,cc,j) = tmp2 * fjac(1,1,j+1) & - tmp1 * njac(1,1,j+1) & - tmp1 * dy1 lhs(1,2,cc,j) = tmp2 * fjac(1,2,j+1) & - tmp1 * njac(1,2,j+1) lhs(1,3,cc,j) = tmp2 * fjac(1,3,j+1) & - tmp1 * njac(1,3,j+1) lhs(1,4,cc,j) = tmp2 * fjac(1,4,j+1) & - tmp1 * njac(1,4,j+1) lhs(1,5,cc,j) = tmp2 * fjac(1,5,j+1) & - tmp1 * njac(1,5,j+1) lhs(2,1,cc,j) = tmp2 * fjac(2,1,j+1) & - tmp1 * njac(2,1,j+1) lhs(2,2,cc,j) = tmp2 * fjac(2,2,j+1) & - tmp1 * njac(2,2,j+1) & - tmp1 * dy2 lhs(2,3,cc,j) = tmp2 * fjac(2,3,j+1) & - tmp1 * njac(2,3,j+1) lhs(2,4,cc,j) = tmp2 * fjac(2,4,j+1) & - tmp1 * njac(2,4,j+1) lhs(2,5,cc,j) = tmp2 * fjac(2,5,j+1) & - tmp1 * njac(2,5,j+1) lhs(3,1,cc,j) = tmp2 * fjac(3,1,j+1) & - tmp1 * njac(3,1,j+1) lhs(3,2,cc,j) = tmp2 * fjac(3,2,j+1) & - tmp1 * njac(3,2,j+1) lhs(3,3,cc,j) = tmp2 * fjac(3,3,j+1) & - tmp1 * njac(3,3,j+1) & - tmp1 * dy3 lhs(3,4,cc,j) = tmp2 * fjac(3,4,j+1) & - tmp1 * njac(3,4,j+1) lhs(3,5,cc,j) = tmp2 * fjac(3,5,j+1) & - tmp1 * njac(3,5,j+1) lhs(4,1,cc,j) = tmp2 * fjac(4,1,j+1) & - tmp1 * njac(4,1,j+1) lhs(4,2,cc,j) = tmp2 * fjac(4,2,j+1) & - tmp1 * njac(4,2,j+1) lhs(4,3,cc,j) = tmp2 * fjac(4,3,j+1) & - tmp1 * njac(4,3,j+1) lhs(4,4,cc,j) = tmp2 * fjac(4,4,j+1) & - tmp1 * njac(4,4,j+1) & - tmp1 * dy4 lhs(4,5,cc,j) = tmp2 * fjac(4,5,j+1) & - tmp1 * njac(4,5,j+1) lhs(5,1,cc,j) = tmp2 * fjac(5,1,j+1) & - tmp1 * njac(5,1,j+1) lhs(5,2,cc,j) = tmp2 * fjac(5,2,j+1) & - tmp1 * njac(5,2,j+1) lhs(5,3,cc,j) = tmp2 * fjac(5,3,j+1) & - tmp1 * njac(5,3,j+1) lhs(5,4,cc,j) = tmp2 * fjac(5,4,j+1) & - tmp1 * njac(5,4,j+1) lhs(5,5,cc,j) = tmp2 * fjac(5,5,j+1) & - tmp1 * njac(5,5,j+1) & - tmp1 * dy5 end do ! ! ! ! performs guaussian elimination on this cell. ! ! assumes that unpacking routines for non-first cells ! preload C' and rhs' from previous cell. ! ! assumed send happens outside this routine, but that ! c'(JMAX) and rhs'(JMAX) will be sent to next cell ! ! ! multiply c(i,0,k) by b_inverse and copy back to c ! multiply rhs(0) by b_inverse(0) and copy to rhs ! call binvcrhs( lhs(1,1,bb,0), & lhs(1,1,cc,0), & rhs(1,i,0,k) ) ! ! begin inner most do loop ! do all the elements of the cell unless last ! do j=1,jsize-1 ! ! subtract A*lhs_vector(j-1) from lhs_vector(j) ! ! rhs(j) = rhs(j) - A*rhs(j-1) ! call matvec_sub(lhs(1,1,aa,j), & rhs(1,i,j-1,k),rhs(1,i,j,k)) ! ! B(j) = B(j) - C(j-1)*A(j) ! call matmul_sub(lhs(1,1,aa,j), & lhs(1,1,cc,j-1), & lhs(1,1,bb,j)) ! ! multiply c(i,j,k) by b_inverse and copy back to c ! multiply rhs(i,1,k) by b_inverse(i,1,k) and copy to rhs ! call binvcrhs( lhs(1,1,bb,j), & lhs(1,1,cc,j), & rhs(1,i,j,k) ) end do ! ! rhs(jsize) = rhs(jsize) - A*rhs(jsize-1) ! call matvec_sub(lhs(1,1,aa,jsize), & rhs(1,i,jsize-1,k),rhs(1,i,jsize,k)) ! ! B(jsize) = B(jsize) - C(jsize-1)*A(jsize) ! call matmul_sub(aa,i,jsize,k,c, ! $ cc,i,jsize-1,k,c,bb,i,jsize,k) ! call matmul_sub(lhs(1,1,aa,jsize), & lhs(1,1,cc,jsize-1), & lhs(1,1,bb,jsize)) ! ! multiply rhs(jsize) by b_inverse(jsize) and copy to rhs ! call binvrhs( lhs(1,1,bb,jsize), & rhs(1,i,jsize,k) ) ! ! back solve: if last cell, then generate U(jsize)=rhs(jsize) ! else assume U(jsize) is loaded in un pack backsub_info ! so just use it ! after call u(jstart) will be sent to next cell ! do j=jsize-1,0,-1 do m=1,BLOCK_SIZE do n=1,BLOCK_SIZE rhs(m,i,j,k) = rhs(m,i,j,k) & - lhs(m,n,cc,j)*rhs(n,i,j+1,k) end do end do end do end do end do if (timeron) call timer_stop(t_ysolve) return end subroutine z_solve ( ) !*****************************************************************************80 ! !! ZSOLVE performs line solves in the Z direction. ! ! Discussion: ! ! The routine first factors ! the block-tridiagonal matrix into an upper triangular matrix, ! and then performs back substitution to solve for the unknown ! vectors of each line. ! ! Make sure we treat elements zero to cell_size in the direction ! of the sweep. ! include 'header.h' include 'work_lhs.h' integer i, j, k, m, n, ksize if (timeron) call timer_start(t_zsolve) ! ! ! ! This function computes the left hand side for the three z-factors ! ksize = grid_points(3)-1 ! ! Compute the indices for storing the block-diagonal matrix; ! determine c (labeled f) and s jacobians ! do j = 1, grid_points(2)-2 do i = 1, grid_points(1)-2 do k = 0, ksize tmp1 = 1.0d+00 / u(1,i,j,k) tmp2 = tmp1 * tmp1 tmp3 = tmp1 * tmp2 fjac(1,1,k) = 0.0d+00 fjac(1,2,k) = 0.0d+00 fjac(1,3,k) = 0.0d+00 fjac(1,4,k) = 1.0d+00 fjac(1,5,k) = 0.0d+00 fjac(2,1,k) = - ( u(2,i,j,k)*u(4,i,j,k) ) & * tmp2 fjac(2,2,k) = u(4,i,j,k) * tmp1 fjac(2,3,k) = 0.0d+00 fjac(2,4,k) = u(2,i,j,k) * tmp1 fjac(2,5,k) = 0.0d+00 fjac(3,1,k) = - ( u(3,i,j,k)*u(4,i,j,k) ) & * tmp2 fjac(3,2,k) = 0.0d+00 fjac(3,3,k) = u(4,i,j,k) * tmp1 fjac(3,4,k) = u(3,i,j,k) * tmp1 fjac(3,5,k) = 0.0d+00 fjac(4,1,k) = - (u(4,i,j,k)*u(4,i,j,k) * tmp2 ) & + c2 * qs(i,j,k) fjac(4,2,k) = - c2 * u(2,i,j,k) * tmp1 fjac(4,3,k) = - c2 * u(3,i,j,k) * tmp1 fjac(4,4,k) = ( 2.0d+00 - c2 ) & * u(4,i,j,k) * tmp1 fjac(4,5,k) = c2 fjac(5,1,k) = ( c2 * 2.0d0 * square(i,j,k) & - c1 * u(5,i,j,k) ) & * u(4,i,j,k) * tmp2 fjac(5,2,k) = - c2 * ( u(2,i,j,k)*u(4,i,j,k) ) & * tmp2 fjac(5,3,k) = - c2 * ( u(3,i,j,k)*u(4,i,j,k) ) & * tmp2 fjac(5,4,k) = c1 * ( u(5,i,j,k) * tmp1 ) & - c2 & * ( qs(i,j,k) & + u(4,i,j,k)*u(4,i,j,k) * tmp2 ) fjac(5,5,k) = c1 * u(4,i,j,k) * tmp1 njac(1,1,k) = 0.0d+00 njac(1,2,k) = 0.0d+00 njac(1,3,k) = 0.0d+00 njac(1,4,k) = 0.0d+00 njac(1,5,k) = 0.0d+00 njac(2,1,k) = - c3c4 * tmp2 * u(2,i,j,k) njac(2,2,k) = c3c4 * tmp1 njac(2,3,k) = 0.0d+00 njac(2,4,k) = 0.0d+00 njac(2,5,k) = 0.0d+00 njac(3,1,k) = - c3c4 * tmp2 * u(3,i,j,k) njac(3,2,k) = 0.0d+00 njac(3,3,k) = c3c4 * tmp1 njac(3,4,k) = 0.0d+00 njac(3,5,k) = 0.0d+00 njac(4,1,k) = - con43 * c3c4 * tmp2 * u(4,i,j,k) njac(4,2,k) = 0.0d+00 njac(4,3,k) = 0.0d+00 njac(4,4,k) = con43 * c3 * c4 * tmp1 njac(4,5,k) = 0.0d+00 njac(5,1,k) = - ( c3c4 & - c1345 ) * tmp3 * (u(2,i,j,k)**2) & - ( c3c4 - c1345 ) * tmp3 * (u(3,i,j,k)**2) & - ( con43 * c3c4 & - c1345 ) * tmp3 * (u(4,i,j,k)**2) & - c1345 * tmp2 * u(5,i,j,k) njac(5,2,k) = ( c3c4 - c1345 ) * tmp2 * u(2,i,j,k) njac(5,3,k) = ( c3c4 - c1345 ) * tmp2 * u(3,i,j,k) njac(5,4,k) = ( con43 * c3c4 & - c1345 ) * tmp2 * u(4,i,j,k) njac(5,5,k) = ( c1345 )* tmp1 end do ! ! now jacobians set, so form left hand side in z direction ! call lhsinit(lhs, ksize) do k = 1, ksize-1 tmp1 = dt * tz1 tmp2 = dt * tz2 lhs(1,1,aa,k) = - tmp2 * fjac(1,1,k-1) & - tmp1 * njac(1,1,k-1) & - tmp1 * dz1 lhs(1,2,aa,k) = - tmp2 * fjac(1,2,k-1) & - tmp1 * njac(1,2,k-1) lhs(1,3,aa,k) = - tmp2 * fjac(1,3,k-1) & - tmp1 * njac(1,3,k-1) lhs(1,4,aa,k) = - tmp2 * fjac(1,4,k-1) & - tmp1 * njac(1,4,k-1) lhs(1,5,aa,k) = - tmp2 * fjac(1,5,k-1) & - tmp1 * njac(1,5,k-1) lhs(2,1,aa,k) = - tmp2 * fjac(2,1,k-1) & - tmp1 * njac(2,1,k-1) lhs(2,2,aa,k) = - tmp2 * fjac(2,2,k-1) & - tmp1 * njac(2,2,k-1) & - tmp1 * dz2 lhs(2,3,aa,k) = - tmp2 * fjac(2,3,k-1) & - tmp1 * njac(2,3,k-1) lhs(2,4,aa,k) = - tmp2 * fjac(2,4,k-1) & - tmp1 * njac(2,4,k-1) lhs(2,5,aa,k) = - tmp2 * fjac(2,5,k-1) & - tmp1 * njac(2,5,k-1) lhs(3,1,aa,k) = - tmp2 * fjac(3,1,k-1) & - tmp1 * njac(3,1,k-1) lhs(3,2,aa,k) = - tmp2 * fjac(3,2,k-1) & - tmp1 * njac(3,2,k-1) lhs(3,3,aa,k) = - tmp2 * fjac(3,3,k-1) & - tmp1 * njac(3,3,k-1) & - tmp1 * dz3 lhs(3,4,aa,k) = - tmp2 * fjac(3,4,k-1) & - tmp1 * njac(3,4,k-1) lhs(3,5,aa,k) = - tmp2 * fjac(3,5,k-1) & - tmp1 * njac(3,5,k-1) lhs(4,1,aa,k) = - tmp2 * fjac(4,1,k-1) & - tmp1 * njac(4,1,k-1) lhs(4,2,aa,k) = - tmp2 * fjac(4,2,k-1) & - tmp1 * njac(4,2,k-1) lhs(4,3,aa,k) = - tmp2 * fjac(4,3,k-1) & - tmp1 * njac(4,3,k-1) lhs(4,4,aa,k) = - tmp2 * fjac(4,4,k-1) & - tmp1 * njac(4,4,k-1) & - tmp1 * dz4 lhs(4,5,aa,k) = - tmp2 * fjac(4,5,k-1) & - tmp1 * njac(4,5,k-1) lhs(5,1,aa,k) = - tmp2 * fjac(5,1,k-1) & - tmp1 * njac(5,1,k-1) lhs(5,2,aa,k) = - tmp2 * fjac(5,2,k-1) & - tmp1 * njac(5,2,k-1) lhs(5,3,aa,k) = - tmp2 * fjac(5,3,k-1) & - tmp1 * njac(5,3,k-1) lhs(5,4,aa,k) = - tmp2 * fjac(5,4,k-1) & - tmp1 * njac(5,4,k-1) lhs(5,5,aa,k) = - tmp2 * fjac(5,5,k-1) & - tmp1 * njac(5,5,k-1) & - tmp1 * dz5 lhs(1,1,bb,k) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(1,1,k) & + tmp1 * 2.0d+00 * dz1 lhs(1,2,bb,k) = tmp1 * 2.0d+00 * njac(1,2,k) lhs(1,3,bb,k) = tmp1 * 2.0d+00 * njac(1,3,k) lhs(1,4,bb,k) = tmp1 * 2.0d+00 * njac(1,4,k) lhs(1,5,bb,k) = tmp1 * 2.0d+00 * njac(1,5,k) lhs(2,1,bb,k) = tmp1 * 2.0d+00 * njac(2,1,k) lhs(2,2,bb,k) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(2,2,k) & + tmp1 * 2.0d+00 * dz2 lhs(2,3,bb,k) = tmp1 * 2.0d+00 * njac(2,3,k) lhs(2,4,bb,k) = tmp1 * 2.0d+00 * njac(2,4,k) lhs(2,5,bb,k) = tmp1 * 2.0d+00 * njac(2,5,k) lhs(3,1,bb,k) = tmp1 * 2.0d+00 * njac(3,1,k) lhs(3,2,bb,k) = tmp1 * 2.0d+00 * njac(3,2,k) lhs(3,3,bb,k) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(3,3,k) & + tmp1 * 2.0d+00 * dz3 lhs(3,4,bb,k) = tmp1 * 2.0d+00 * njac(3,4,k) lhs(3,5,bb,k) = tmp1 * 2.0d+00 * njac(3,5,k) lhs(4,1,bb,k) = tmp1 * 2.0d+00 * njac(4,1,k) lhs(4,2,bb,k) = tmp1 * 2.0d+00 * njac(4,2,k) lhs(4,3,bb,k) = tmp1 * 2.0d+00 * njac(4,3,k) lhs(4,4,bb,k) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(4,4,k) & + tmp1 * 2.0d+00 * dz4 lhs(4,5,bb,k) = tmp1 * 2.0d+00 * njac(4,5,k) lhs(5,1,bb,k) = tmp1 * 2.0d+00 * njac(5,1,k) lhs(5,2,bb,k) = tmp1 * 2.0d+00 * njac(5,2,k) lhs(5,3,bb,k) = tmp1 * 2.0d+00 * njac(5,3,k) lhs(5,4,bb,k) = tmp1 * 2.0d+00 * njac(5,4,k) lhs(5,5,bb,k) = 1.0d+00 & + tmp1 * 2.0d+00 * njac(5,5,k) & + tmp1 * 2.0d+00 * dz5 lhs(1,1,cc,k) = tmp2 * fjac(1,1,k+1) & - tmp1 * njac(1,1,k+1) & - tmp1 * dz1 lhs(1,2,cc,k) = tmp2 * fjac(1,2,k+1) & - tmp1 * njac(1,2,k+1) lhs(1,3,cc,k) = tmp2 * fjac(1,3,k+1) & - tmp1 * njac(1,3,k+1) lhs(1,4,cc,k) = tmp2 * fjac(1,4,k+1) & - tmp1 * njac(1,4,k+1) lhs(1,5,cc,k) = tmp2 * fjac(1,5,k+1) & - tmp1 * njac(1,5,k+1) lhs(2,1,cc,k) = tmp2 * fjac(2,1,k+1) & - tmp1 * njac(2,1,k+1) lhs(2,2,cc,k) = tmp2 * fjac(2,2,k+1) & - tmp1 * njac(2,2,k+1) & - tmp1 * dz2 lhs(2,3,cc,k) = tmp2 * fjac(2,3,k+1) & - tmp1 * njac(2,3,k+1) lhs(2,4,cc,k) = tmp2 * fjac(2,4,k+1) & - tmp1 * njac(2,4,k+1) lhs(2,5,cc,k) = tmp2 * fjac(2,5,k+1) & - tmp1 * njac(2,5,k+1) lhs(3,1,cc,k) = tmp2 * fjac(3,1,k+1) & - tmp1 * njac(3,1,k+1) lhs(3,2,cc,k) = tmp2 * fjac(3,2,k+1) & - tmp1 * njac(3,2,k+1) lhs(3,3,cc,k) = tmp2 * fjac(3,3,k+1) & - tmp1 * njac(3,3,k+1) & - tmp1 * dz3 lhs(3,4,cc,k) = tmp2 * fjac(3,4,k+1) & - tmp1 * njac(3,4,k+1) lhs(3,5,cc,k) = tmp2 * fjac(3,5,k+1) & - tmp1 * njac(3,5,k+1) lhs(4,1,cc,k) = tmp2 * fjac(4,1,k+1) & - tmp1 * njac(4,1,k+1) lhs(4,2,cc,k) = tmp2 * fjac(4,2,k+1) & - tmp1 * njac(4,2,k+1) lhs(4,3,cc,k) = tmp2 * fjac(4,3,k+1) & - tmp1 * njac(4,3,k+1) lhs(4,4,cc,k) = tmp2 * fjac(4,4,k+1) & - tmp1 * njac(4,4,k+1) & - tmp1 * dz4 lhs(4,5,cc,k) = tmp2 * fjac(4,5,k+1) & - tmp1 * njac(4,5,k+1) lhs(5,1,cc,k) = tmp2 * fjac(5,1,k+1) & - tmp1 * njac(5,1,k+1) lhs(5,2,cc,k) = tmp2 * fjac(5,2,k+1) & - tmp1 * njac(5,2,k+1) lhs(5,3,cc,k) = tmp2 * fjac(5,3,k+1) & - tmp1 * njac(5,3,k+1) lhs(5,4,cc,k) = tmp2 * fjac(5,4,k+1) & - tmp1 * njac(5,4,k+1) lhs(5,5,cc,k) = tmp2 * fjac(5,5,k+1) & - tmp1 * njac(5,5,k+1) & - tmp1 * dz5 end do ! ! ! ! performs guaussian elimination on this cell. ! ! assumes that unpacking routines for non-first cells ! preload C' and rhs' from previous cell. ! ! assumed send happens outside this routine, but that ! c'(KMAX) and rhs'(KMAX) will be sent to next cell. ! ! ! outer most do loops - sweeping in i direction ! ! ! multiply c(i,j,0) by b_inverse and copy back to c ! multiply rhs(0) by b_inverse(0) and copy to rhs ! call binvcrhs( lhs(1,1,bb,0), & lhs(1,1,cc,0), & rhs(1,i,j,0) ) ! ! begin inner most do loop ! do all the elements of the cell unless last ! do k=1,ksize-1 ! ! subtract A*lhs_vector(k-1) from lhs_vector(k) ! ! rhs(k) = rhs(k) - A*rhs(k-1) ! call matvec_sub(lhs(1,1,aa,k), & rhs(1,i,j,k-1),rhs(1,i,j,k)) ! ! B(k) = B(k) - C(k-1)*A(k) ! call matmul_sub(aa,i,j,k,c,cc,i,j,k-1,c,bb,i,j,k) ! call matmul_sub(lhs(1,1,aa,k), & lhs(1,1,cc,k-1), & lhs(1,1,bb,k)) ! ! multiply c(i,j,k) by b_inverse and copy back to c ! multiply rhs(i,j,1) by b_inverse(i,j,1) and copy to rhs ! call binvcrhs( lhs(1,1,bb,k), & lhs(1,1,cc,k), & rhs(1,i,j,k) ) end do ! ! Now finish up special cases for last cell ! ! ! rhs(ksize) = rhs(ksize) - A*rhs(ksize-1) ! call matvec_sub(lhs(1,1,aa,ksize), & rhs(1,i,j,ksize-1),rhs(1,i,j,ksize)) ! ! B(ksize) = B(ksize) - C(ksize-1)*A(ksize) ! call matmul_sub(aa,i,j,ksize,c, ! $ cc,i,j,ksize-1,c,bb,i,j,ksize) ! call matmul_sub(lhs(1,1,aa,ksize), & lhs(1,1,cc,ksize-1), & lhs(1,1,bb,ksize)) ! ! multiply rhs(ksize) by b_inverse(ksize) and copy to rhs ! call binvrhs( lhs(1,1,bb,ksize), & rhs(1,i,j,ksize) ) ! ! back solve: if last cell, then generate U(ksize)=rhs(ksize) ! else assume U(ksize) is loaded in un pack backsub_info ! so just use it ! after call u(kstart) will be sent to next cell ! do k=ksize-1,0,-1 do m=1,BLOCK_SIZE do n=1,BLOCK_SIZE rhs(m,i,j,k) = rhs(m,i,j,k) & - lhs(m,n,cc,k)*rhs(n,i,j,k+1) end do end do end do end do end do if (timeron) call timer_stop(t_zsolve) return end