!! ! ! ! N A S P A R A L L E L B E N C H M A R K S 3.3 ! ! ! ! S E R I A L V E R S I O N ! ! ! ! C G ! ! ! !! ! ! ! This benchmark is a serial version of the NPB CG code. ! ! Refer to NAS Technical Reports 95-020 for details. ! ! ! ! Permission to use, copy, distribute and modify this software ! ! for any purpose with or without fee is hereby granted. We ! ! request, however, that all derived work reference the NAS ! ! Parallel Benchmarks 3.3. This software is provided "as is" ! ! without express or implied warranty. ! ! ! ! Information on NPB 3.3, including the technical report, the ! ! original specifications, source code, results and information ! ! on how to submit new results, is available at: ! ! ! ! http://www.nas.nasa.gov/Software/NPB/ ! ! ! ! Send comments or suggestions to npb@nas.nasa.gov ! ! ! ! NAS Parallel Benchmarks Group ! ! NASA Ames Research Center ! ! Mail Stop: T27A-1 ! ! Moffett Field, CA 94035-1000 ! ! ! ! E-mail: npb@nas.nasa.gov ! ! Fax: (650) 604-3957 ! ! ! !! ! ! NPB CG serial version ! ! Authors: M. Yarrow ! C. Kuszmaul ! program main !*****************************************************************************80 ! !! MAIN is the main program for CG_SERIAL. ! implicit none include 'globals.h' common / main_int_mem / colidx, rowstr, & iv, arow, acol integer colidx(nz), rowstr(na+1), & iv(na), arow(na), acol(naz) common / main_flt_mem / aelt, a, & x, & z, & p, & q, & r double precision aelt(naz), a(nz), & x(na+2), & z(na+2), & p(na+2), & q(na+2), & r(na+2) integer i, j, k, it double precision zeta, randlc external randlc double precision rnorm double precision norm_temp1,norm_temp2 double precision t, mflops, tmax character class logical verified double precision zeta_verify_value, epsilon, err integer fstatus character t_names(t_last)*8 do i = 1, T_last call timer_clear( i ) end do open(unit=2, file='timer.flag', status='old', iostat=fstatus) if (fstatus .eq. 0) then timeron = .true. t_names(t_init) = 'init' t_names(t_bench) = 'benchmk' t_names(t_conj_grad) = 'conjgd' close(2) else timeron = .false. endif call timer_start( T_init ) firstrow = 1 lastrow = na firstcol = 1 lastcol = na if( na .eq. 1400 .and. & nonzer .eq. 7 .and. & niter .eq. 15 .and. & shift .eq. 10.d0 ) then class = 'S' zeta_verify_value = 8.5971775078648d0 else if( na .eq. 7000 .and. & nonzer .eq. 8 .and. & niter .eq. 15 .and. & shift .eq. 12.d0 ) then class = 'W' zeta_verify_value = 10.362595087124d0 else if( na .eq. 14000 .and. & nonzer .eq. 11 .and. & niter .eq. 15 .and. & shift .eq. 20.d0 ) then class = 'A' zeta_verify_value = 17.130235054029d0 else if( na .eq. 75000 .and. & nonzer .eq. 13 .and. & niter .eq. 75 .and. & shift .eq. 60.d0 ) then class = 'B' zeta_verify_value = 22.712745482631d0 else if( na .eq. 150000 .and. & nonzer .eq. 15 .and. & niter .eq. 75 .and. & shift .eq. 110.d0 ) then class = 'C' zeta_verify_value = 28.973605592845d0 else if( na .eq. 1500000 .and. & nonzer .eq. 21 .and. & niter .eq. 100 .and. & shift .eq. 500.d0 ) then class = 'D' zeta_verify_value = 52.514532105794d0 else if( na .eq. 9000000 .and. & nonzer .eq. 26 .and. & niter .eq. 100 .and. & shift .eq. 1.5d3 ) then class = 'E' zeta_verify_value = 77.522164599383d0 else class = 'U' endif write( *,1000 ) write( *,1001 ) na write( *,1002 ) niter write( *,* ) 1000 format(//,' NAS Parallel Benchmarks (NPB3.3-SER)', & ' - CG Benchmark', /) 1001 format(' Size: ', i11 ) 1002 format(' Iterations: ', i5 ) naa = na nzz = nz ! ! Inialize random number generator ! tran = 314159265.0D0 amult = 1220703125.0D0 zeta = randlc( tran, amult ) ! ! ! call makea(naa, nzz, a, colidx, rowstr, & firstrow, lastrow, firstcol, lastcol, & arow, acol, aelt, iv) ! ! Note: as a result of the above call to makea: ! values of j used in indexing rowstr go from 1 --> lastrow-firstrow+1 ! values of colidx which are col indexes go from firstcol --> lastcol ! So: ! Shift the col index vals from actual (firstcol --> lastcol ) ! to local, i.e., (1 --> lastcol-firstcol+1) ! do j=1,lastrow-firstrow+1 do k=rowstr(j),rowstr(j+1)-1 colidx(k) = colidx(k) - firstcol + 1 end do end do ! ! set starting vector to (1, 1, .... 1) ! do i = 1, na+1 x(i) = 1.0D0 end do do j=1, lastcol-firstcol+1 q(j) = 0.0d0 z(j) = 0.0d0 r(j) = 0.0d0 p(j) = 0.0d0 end do zeta = 0.0d0 ! ! Do one iteration untimed to init all code and data page tables ! (then reinit, start timing, to niter its) ! do it = 1, 1 ! ! The call to the conjugate gradient routine: ! call conj_grad ( colidx, & rowstr, & x, & z, & a, & p, & q, & r, & rnorm ) ! ! zeta = shift + 1/(x.z) ! So, first: (x.z) ! Also, find norm of z ! So, first: (z.z) ! norm_temp1 = 0.0d0 norm_temp2 = 0.0d0 do j=1, lastcol-firstcol+1 norm_temp1 = norm_temp1 + x(j)*z(j) norm_temp2 = norm_temp2 + z(j)*z(j) end do norm_temp2 = 1.0d0 / sqrt( norm_temp2 ) ! ! Normalize z to obtain x ! do j=1, lastcol-firstcol+1 x(j) = norm_temp2*z(j) end do end do ! end of do one iteration untim ! ! set starting vector to (1, 1, .... 1) ! do i = 1, na+1 x(i) = 1.0D0 end do zeta = 0.0d0 call timer_stop( T_init ) write (*, 2000) timer_read(T_init) 2000 format(' Initialization time = ',f15.3,' seconds') call timer_start( T_bench ) ! ! Main Iteration for inverse power method ! do it = 1, niter ! ! The call to the conjugate gradient routine: ! if ( timeron ) call timer_start( T_conj_grad ) call conj_grad ( colidx, & rowstr, & x, & z, & a, & p, & q, & r, & rnorm ) if ( timeron ) call timer_stop( T_conj_grad ) ! ! zeta = shift + 1/(x.z) ! So, first: (x.z) ! Also, find norm of z ! So, first: (z.z) ! norm_temp1 = 0.0d0 norm_temp2 = 0.0d0 do j=1, lastcol-firstcol+1 norm_temp1 = norm_temp1 + x(j)*z(j) norm_temp2 = norm_temp2 + z(j)*z(j) end do norm_temp2 = 1.0d0 / sqrt( norm_temp2 ) zeta = shift + 1.0d0 / norm_temp1 if( it .eq. 1 ) write( *,9000 ) write( *,9001 ) it, rnorm, zeta 9000 format( /,' iteration ||r|| zeta' ) 9001 format( 4x, i5, 7x, e20.14, f20.13 ) ! ! Normalize z to obtain x ! do j=1, lastcol-firstcol+1 x(j) = norm_temp2*z(j) end do end do ! end of main iter inv pow meth call timer_stop( T_bench ) ! ! End of timed section ! t = timer_read( T_bench ) write(*,100) 100 format(' Benchmark completed ') epsilon = 1.d-10 if (class .ne. 'U') then ! err = abs( zeta - zeta_verify_value) err = abs( zeta - zeta_verify_value )/zeta_verify_value if( err .le. epsilon ) then verified = .TRUE. write(*, 200) write(*, 201) zeta write(*, 202) err 200 format(' VERIFICATION SUCCESSFUL ') 201 format(' Zeta is ', E20.13) 202 format(' Error is ', E20.13) else verified = .FALSE. write(*, 300) write(*, 301) zeta write(*, 302) zeta_verify_value 300 format(' VERIFICATION FAILED') 301 format(' Zeta ', E20.13) 302 format(' The correct zeta is ', E20.13) endif else verified = .FALSE. write (*, 400) write (*, 401) write (*, 201) zeta 400 format(' Problem size unknown') 401 format(' NO VERIFICATION PERFORMED') endif if( t .ne. 0. ) then mflops = float( 2*niter*na ) & * ( 3.+float( nonzer*(nonzer+1) ) & + 25.*(5.+float( nonzer*(nonzer+1) )) & + 3. ) / t / 1000000.0 else mflops = 0.0 endif call print_results('CG', class, na, 0, 0, & niter, t, & mflops, ' floating point', & verified, npbversion, compiletime, & cs1, cs2, cs3, cs4, cs5, cs6, cs7) 600 format( i4, 2e19.12) ! ! More timers ! if (.not.timeron) goto 999 tmax = timer_read(T_bench) if (tmax .eq. 0.0) tmax = 1.0 write(*,800) 800 format(' SECTION Time (secs)') do i=1, t_last t = timer_read(i) if (i.eq.t_init) then write(*,810) t_names(i), t else write(*,810) t_names(i), t, t*100./tmax if (i.eq.t_conj_grad) then t = tmax - t write(*,820) 'rest', t, t*100./tmax endif endif 810 format(2x,a8,':',f9.3:' (',f6.2,'%)') 820 format(' --> total ',a8,':',f9.3,' (',f6.2,'%)') end do 999 continue stop end subroutine conj_grad ( colidx, & rowstr, & x, & z, & a, & p, & q, & r, & rnorm ) !*****************************************************************************80 ! !! CONJ_GRAD carries out the conjugate gradient method. ! ! Floating point arrays here are named as in NPB1 spec discussion of ! CG algorithm ! implicit none include 'globals.h' double precision x(*), & z(*), & a(nzz) integer colidx(nzz), rowstr(naa+1) double precision p(*), & q(*), & r(*) integer j, k integer cgit, cgitmax double precision d, sum, rho, rho0, alpha, beta, rnorm data cgitmax / 25 / rho = 0.0d0 ! ! Initialize the CG algorithm: ! do j=1,naa+1 q(j) = 0.0d0 z(j) = 0.0d0 r(j) = x(j) p(j) = r(j) end do ! ! rho = r.r ! Now, obtain the norm of r: First, sum squares of r elements locally... ! do j=1, lastcol-firstcol+1 rho = rho + r(j)*r(j) end do ! ! The conj grad iteration loop ! do cgit = 1, cgitmax ! ! q = A.p ! The partition submatrix-vector multiply: use workspace w ! ! NOTE: this version of the multiply is actually (slightly: maybe %5) ! faster on the sp2 on 16 nodes than is the unrolled-by-2 version ! below. On the Cray t3d, the reverse is true, i.e., the ! unrolled-by-two version is some 10% faster. ! The unrolled-by-8 version below is significantly faster ! on the Cray t3d - overall speed of code is 1.5 times faster. ! do j=1,lastrow-firstrow+1 sum = 0.d0 do k=rowstr(j),rowstr(j+1)-1 sum = sum + a(k)*p(colidx(k)) end do q(j) = sum end do ! do j=1,lastrow-firstrow+1 ! i = rowstr(j) ! iresidue = mod( rowstr(j+1)-i, 2 ) ! sum1 = 0.d0 ! sum2 = 0.d0 ! if( iresidue .eq. 1 ) ! & sum1 = sum1 + a(i)*p(colidx(i)) ! do k=i+iresidue, rowstr(j+1)-2, 2 ! sum1 = sum1 + a(k) *p(colidx(k)) ! sum2 = sum2 + a(k+1)*p(colidx(k+1)) ! end do ! q(j) = sum1 + sum2 ! end do ! do j=1,lastrow-firstrow+1 ! i = rowstr(j) ! iresidue = mod( rowstr(j+1)-i, 8 ) ! sum = 0.d0 ! do k=i,i+iresidue-1 ! sum = sum + a(k)*p(colidx(k)) ! end do ! do k=i+iresidue, rowstr(j+1)-8, 8 ! sum = sum + a(k )*p(colidx(k )) ! & + a(k+1)*p(colidx(k+1)) ! & + a(k+2)*p(colidx(k+2)) ! & + a(k+3)*p(colidx(k+3)) ! & + a(k+4)*p(colidx(k+4)) ! & + a(k+5)*p(colidx(k+5)) ! & + a(k+6)*p(colidx(k+6)) ! & + a(k+7)*p(colidx(k+7)) ! end do ! q(j) = sum ! end do ! ! Obtain p.q ! d = 0.0d0 do j=1, lastcol-firstcol+1 d = d + p(j)*q(j) end do ! ! Obtain alpha = rho / (p.q) ! alpha = rho / d ! ! Save a temporary of rho ! rho0 = rho ! ! Obtain z = z + alpha*p ! and r = r - alpha*q ! rho = 0.0d0 do j=1, lastcol-firstcol+1 z(j) = z(j) + alpha*p(j) r(j) = r(j) - alpha*q(j) end do ! ! rho = r.r ! Now, obtain the norm of r: First, sum squares of r elements locally... ! do j=1, lastcol-firstcol+1 rho = rho + r(j)*r(j) end do ! ! Obtain beta: ! beta = rho / rho0 ! ! p = r + beta*p ! do j=1, lastcol-firstcol+1 p(j) = r(j) + beta*p(j) end do end do ! end of do cgit=1,cgitmax ! ! Compute residual norm explicitly: ||r|| = ||x - A.z|| ! First, form A.z ! The partition submatrix-vector multiply ! sum = 0.0d0 do j=1,lastrow-firstrow+1 d = 0.d0 do k=rowstr(j),rowstr(j+1)-1 d = d + a(k)*z(colidx(k)) end do r(j) = d end do ! ! At this point, r contains A.z ! do j=1, lastcol-firstcol+1 d = x(j) - r(j) sum = sum + d*d end do rnorm = sqrt( sum ) return end subroutine makea( n, nz, a, colidx, rowstr, & firstrow, lastrow, firstcol, lastcol, & arow, acol, aelt, iv ) !*****************************************************************************80 ! !! MAKEA generates the system matrix A with specified sparsity. ! include 'npbparams.h' integer n, nz integer firstrow, lastrow, firstcol, lastcol integer colidx(nz), rowstr(n+1) integer iv(n), arow(n), acol(nonzer+1,n) double precision aelt(nonzer+1,n) double precision a(nz) ! ! generate the test problem for benchmark 6 ! makea generates a sparse matrix with a ! prescribed sparsity distribution ! ! parameter type usage ! ! input ! ! n i number of cols/rows of matrix ! nz i nonzeros as declared array size ! rcond r*8 condition number ! shift r*8 main diagonal shift ! ! output ! ! a r*8 array for nonzeros ! colidx i col indices ! rowstr i row pointers ! ! workspace ! ! iv, arow, acol i ! aelt r*8 ! integer i, iouter, ivelt, nzv, nn1 integer ivc(nonzer+1) double precision vc(nonzer+1) ! ! nonzer is approximately (int(sqrt(nnza /n))); ! external sparse, sprnvc, vecset ! ! nn1 is the smallest power of two not less than n ! nn1 = 1 50 continue nn1 = 2 * nn1 if (nn1 .lt. n) goto 50 ! ! Generate nonzero positions and save for the use in sparse. ! do iouter = 1, n nzv = nonzer call sprnvc( n, nzv, nn1, vc, ivc ) call vecset( n, vc, ivc, nzv, iouter, .5D0 ) arow(iouter) = nzv do ivelt = 1, nzv acol(ivelt, iouter) = ivc(ivelt) aelt(ivelt, iouter) = vc(ivelt) end do end do ! ! ... make the sparse matrix from list of elements with duplicates ! (iv is used as workspace) ! call sparse( a, colidx, rowstr, n, nz, nonzer, arow, acol, & aelt, firstrow, lastrow, & iv, rcond, shift ) return end subroutine sparse( a, colidx, rowstr, n, nz, nonzer, arow, acol, & aelt, firstrow, lastrow, & nzloc, rcond, shift ) !*****************************************************************************80 ! !! SPARSE generates a sparse matrix from a list of row, column, value triples. ! implicit logical (a-z) integer colidx(*), rowstr(*) integer firstrow, lastrow integer n, nz, nonzer, arow(*), acol(nonzer+1,*) double precision a(*), aelt(nonzer+1,*), rcond, shift ! ! rows range from firstrow to lastrow ! the rowstr pointers are defined for nrows = lastrow-firstrow+1 values ! integer nzloc(n), nrows ! ! generate a sparse matrix from a list of ! [col, row, element] tri ! integer i, j, j1, j2, nza, k, kk, nzrow, jcol double precision xi, size, scale, ratio, va ! ! how many rows of result ! nrows = lastrow - firstrow + 1 ! ! ...count the number of triples in each row ! do j = 1, nrows+1 rowstr(j) = 0 end do do i = 1, n do nza = 1, arow(i) j = acol(nza, i) + 1 rowstr(j) = rowstr(j) + arow(i) end do end do rowstr(1) = 1 do j = 2, nrows+1 rowstr(j) = rowstr(j) + rowstr(j-1) end do nza = rowstr(nrows+1) - 1 ! ! ... rowstr(j) now is the location of the first nonzero ! of row j of a ! if (nza .gt. nz) then write(*,*) 'Space for matrix elements exceeded in sparse' write(*,*) 'nza, nzmax = ',nza, nz stop endif ! ! ... preload data pages ! do j = 1, nrows do k = rowstr(j), rowstr(j+1)-1 a(k) = 0.d0 colidx(k) = 0 end do nzloc(j) = 0 end do ! ! ... generate actual values by summing duplicates ! size = 1.0D0 ratio = rcond ** (1.0D0 / dfloat(n)) do i = 1, n do nza = 1, arow(i) j = acol(nza, i) scale = size * aelt(nza, i) do nzrow = 1, arow(i) jcol = acol(nzrow, i) va = aelt(nzrow, i) * scale ! ! ... add the identity * rcond to the generated matrix to bound ! the smallest eigenvalue from below by rcond ! if (jcol .eq. j .and. j .eq. i) then va = va + rcond - shift endif do k = rowstr(j), rowstr(j+1)-1 if (colidx(k) .gt. jcol) then ! ! ... insert colidx here orderly ! do kk = rowstr(j+1)-2, k, -1 if (colidx(kk) .gt. 0) then a(kk+1) = a(kk) colidx(kk+1) = colidx(kk) endif end do colidx(k) = jcol a(k) = 0.d0 goto 40 else if (colidx(k) .eq. 0) then colidx(k) = jcol goto 40 else if (colidx(k) .eq. jcol) then ! ! ... mark the duplicated entry ! nzloc(j) = nzloc(j) + 1 goto 40 endif end do print *,'internal error in sparse: i=',i stop 40 continue a(k) = a(k) + va end do 60 continue end do size = size * ratio end do ! ! ... remove empty entries and generate final results ! do j = 2, nrows nzloc(j) = nzloc(j) + nzloc(j-1) end do do j = 1, nrows if (j .gt. 1) then j1 = rowstr(j) - nzloc(j-1) else j1 = 1 endif j2 = rowstr(j+1) - nzloc(j) - 1 nza = rowstr(j) do k = j1, j2 a(k) = a(nza) colidx(k) = colidx(nza) nza = nza + 1 end do end do do j = 2, nrows+1 rowstr(j) = rowstr(j) - nzloc(j-1) end do nza = rowstr(nrows+1) - 1 ! write (*, 11000) nza return 11000 format ( //,'final nonzero count in sparse ', & /,'number of nonzeros = ', i16 ) end subroutine sprnvc( n, nz, nn1, v, iv ) !*****************************************************************************80 ! !! SPRNVC generates a vector having a given number of nonzero entries. ! implicit none double precision v(*) integer n, nz, nn1, iv(*) common /urando/ amult, tran double precision amult, tran ! ! generate a sparse n-vector (v, iv) ! having nzv nonzeros ! ! mark(i) is set to 1 if position i is nonzero. ! mark is all zero on entry and is reset to all zero before exit ! this corrects a performance bug found by John G. Lewis, caused by ! reinitialization of mark on every one of the n calls to sprnvc ! integer nzv, ii, i, icnvrt external randlc, icnvrt double precision randlc, vecelt, vecloc nzv = 0 100 continue if (nzv .ge. nz) goto 110 vecelt = randlc( tran, amult ) ! ! generate an integer between 1 and n in a portable manner ! vecloc = randlc(tran, amult) i = icnvrt(vecloc, nn1) + 1 if (i .gt. n) goto 100 ! ! was this integer generated already? ! do ii = 1, nzv if (iv(ii) .eq. i) goto 100 end do nzv = nzv + 1 v(nzv) = vecelt iv(nzv) = i goto 100 110 continue return end function icnvrt(x, ipwr2) !*****************************************************************************80 ! !! ICNVRT multiplies a real number in (0,1) by a power of 2 and then floors it. ! implicit logical (a-z) double precision x integer ipwr2, icnvrt ! ! scale a double precision number x in (0,1) by a power of 2 and chop it ! icnvrt = int(ipwr2 * x) return end subroutine vecset(n, v, iv, nzv, i, val) !*****************************************************************************80 ! !! VECSET sets the I-th entry of a sparse vector. ! implicit logical (a-z) integer n, iv(*), nzv, i, k double precision v(*), val ! ! set ith element of sparse vector (v, iv) with ! nzv nonzeros to val ! logical set set = .false. do k = 1, nzv if (iv(k) .eq. i) then v(k) = val set = .true. endif end do if (.not. set) then nzv = nzv + 1 v(nzv) = val iv(nzv) = i endif return end subroutine print_results(name, class, n1, n2, n3, niter, & t, mops, optype, verified, npbversion, & compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) !*****************************************************************************80 ! !! PRINT_RESULTS prints the results. ! implicit none character name*(*) character class*1 integer n1, n2, n3, niter, j double precision t, mops character optype*24, size*15 logical verified character*(*) npbversion, compiletime, & cs1, cs2, cs3, cs4, cs5, cs6, cs7 write (*, 2) name 2 format(//, ' ', A, ' Benchmark Completed.') write (*, 3) Class 3 format(' Class = ', 12x, a12) ! ! If this is not a grid-based problem (EP, FT, CG), then ! we only print n1, which contains some measure of the ! problem size. In that case, n2 and n3 are both zero. ! Otherwise, we print the grid size n1xn2xn3 ! if ((n2 .eq. 0) .and. (n3 .eq. 0)) then if (name(1:2) .eq. 'EP') then write(size, '(f15.0)' ) 2.d0**n1 j = 15 if (size(j:j) .eq. '.') then size(j:j) = ' ' j = j - 1 endif write (*,42) size(1:j) 42 format(' Size = ',9x, a15) else write (*,44) n1 44 format(' Size = ',12x, i12) endif else write (*, 4) n1,n2,n3 4 format(' Size = ',9x, i4,'x',i4,'x',i4) endif write (*, 5) niter 5 format(' Iterations = ', 12x, i12) write (*, 6) t 6 format(' Time in seconds = ',12x, f12.2) write (*,9) mops 9 format(' Mop/s total = ',12x, f12.2) write(*, 11) optype 11 format(' Operation type = ', a24) if (verified) then write(*,12) ' SUCCESSFUL' else write(*,12) 'UNSUCCESSFUL' endif 12 format(' Verification = ', 12x, a) write(*,13) npbversion 13 format(' Version = ', 12x, a12) write(*,14) compiletime 14 format(' Compile date = ', 12x, a12) write (*,121) cs1 121 format(/, ' Compile options:', /, & ' F77 = ', A) write (*,122) cs2 122 format(' FLINK = ', A) write (*,123) cs3 123 format(' F_LIB = ', A) write (*,124) cs4 124 format(' F_INC = ', A) write (*,125) cs5 125 format(' FFLAGS = ', A) write (*,126) cs6 126 format(' FLINKFLAGS = ', A) write(*, 127) cs7 127 format(' RAND = ', A) write (*,130) 130 format(//' Please send all errors/feedbacks to:'// & ' NPB Development Team'/ & ' npb@nas.nasa.gov'//) return end double precision function randlc(x, a) !*****************************************************************************80 ! !! RANDLC returns a uniform pseudorandom double precision number. ! ! Discussion: ! ! The number returned is in the range (0, 1). ! ! The algorithm uses the linear congruential generator ! ! x_{k+1} = a x_k (mod 2^46) ! ! where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers ! before repeating. The argument A is the same as 'a' in the above formula, ! and X is the same as x_0. A and X must be odd double precision integers ! in the range (1, 2^46). The returned value RANDLC is normalized to be ! between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain ! the new seed x_1, so that subsequent calls to RANDLC using the same ! arguments will generate a continuous sequence. ! implicit none double precision x, a integer*8 i246m1, Lx, La double precision d2m46 parameter(d2m46=0.5d0**46) save i246m1 data i246m1/X'00003FFFFFFFFFFF'/ Lx = X La = A Lx = iand(Lx*La,i246m1) randlc = d2m46*dble(Lx) x = dble(Lx) return end subroutine timer_clear ( n ) !*****************************************************************************80 ! !! TIMER_CLEAR clears the timer. ! implicit none integer n double precision start(64), elapsed(64) common /tt/ start, elapsed elapsed(n) = 0.0 return end subroutine timer_start ( n ) !*****************************************************************************80 ! !! TIMER_START starts the timer. ! implicit none external elapsed_time double precision elapsed_time integer n double precision start(64), elapsed(64) common /tt/ start, elapsed start(n) = elapsed_time() return end subroutine timer_stop ( n ) !*****************************************************************************80 ! !! TIMER_STOP stops the timer. ! implicit none external elapsed_time double precision elapsed_time integer n double precision start(64), elapsed(64) common /tt/ start, elapsed double precision t, now now = elapsed_time() t = now - start(n) elapsed(n) = elapsed(n) + t return end double precision function timer_read ( n ) !*****************************************************************************80 ! !! TIMER_READ reads the timer. ! implicit none integer n double precision start(64), elapsed(64) common /tt/ start, elapsed timer_read = elapsed(n) return end function elapsed_time ( ) !*****************************************************************************80 ! !! ELAPSED_TIME measures wall clock time. ! implicit none double precision elapsed_time double precision t ! ! This function must measure wall clock time, not CPU time. ! Since there is no portable timer in Fortran (77) ! we call a routine compiled in C (though the C source may have ! to be tweaked). ! call wtime ( t ) ! ! The following is not ok for "official" results because it reports ! CPU time not wall clock time. It may be useful for developing/testing ! on timeshared Crays, though. ! call second(t) ! elapsed_time = t return end