************************************************* * Program: STREAM2 * * Revision: 0.1, 99.10.26 * * Author: John McCalpin * * john@mccalpin.com * ************************************************* *----------------------------------------------------------------------- * Copyright 1991-2003: John D. McCalpin *----------------------------------------------------------------------- * License: * 1. You are free to use this program and/or to redistribute * this program. * 2. You are free to modify this program for your own use, * including commercial use, subject to the publication * restrictions in item 3. * 3. You are free to publish results obtained from running this * program, or from works that you derive from this program, * with the following limitations: * 3a. In order to be referred to as "STREAM2 benchmark results", * published results must be in conformance to the STREAM * Run Rules, (briefly reviewed below) published at * http://www.cs.virginia.edu/stream/ref.html * and incorporated herein by reference. * As the copyright holder, John McCalpin retains the * right to determine conformity with the Run Rules. * 3b. Results based on modified source code or on runs not in * accordance with the STREAM Run Rules must be clearly * labelled whenever they are published. Examples of * proper labelling include: * "tuned STREAM2 benchmark results" * "based on a variant of the STREAM2 benchmark code" * Other comparable, clear and reasonable labelling is * acceptable. * 3c. Submission of results to the STREAM benchmark web site * is encouraged, but not required. * 4. Use of this program or creation of derived works based on this * program constitutes acceptance of these licensing restrictions. * 5. Absolutely no warranty is expressed or implied. 
*-----------------------------------------------------------------------
*************************************************
*  This program measures sustained bandwidth    *
*  using four computational kernels:            *
*                                               *
*       FILL:   a(i) = 0                        *
*       COPY:   a(i) = b(i)                     *
*       DAXPY:  a(i) = a(i) + q*b(i)            *
*       DOT:    sum += a(i) * b(i)              *
*                                               *
*  Results are presented in MB/s, assuming      *
*    8 Bytes per iteration for FILL,            *
*   16 Bytes per iteration for COPY & DOT, and  *
*   24 Bytes per iteration for DAXPY            *
*************************************************
      program stream2
*
*     Driver for the four kernels.  For NUMSIZES vector lengths M,
*     evenly spaced in log10 between NMIN and NMAX, every kernel is
*     timed NTIMES times.  Each timed sample repeats the kernel
*     inner = NMAX/M times and records the time per repetition; the
*     smallest sample gives the reported rate in MB/s.
*
*     As coded, FILL stores a scalar into a, COPY stores a into b,
*     DAXPY updates b with b + scalar*a, and DOT reduces a*b.
*
      implicit none
      integer NMIN, NMAX, NTIMES, NUMSIZES
      parameter (NMIN=30,NMAX=2000000)
      parameter (NTIMES=10,NUMSIZES=32)
      integer NPAD
      parameter (NPAD=5)
      real*8 a(NMAX+NPAD),b(NMAX+NPAD)
      real*8 time(4,NTIMES),mysecond,scalar
      real*8 sum,start,finish
      real*8 sum0,sum1,sum2,sum3,sum4,sum5,sum6,sum7
      real*8 rate(4),besttime(4),bytes(4)
      real*8 expnt,tdelta
      logical ALLTIMES
*     inner  = kernel repetitions per timed sample
*     m8     = largest multiple of 8 that does not exceed M
*     ncheck = number of timer readings used for the granularity test
      integer i,j,k,l,M,inner,m8,ncheck
*     mysecond is supplied outside this file; its result is used here
*     as a time stamp (presumably in seconds - confirm against the
*     timer implementation that is linked in).
      external mysecond
*     Bytes moved per loop iteration for FILL, COPY, DAXPY, DOT.
      data bytes/8,16,24,16/
*     Set to .true. to print every individual timing sample.
      data ALLTIMES/.false./

*  check timer granularity
*     Fill a with consecutive timer readings and keep the smallest
*     non-zero difference between neighbouring readings.
      ncheck = min(10000,NMAX)
      do i=1,ncheck
          a(i) = 0.0d0
      end do
      do i=1,ncheck
          a(i) = mysecond()
      end do
      tdelta = 1.d36
      do i=1,ncheck-1
          if (a(i+1).ne.a(i)) then
              tdelta = min(tdelta,abs(a(i+1)-a(i)))
          end if
      end do
      print *,'Smallest time delta is ',tdelta
      print *,' Size Iter FILL COPY DAXPY DOT'

*  Loop over problem size
      do j=1,NUMSIZES
*         Interpolate log10(M) linearly between log10(NMIN) and
*         log10(NMAX), then round to the nearest integer length.
          expnt = log10(dble(NMIN)) + dble(j-1)/dble(NUMSIZES-1)*
     $          (log10(dble(NMAX))-log10(dble(NMIN)))
          M = NINT(10.**expnt)
*         Repetition count and unrolled length depend only on M.
          inner = NMAX/M
          m8 = M - mod(M,8)

*  Initialize Arrays
          do i=1,M
              a(i) = 0.0d0
              b(i) = 0.0d0
          end do

          do k=1,NTIMES
*             ---- FILL ----
              start = mysecond()
              do l=1,inner
                  scalar = dble(k+l)
                  do i=1,M
                      a(i) = scalar
                  end do
              end do
              finish = mysecond()
              time(1,k) = (finish-start)/dble(inner)

*             ---- COPY ----
*             The store to a(l) changes the input on every
*             repetition (presumably so the compiler cannot collapse
*             the repeated passes into one).
              start = mysecond()
              do l=1,inner
                  a(l) = 1.0d0
                  do i=1,M
                      b(i) = a(i)
                  end do
              end do
              finish = mysecond()
              time(2,k) = (finish-start)/dble(inner)

*             ---- DAXPY ----
*             scalar still holds the value left by the FILL kernel.
              start = mysecond()
              do l=1,inner
                  a(l) = 1.0d0
                  do i=1,M
                      b(i) = b(i) + scalar*a(i)
                  end do
              end do
              finish = mysecond()
              time(3,k) = (finish-start)/dble(inner)

*             ---- DOT ----
*             Eight independent partial sums over the first m8
*             elements, then a scalar loop for the remaining
*             mod(M,8) elements, so exactly M elements are read and
*             nothing beyond a(M), b(M) is touched.
              start = mysecond()
              do l=1,inner
                  b(l) = 1.0d0
                  sum0 = 0.0d0
                  sum1 = 0.0d0
                  sum2 = 0.0d0
                  sum3 = 0.0d0
                  sum4 = 0.0d0
                  sum5 = 0.0d0
                  sum6 = 0.0d0
                  sum7 = 0.0d0
                  do i=1,m8,8
                      sum0 = sum0 + a(i+0)*b(i+0)
                      sum1 = sum1 + a(i+1)*b(i+1)
                      sum2 = sum2 + a(i+2)*b(i+2)
                      sum3 = sum3 + a(i+3)*b(i+3)
                      sum4 = sum4 + a(i+4)*b(i+4)
                      sum5 = sum5 + a(i+5)*b(i+5)
                      sum6 = sum6 + a(i+6)*b(i+6)
                      sum7 = sum7 + a(i+7)*b(i+7)
                  end do
                  do i=m8+1,M
                      sum0 = sum0 + a(i)*b(i)
                  end do
              end do
*             Only the partial sums of the last repetition survive.
              sum = sum0 + sum1 + sum2 + sum3
     $            + sum4 + sum5 + sum6 + sum7
              finish = mysecond()
              time(4,k) = (finish-start)/dble(inner)
          end do

*         Best (smallest) time per kernel and the matching rate:
*         M iterations * bytes per iteration / time / 1e6.
          do i=1,4
              besttime(i) = 1.d+36
              do k=1,NTIMES
                  besttime(i) = min(besttime(i),time(i,k))
                  if (ALLTIMES) print *,i,k,time(i,k)
              end do
              rate(i) = dble(M)*bytes(i)/besttime(i)/1.d6
          end do
*         The last column is the timer granularity divided by the
*         best FILL time per repetition.
          write (*,1) M,NTIMES,rate(1),rate(2),rate(3),rate(4),
     $                tdelta/besttime(1)
*         Write the DOT result to unit 3 (presumably so the
*         reduction is a live value that cannot be optimised away).
          open (unit=3,form='unformatted')
          write (3) sum
          close (unit=3)
      end do
    1 format (1x,i8,2x,i4,1x,5(f8.1,2x))
      end