Streams results for Cray T3E-1200

From: Joe Glenski (glenski@sgi.com)
Date: Wed May 03 2000 - 14:48:55 CDT

  • Next message: Christian Simmendinger: "SR8000"

    John-

      Shown below are "stream" results for a 600 MHz CRAY T3E-1200
    with 512 PEs.

      A copy of the program used (based on the one C. Grassl used for
    earlier T3E systems) is also included.

    Regards,

    -Joe Glenski.

    Benchmarking Group
    Cray Inc., 655F Lone Oak Drive, Eagan, MN 55121 USA
    glenski@cray.com

    ------------------------------------------------------------------------

    Number of iterations: 10
    Size of Arrays: 501 Kwords

    STREAM Memory Bandwidth for Cray T3E-1200 4/28/2000

    All results are in MB/s --- 1 MB=10^6 B, *not* 2^20 B

    ---------------------------------------------------------
    Machine ID ncpus COPY SCALE ADD TRIAD Saxpy 1-Load 1-Store
    ---------------------------------------------------------

    Cray_T3E-1200 1 474. 480. 566. 558. 446. 397. 327.
    Cray_T3E-1200 2 951. 962. 1131. 1115. 894. 794. 645.
    Cray_T3E-1200 4 1902. 1918. 2262. 2230. 1788. 1587. 1284.
    Cray_T3E-1200 8 3800. 3849. 4523. 4458. 3577. 3173. 2376.
    Cray_T3E-1200 16 7616. 7696. 9046. 8913. 7153. 6347. 4733.
    Cray_T3E-1200 32 15194. 15386. 18088. 17829. 14304. 12710. 9348.
    Cray_T3E-1200 64 30459. 30764. 36170. 35646. 28607. 25385. 18473.
    Cray_T3E-1200 128 60882. 61554. 72335. 71283. 57209. 50765. 36516.
    Cray_T3E-1200 256 121503. 123064. 144679. 142580. 114418. 101525. 72635.
    Cray_T3E-1200 512 242981. 246031. 288345. 284920. 228289. 203031. 145731.

    COPY, SCALE, ADD, TRIAD are the standard STREAM benchmark kernels.
    Saxpy, 1-Load, 1-Store are additional Cray-only tests.

    -----------------------------------------------------------------------

          program Stream

    C Based on a version from: C. Grassl, Apr 18 1997
    C
    C Extended by Grassl to report results for three additional
    C tests, "Saxpy", "1 Load", and "1 Store".
    C
    C Updated 4/28/2000 J. Glenski to avoid compiler warnings.

          integer ofst,aoff,boff,coff
          parameter (N = 501*1024, NTIMES = 10)
          parameter (ofst = 8*1024)

          real*8 a(N+8*1024),b(N+8*1024),c(N+8*1024)
          common /acom/a,b,c

          real times(7)
          real avetime(7),rmstime(7),mintime(7),maxtime(7)
          character*10 label(7)
          integer bytes(7),flops(7)
          data avetime/7*0.0/
          data rmstime/7*0.0/
          data mintime/7*1.0e+36/
          data maxtime/7*0.0/

          data label/' Copy',
         . ' Scale',
         . ' Sum',
         . ' Triad',
         . ' Saxpy',
         . ' 1 Load',
         . '1 Store'/

          parameter (nbpw = 8 )
          data bytes/2,2,3,3,3,1,1/
          data flops/0,1,1,2,2,1,0/
          integer shmem_my_pe,shmem_n_pes
          round_up(ix,index) = ((ix + (index-1))/index)*index

          me = shmem_my_pe()
          numpes = shmem_n_pes()

          istart_a = loc(a)
          istart_b = loc(b)
          istart_c = loc(c)

          istart_a = (round_up(istart_a,8092) - istart_a)/8 + 1
          istart_b = (round_up(istart_b,8092) - istart_b)/8 + 1
          istart_c = (round_up(istart_c,8092) - istart_c)/8 + 1

          if ( me .eq. 0 ) then
            write(6,9020) numpes,ntimes,n/1024
            write(6,9030) (label(j),j=1,7)
          end if

          do 1000 k=1,NTIMES

            call bandwidth(n,a(istart_a), b(istart_b), c(istart_c), times)

            avetime = avetime + times
            rmstime = rmstime + times**2
            mintime = min( mintime, times )
            maxtime = max( maxtime, times )

            if ( me .eq. 0) then
            write(6,9040) k,(N*bytes(j)*nbpw*numpes/times(j)/1.0e6,j=1,7)
            end if

     1000 continue

          if ( me .eq. 0) then
          avetime = avetime/NTIMES
          write(6,9050) (N*bytes(j)*nbpw*numpes/mintime(j)/1.0e6,j=1,7),
         . (N*bytes(j)*nbpw*numpes/avetime(j)/1.0e6,j=1,7),
         . (N*bytes(j)*nbpw*numpes/maxtime(j)/1.0e6,j=1,7)
          end if

          call exit()

     9020 format( /' *** STREAM benchmark ***'
         . /' Number of PEs: ',i8
         . /' Number of iterations: ',i8
         . /' Size of Arrays: ',i8,' Kwords')
     9030 format(// 30(' '),'Bandwidth (Mbyte/s)'
         . /' Iterat. ',7a10,
         . / (' -------'),7(' ---------'))
     9040 format ( i6,2x,7f10.0)
     9050 format (/' Max: ',7f10.0/
         . ' Ave: ',7f10.0/
         . ' Min: ',7f10.0/)
          end

          subroutine dummysub(a,b,c,n)
          real a(*),b(*),c(*)
          return
          end

          subroutine bandwidth(n,a,b,c,times)

          real*8 a(n),b(n),c(n)
          real*8 times(7)
          data scalar /3.0/

          common /bandcom/ sum

          timer()=0.001*timef()

          t = timer()
          do j=1,N
              c(j) = a(j)
          end do
          call barrier
          times(1) = timer()-t

          call dummysub(a,b,c,n)

          t = timer()
          do j=1,N
              c(j) = scalar*a(j)
          end do
          call barrier
          times(2) = timer()-t

          call dummysub(a,b,c,n)

          t = timer()
          do j=1,N
              c(j) = a(j) + b(j)
          end do
          call barrier
          times(3) = timer()-t

          call dummysub(a,b,c,n)

          t = timer()
          do j=1,N
            c(j) = a(j)+scalar*b(j)
          end do
          call barrier
          times(4) = timer()-t

          call dummysub(a,b,c,n)

          t = timer()
          do j=1,N
            c(j) = c(j)+scalar*b(j)
          end do
          call barrier
          times(5) = timer()-t

          call dummysub(a,b,c,n)

          sum = 0.
          t = timer()
          do j=1,N
            sum = sum + a(j)
          end do
          call barrier
          times(6) = timer()-t

          call dummysub(a,b,c,n)

          t = timer()
          do j=1,N
            c(j) = 0.
          end do
          call barrier
          times(7) = timer()-t

          call dummysub(a,b,c,n)

          return
          end

    ------------------------------------------------------



    This archive was generated by hypermail 2b29 : Thu May 04 2000 - 03:42:16 CDT