Re: IBM RS/6000 or HP Apollo 9000: which to buy?

From: Mark Hahn (hahn@neurocog.lrdc.pitt.edu)
Date: Wed Oct 27 1993 - 21:21:05 CDT


appended to this message is a fairly portable C translation of stream.f.
on our hp735 and "cc +P +O3 -J +Om1 -Wl,-a,archive", I get these results:

Timing calibration ; time = 760.00 usec.
Increase the size of the arrays if this is < 300
and your clock precision is =< 1/100 second.
---------------------------------------------------
Function Rate (MB/s) RMS time Min time Max time
Assignment: 69.837 247.083 240.000 260.000
Scaling : 69.837 246.049 240.000 250.000
Summing : 71.832 351.013 350.000 360.000
SAXPYing : 73.945 350.143 340.000 370.000

The code is also available for anon ftp from
neurocog.lrdc.pitt.edu:pub/cstream.c

/*
* Program: Stream
* Programmer: John D. McCalpin
* Revision: 2.0, September 30,1991
*
* This program measures memory transfer rates in MB/s for simple
* computational kernels (originally coded in Fortran; this file is
* the C port). These numbers reveal the
* quality of code generation for simple uncacheable kernels as well
* as showing the cost of floating-point operations relative to memory
* accesses.
*
* INSTRUCTIONS:
* 1) (fortran-specific, omitted.)
* 2) Stream requires a good bit of memory to run.
* Adjust the parameter 'N' (the '#define N' just below this comment)
* to give a 'timing calibration' of at least 20 clock ticks.
* This will provide rate estimates that should be good to
* about 5% precision.
* 3) Compile the code with full optimization. Many compilers
* generate unreasonably bad code before the optimizer tightens
* things up. If the results are unreasonably good, on the
* other hand, the optimizer might be too smart for me!
* 4) Mail the results to mccalpin@perelandra.cms.udel.edu
* Be sure to include:
* a) computer hardware model number and software revision
* b) the compiler flags
* c) all of the output from the test case.
*
* Thanks!
*
* This version was ported from the fortran by Mark Hahn, hahn+@pitt.edu.
*/

/* Number of elements in each of the benchmark arrays a, b and c. */
#define N (1023*1024)
/* Number of times the full set of timed kernels is repeated. */
#define NTIMES 10

/*
 * Feature-test macros; they must be defined before any system header
 * is included.
 * NOTE(review): presumably these make the HP-UX / POSIX headers expose
 * times(), struct tms and the clock-tick constant -- confirm on the
 * target platform.
 */
#define _HPUX_SOURCE 1
#define _POSIX_SOURCE 1
#define _XOPEN_SOURCE 1
#define _INCLUDE_POSIX_SOURCE 1

#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <time.h>

#include <sys/times.h>
#include <unistd.h>

/*
 * Two-argument minimum / maximum.
 *
 * Each macro has its own guard, so a platform header that already
 * supplies only one of them still leaves both available here.
 * Both macros evaluate their arguments more than once: do not pass
 * expressions with side effects.
 */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif

/* CPU-time snapshot recorded by the most recent mtimeStart() call. */
struct tms tmsStart;

/*
 * Start (or restart) the stopwatch by recording the process's current
 * CPU times in tmsStart.
 */
void mtimeStart(void) {
    times(&tmsStart);
}

/*
 * Return the CPU time (user + system) consumed by this process since
 * the last mtimeStart() call, in milliseconds.
 *
 * The tick rate is obtained from sysconf(_SC_CLK_TCK); the obsolete
 * CLK_TCK macro is not defined by many current headers.  If the tick
 * rate cannot be determined, 0 is returned rather than a bogus value.
 */
float mtime(void) {
    struct tms now;
    long ticksPerSec;
    clock_t elapsedTicks;

    times(&now);

    ticksPerSec = sysconf(_SC_CLK_TCK);
    if (ticksPerSec <= 0)
        return 0.0f;

    elapsedTicks = (now.tms_stime - tmsStart.tms_stime) +
                   (now.tms_utime - tmsStart.tms_utime);

    /* ticks / (ticks per second) = seconds; the 1e3 factor gives ms. */
    return (float) (1e3 * (double) elapsedTicks / (double) ticksPerSec);
}

/* Element type of every benchmark array. */
typedef double real;
/*
 * Benchmark arrays of N elements each: main() reads a and b as kernel
 * inputs and writes every kernel's result into c.
 */
static real a[N],b[N],c[N];

int main() {
    int j,k;
    float times[4][NTIMES];
    static float rmstime[4] = {0};
    static float mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
    static float maxtime[4] = {0};
    static char *label[4] = {"Assignment:",
                             "Scaling :",
                             "Summing :",
                             "SAXPYing :"};
    static float bytes[4] = { 2 * sizeof(real) * N,
                              2 * sizeof(real) * N,
                              3 * sizeof(real) * N,
                              3 * sizeof(real) * N};

    /* --- SETUP --- determine precision and check timing --- */
    mtimeStart();
    for (j=0; j<N; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
    }
    printf("Timing calibration ; time = %.2f usec.\n",mtime());
    printf("Increase the size of the arrays if this is < 300\n"
           "and your clock precision is =< 1/100 second.\n");
    printf("---------------------------------------------------\n");
    
    /* --- MAIN LOOP --- repeat test cases NTIMES times --- */
    for (k=0; k<NTIMES; k++) {
        mtimeStart();
        for (j=0; j<N; j++)
            c[j] = a[j];
        times[0][k] = mtime();
        
        mtimeStart();
        for (j=0; j<N; j++)
            c[j] = 3.0e0*a[j];
        times[1][k] = mtime();
        
        mtimeStart();
        for (j=0; j<N; j++)
            c[j] = a[j]+b[j];
        times[2][k] = mtime();
        
        mtimeStart();
        for (j=0; j<N; j++)
            c[j] = a[j]+3.0e0*b[j];
        times[3][k] = mtime();
    }
    
    /* --- SUMMARY --- */
    for (k=0; k<NTIMES; k++) {
        for (j=0; j<4; j++) {
            rmstime[j] = rmstime[j] + (times[j][k] * times[j][k]);
            mintime[j] = MIN(mintime[j], times[j][k]);
            maxtime[j] = MAX(maxtime[j], times[j][k]);
        }
    }
    
    printf("Function Rate (MB/s) RMS time Min time Max time\n");
    for (j=0; j<4; j++) {
        rmstime[j] = sqrt(rmstime[j]/(float)NTIMES);

        printf("%s%11.3f %11.3f %11.3f %11.3f\n",
               label[j],
               bytes[j]/mintime[j]/1e3,
               rmstime[j],
               mintime[j],
               maxtime[j]);
    }
    return 0;
}

regards, mark hahn.

--
this space intentionally left non-blank.	hahn@neurocog.lrdc.pitt.edu



This archive was generated by hypermail 2b29 : Tue Apr 18 2000 - 05:23:03 CDT