/*
 * Program: Stream
 * Programmer: John D. McCalpin
 * Revision: 2.1, August 30, 1995
 *
 * This program measures memory transfer rates in MB/s for simple
 * computational kernels. (The kernels were originally coded in Fortran;
 * this file is a C port.) These numbers reveal the quality of code
 * generation for simple uncacheable kernels, as well as showing the cost
 * of floating-point operations relative to memory accesses.
 *
 * INSTRUCTIONS:
 * 1) (Fortran-specific, omitted.)
 * 2) Stream requires a good bit of memory to run.
 *    Adjust the parameter 'N' (the '#define N' that follows this comment)
 *    to give a 'timing calibration' of at least 20 clicks.
 *    This will provide rate estimates that should be good to
 *    about 5% precision.
 * 3) Compile the code with full optimization. Many compilers
 *    generate unreasonably bad code before the optimizer tightens
 *    things up. If the results are unreasonably good, on the
 *    other hand, the optimizer might be too smart for me!
 * 4) Mail the results to mccalpin@cs.virginia.edu
 *    Be sure to include:
 *    a) computer hardware model number and software revision
 *    b) the compiler flags
 *    c) all of the output from the test case.
 * Thanks!
 *
 * This version was ported from Fortran to C by Mark Hahn, hahn+@pitt.edu.
*/

/* Number of elements in each of the static arrays a, b and c. */
#define N 1000000
/* Second dimension of the times[][] table declared in main()
 * (presumably the number of timed repetitions - confirm against the
 * measurement loop). */
#define NTIMES 10

/* Platform feature-test macros: one is defined for HP-UX builds, the other
 * for every other platform. */
#ifdef __hpux
#define _HPUX_SOURCE 1
#else
#define _INCLUDE_POSIX_SOURCE 1
#endif

/* NOTE(review): the header names of the four #include directives below are
 * missing from this copy of the file and must be restored from the original
 * source. The visible code uses gettimeofday()/struct timeval and FLT_MAX,
 * so it presumably needs at least <sys/time.h> and <float.h> - confirm. */
#include
#include
#include
#include

/* Fallback MIN/MAX, defined only when not already provided.
 * Each argument may be evaluated twice, so avoid side effects in them. */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif

/* Reference timestamp: written by utimeStart(), read by utime(). */
struct timeval tvStart;

/* Record the current time of day in tvStart. */
void utimeStart()
{
    /* tz only receives the time-zone output of gettimeofday(); it is never read.
     * NOTE(review): POSIX leaves a non-null second argument unspecified -
     * consider passing NULL. */
    struct timezone tz;
    gettimeofday(&tvStart,&tz);
}

/* Return the time elapsed since the last utimeStart() call, in microseconds. */
float utime()
{
    struct timeval tv;
    struct timezone tz;
    /* This local shadows the function's own name inside the body. */
    float utime;
    gettimeofday(&tv,&tz);
    /* Whole seconds scaled to microseconds, plus the microsecond difference.
     * NOTE(review): the result is narrowed to float, which presumably cannot
     * hold every microsecond count exactly over long intervals - confirm that
     * this is acceptable for the intervals being timed. */
    utime = 1e6 * (tv.tv_sec - tvStart.tv_sec) + tv.tv_usec - tvStart.tv_usec;
    return utime;
}

/* Element type of the arrays below. */
typedef double real;

/* Three file-local arrays of N elements each (presumably the operands of the
 * benchmark kernels; the kernels are not visible in this part of the file). */
static real a[N],b[N],c[N];

int main()
{
    int j,k;
    /* 4 x NTIMES table of float values (presumably one row of timings per
     * entry of label[] - confirm). */
    float times[4][NTIMES];
    /* Per-entry accumulators, four each: rmstime and maxtime start at zero,
     * mintime starts at FLT_MAX. */
    static float rmstime[4] = {0};
    static float mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
    static float maxtime[4] = {0};
    /* Four text labels, one per array slot above. */
    static char *label[4] = {"Assignment:", "Scaling :", "Summing :", "SAXPYing :"};
    /* Each entry is 2 or 3 times sizeof(real) * N (presumably the bytes moved
     * by one pass of the corresponding kernel - confirm against the kernels). */
    static float bytes[4] = { 2 * sizeof(real) * N, 2 * sizeof(real) * N, 3 * sizeof(real) * N, 3 * sizeof(real) * N};

    /* --- SETUP --- determine precision and check timing --- */
    utimeStart();
    for (j=0; j