Skip to content
Snippets Groups Projects
jansik-blas.c 2.46 KiB
Newer Older
  • Learn to ignore specific revisions
  • Branislav Jansik's avatar
    Branislav Jansik committed
    //by Branislav Jansik, @IT4Innovations, 2022
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <omp.h>
    
    //blas declarations
    double dnrm2_(int *n, double *x, int *ix);
    void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, \
                double *b, int *ldb, double *beta, double *c, int *ldc); 
    
    //pcg random number generator
    typedef struct { uint64_t state;  uint64_t inc; } pcg32_random_t;
    uint32_t pcg32_random_r(pcg32_random_t* rng)
    {
        uint64_t oldstate = rng->state;
        // Advance internal state
        rng->state = oldstate * 6364136223846793005ULL + (rng->inc|1);
        // Calculate output function (XSH RR), uses old state for max ILP
        uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
        uint32_t rot = oldstate >> 59u;
        return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
    }
    
    int main(int argc, char *argv[]) {
    
    //declarations
    double *c, *z;
    double t0, t;
    
    int ONEI=1, ZEROI=0;
    double ONED=1.0, ONEDm=-1.0, ZEROD=0.0;
    char N = 'N';
    
    register int i, j;
    int NMAT=500, NITER=10, NMAT2;
    pcg32_random_t rng;
    
    //read inputs
      if (argc==1) printf("Usage: %s [NMAT] [NITER]\n",argv[0]);
    
      if (argc>1) NMAT    = atoi(argv[1]);
      NMAT2   = NMAT*NMAT;
    
      if (argc>2) NITER = atoi(argv[2]);
    
    //print inputs
      printf("Running %d iterations\nMatrix size: %d\nMemory: %.1f MiB\n", NITER, NMAT, (sizeof(double)*(double)(2*NMAT2)/1024/1024));
    
    //allocate memory
    posix_memalign((void *) &c,  (size_t)64, (size_t) sizeof(double)*NMAT2 );
    posix_memalign((void *) &z,  (size_t)64, (size_t) sizeof(double)*NMAT2 );
    
    //initialize matrices
    
    Branislav Jansik's avatar
    Branislav Jansik committed
    //rng left uninitialized on purpose
    
    Branislav Jansik's avatar
    Branislav Jansik committed
    #pragma omp parallel for default(shared) private(i,rng) schedule(static,8)
    for (i=0; i<NMAT2; i++) {
      c[i]=(double)(pcg32_random_r(&rng)%100000)/100000/NMAT;
      z[i]=(double)0.0;
    }
    
    //run mandelbrot inspired iterations
    t0 = omp_get_wtime();
    for (i=0; i<NITER; i++) {
      //C=Z*Z+C
      dgemm_(&N,&N,&NMAT,&NMAT,&NMAT,&ONED,z,&NMAT,z,&NMAT,&ONED,c,&NMAT);
      
      //Z=C*C+Z
      dgemm_(&N,&N,&NMAT,&NMAT,&NMAT,&ONED,c,&NMAT,c,&NMAT,&ONED,z,&NMAT);
    
      //C=Z*Z-C
      dgemm_(&N,&N,&NMAT,&NMAT,&NMAT,&ONED,z,&NMAT,z,&NMAT,&ONEDm,c,&NMAT);
    
      //Z=C*C-Z
      dgemm_(&N,&N,&NMAT,&NMAT,&NMAT,&ONED,c,&NMAT,c,&NMAT,&ONEDm,z,&NMAT);
    }
    t = omp_get_wtime();
    
    //print outputs
    printf("\nPerformance: %.1f GFlop/s\n\n",NITER*((double)4*2*NMAT2*NMAT)/1e9/(t-t0));
    
    Branislav Jansik's avatar
    Branislav Jansik committed
    printf("Norm c: %f\n",dnrm2_(&NMAT2,c,&ONEI));
    printf("Norm z: %f\n",dnrm2_(&NMAT2,z,&ONEI));
    
    Branislav Jansik's avatar
    Branislav Jansik committed
    
    //release memory
    free(c);
    free(z);
    
    return 0;
    }