Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
//by Branislav Jansik, @IT4Innovations, 2022
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <omp.h>
//blas declarations
double dnrm2_(int *n, double *x, int *ix);
void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, \
double *b, int *ldb, double *beta, double *c, int *ldc);
//pcg random number generator
typedef struct { uint64_t state; uint64_t inc; } pcg32_random_t;
uint32_t pcg32_random_r(pcg32_random_t* rng)
{
uint64_t oldstate = rng->state;
// Advance internal state
rng->state = oldstate * 6364136223846793005ULL + (rng->inc|1);
// Calculate output function (XSH RR), uses old state for max ILP
uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
uint32_t rot = oldstate >> 59u;
return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
}
int main(int argc, char *argv[]) {
//declarations
double *c, *z;
double t0, t;
int ONEI=1, ZEROI=0;
double ONED=1.0, ONEDm=-1.0, ZEROD=0.0;
char N = 'N';
register int i, j;
int NMAT=500, NITER=10, NMAT2;
pcg32_random_t rng;
//read inputs
if (argc==1) printf("Usage: %s [NMAT] [NITER]\n",argv[0]);
if (argc>1) NMAT = atoi(argv[1]);
NMAT2 = NMAT*NMAT;
if (argc>2) NITER = atoi(argv[2]);
//print inputs
printf("Running %d iterations\nMatrix size: %d\nMemory: %.1f MiB\n", NITER, NMAT, (sizeof(double)*(double)(2*NMAT2)/1024/1024));
//allocate memory
posix_memalign((void *) &c, (size_t)64, (size_t) sizeof(double)*NMAT2 );
posix_memalign((void *) &z, (size_t)64, (size_t) sizeof(double)*NMAT2 );
//initialize matrices
#pragma omp parallel for default(shared) private(i,rng) schedule(static,8)
for (i=0; i<NMAT2; i++) {
c[i]=(double)(pcg32_random_r(&rng)%100000)/100000/NMAT;
z[i]=(double)0.0;
}
//run mandelbrot inspired iterations
t0 = omp_get_wtime();
for (i=0; i<NITER; i++) {
//C=Z*Z+C
dgemm_(&N,&N,&NMAT,&NMAT,&NMAT,&ONED,z,&NMAT,z,&NMAT,&ONED,c,&NMAT);
//Z=C*C+Z
dgemm_(&N,&N,&NMAT,&NMAT,&NMAT,&ONED,c,&NMAT,c,&NMAT,&ONED,z,&NMAT);
//C=Z*Z-C
dgemm_(&N,&N,&NMAT,&NMAT,&NMAT,&ONED,z,&NMAT,z,&NMAT,&ONEDm,c,&NMAT);
//Z=C*C-Z
dgemm_(&N,&N,&NMAT,&NMAT,&NMAT,&ONED,c,&NMAT,c,&NMAT,&ONEDm,z,&NMAT);
}
t = omp_get_wtime();
//print outputs
printf("\nPerformance: %.1f GFlop/s\n\n",NITER*((double)4*2*NMAT2*NMAT)/1e9/(t-t0));
printf("Norm c: %f\n",dnrm2_(&NMAT2,c,&ONEI));
printf("Norm z: %f\n",dnrm2_(&NMAT2,z,&ONEI));
//release memory
free(c);
free(z);
return 0;
}