Skip to content
Snippets Groups Projects
Commit 37f0ca3e authored by Jakub Homola
Browse files

openmp offloading AOMP examples

parent 3955ec42
Branches
No related tags found
No related merge requests found
# Makefile for the OpenMP-offloading vector-addition example (vadd.cpp).
# The compiler driver can be overridden on the command line,
# e.g. `make AOMPCC=/path/to/aompcc`.
AOMPCC = aompcc

.PHONY: compile clean run

# Default target: build the executable.
compile: vadd.x

# Remove every built executable.
clean:
	rm -rf *.x

# Build if necessary, then run with the program's default arguments.
run: vadd.x
	./vadd.x

# $< is the source file, $@ is the target executable.
vadd.x: vadd.cpp
	$(AOMPCC) -O2 $< -o $@
#include <cstdio>
#include <cstdlib>
// Prints "<label>: " followed by the first n elements of values and a newline.
static void print_head(const char * label, const long long * values, long long n)
{
    printf("%s: ", label);
    for(long long i = 0; i < n; i++)
        printf("%3lld ", values[i]);
    printf("\n");
}

/*
 * Vector addition c = a + b using an OpenMP target region.
 *
 * Usage: vadd.x [count [print_count]]
 *   count       - number of elements per vector (default 1 << 20)
 *   print_count - how many leading elements of each vector to print
 *                 (default 16, clamped to count)
 *
 * Returns 0 on success, 1 when count is negative.
 */
int main(int argc, char ** argv)
{
    long long count = 1 << 20;
    if(argc > 1)
        count = atoll(argv[1]);
    if(count < 0)
    {
        // A negative size must never reach new[].
        fprintf(stderr, "Element count must not be negative\n");
        return 1;
    }

    long long print_count = 16;
    if(argc > 2)
        print_count = atoll(argv[2]);
    // Never print more elements than were allocated.
    if(print_count > count)
        print_count = count;

    long long * a = new long long[count];
    long long * b = new long long[count];
    long long * c = new long long[count];

    // Fill the inputs on the host: a[i] = i, b[i] = 10 * i.
    #pragma omp parallel for
    for(long long i = 0; i < count; i++)
    {
        a[i] = i;
        b[i] = 10 * i;
    }

    print_head("A", a, print_count);
    print_head("B", b, print_count);

    // a and b are mapped to the target before the region runs,
    // c is mapped back to the host when it finishes.
    #pragma omp target map(to: a[0:count],b[0:count]) map(from: c[0:count])
    #pragma omp teams distribute parallel for
    for(long long i = 0; i < count; i++)
    {
        c[i] = a[i] + b[i];
    }

    print_head("C", c, print_count);

    delete[] a;
    delete[] b;
    delete[] c;

    return 0;
}
# Makefile for the pi-approximation examples: sequential, host OpenMP,
# OpenMP offloading (built with both Clang and AOMP) and plain HIP.
# Every variable below can be overridden on the command line,
# e.g. `make GPU_ARCH=gfx90a`.
CLANG=/opt/rocm/llvm/bin/clang++
AOMPCC=aompcc
HIPCC=hipcc
# Offload target triple and GPU architecture used for the Clang offload build.
OFFLOAD_TRIPLE=amdgcn-amd-amdhsa
GPU_ARCH=gfx900

.PHONY: all clean run seq omp offload_clang offload_aomp hip

all: seq omp offload_clang offload_aomp hip

# Remove every built executable.
clean:
	rm -rf *.x

# Time each variant; program output is discarded, only the elapsed
# seconds reported by /usr/bin/time are shown.
# The sequential run uses 10x fewer points than the other variants.
run: all
	@echo "Sequential" && /usr/bin/time -f '%e' ./pi_seq.x 1000000000 > /dev/null
	@echo "OpenMP" && /usr/bin/time -f '%e' ./pi_omp.x 10000000000 > /dev/null
	@echo "OpenMP offloading Clang" && /usr/bin/time -f '%e' ./pi_gpu_clang.x 10000000000 > /dev/null
	@echo "OpenMP offloading AOMP" && /usr/bin/time -f '%e' ./pi_gpu_aomp.x 10000000000 > /dev/null
	@echo "Classic HIP" && /usr/bin/time -f '%e' ./pi_hip.x 10000000000 > /dev/null

# Short aliases for the individual executables.
seq: pi_seq.x
omp: pi_omp.x
offload_clang: pi_gpu_clang.x
offload_aomp: pi_gpu_aomp.x
hip: pi_hip.x

pi_seq.x: pi_seq.cpp
	${CLANG} -O2 $< -o $@

pi_omp.x: pi_omp.cpp
	${CLANG} -O2 -fopenmp $< -o $@

# Both offload executables are built from the same source file.
pi_gpu_clang.x: pi_omp_offload.cpp
	${CLANG} -O2 -fopenmp -fopenmp-targets=${OFFLOAD_TRIPLE} -Xopenmp-target=${OFFLOAD_TRIPLE} -march=${GPU_ARCH} $< -o $@

pi_gpu_aomp.x: pi_omp_offload.cpp
	${AOMPCC} -O2 $< -o $@

pi_hip.x: pi_hip.hip.cpp
	${HIPCC} -O2 $< -o $@
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <hip/hip_runtime.h>
/*
 * Adds this block's share of sum_{i=0}^{count-1} 4 / (1 + x_i^2),
 * with x_i = (i + 0.5) / count, to *result.
 *
 * Launch requirements:
 *   - 1D grid and 1D block of any size;
 *   - dynamic shared memory of blockDim.x * sizeof(double) bytes;
 *   - *result must be initialised by the caller (one atomicAdd per block
 *     accumulates into it).
 *
 * count  - total number of sample points
 * result - device pointer to the accumulator
 */
__global__ void pi_calc(long long count, double * result)
{
    extern __shared__ double sh_results[];

    // Widen to 64 bits before multiplying so the index and stride cannot
    // wrap around for large launch configurations.
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    const long long step = (long long)blockDim.x * gridDim.x;

    // Each thread sums every step-th sample starting at its global index.
    double my_result = 0;
    for(long long i = first; i < count; i += step)
    {
        double x = (i + 0.5) / count;
        my_result += 4 / (x * x + 1);
    }
    sh_results[threadIdx.x] = my_result;

    // Tree reduction in shared memory. No thread leaves early, so every
    // thread of the block reaches every __syncthreads(). The rounded-up
    // split keeps the reduction correct for block sizes that are not a
    // power of two.
    for(unsigned int active = blockDim.x; active > 1; )
    {
        const unsigned int lower = (active + 1) >> 1;
        // Make the writes of the previous step (or the initial store) visible.
        __syncthreads();
        // Reads touch [lower, active), writes touch [0, active - lower):
        // the ranges never overlap within one step.
        if(threadIdx.x + lower < active)
            sh_results[threadIdx.x] += sh_results[threadIdx.x + lower];
        active = lower;
    }

    // Thread 0 performed the last write to sh_results[0] itself.
    if(threadIdx.x == 0)
        atomicAdd(result, sh_results[0]);
}
int main(int argc, char **argv)
{
if(argc <= 1)
{
fprintf(stderr, "Not enough arguments\n");
return 1;
}
long long count = atoll(argv[1]);
double result = 0;
double * d_result;
hipMalloc(&d_result, sizeof(double));
hipMemcpy(d_result, &result, sizeof(double), hipMemcpyHostToDevice);
int bpg = 500;
int tpb = 1024;
int shmem_size = tpb * sizeof(double);
pi_calc<<< bpg, tpb, shmem_size >>>(count, d_result);
hipMemcpy(&result, d_result, sizeof(double), hipMemcpyDeviceToHost);
result /= count;
printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
return 0;
}
#include <cstdio>
#include <cstdlib>
#include <cmath>
/*
 * Approximates pi as (1 / count) * sum of 4 / (1 + x_i^2) with
 * x_i = (i + 0.5) / count, using an OpenMP parallel-for reduction.
 *
 * Usage: pi_omp.x <count>
 *   count - number of sample points, must be positive
 *
 * Returns 0 on success, 1 on missing or invalid arguments.
 */
int main(int argc, char **argv)
{
    if(argc <= 1)
    {
        fprintf(stderr, "Not enough arguments\n");
        return 1;
    }

    long long count = atoll(argv[1]);
    if(count <= 0)
    {
        // count is used as a divisor below; zero would produce NaN.
        fprintf(stderr, "Point count must be a positive integer\n");
        return 1;
    }

    double result = 0;

    // Partial sums of the threads are combined by the reduction clause.
    #pragma omp parallel for reduction(+:result)
    for(long long i = 0; i < count; i++)
    {
        double x = (i + 0.5) / count;
        result += 4 / (x * x + 1);
    }
    result /= count;

    printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
    printf("Real value of pi from cmath library is %.18f\n", M_PI);

    return 0;
}
#include <cstdio>
#include <cstdlib>
#include <cmath>
/*
 * Approximates pi as (1 / count) * sum of 4 / (1 + x_i^2) with
 * x_i = (i + 0.5) / count, with the loop placed in an OpenMP target region.
 *
 * Usage: <executable> <count>
 *   count - number of sample points, must be positive
 *
 * Returns 0 on success, 1 on missing or invalid arguments.
 */
int main(int argc, char **argv)
{
    if(argc <= 1)
    {
        fprintf(stderr, "Not enough arguments\n");
        return 1;
    }

    long long count = atoll(argv[1]);
    if(count <= 0)
    {
        // count is used as a divisor below; zero would produce NaN.
        fprintf(stderr, "Point count must be a positive integer\n");
        return 1;
    }

    double result = 0;

    // count is mapped to the target; result is mapped both ways and
    // combined across teams and threads by the reduction clause.
    #pragma omp target teams distribute parallel for map(to:count) map(tofrom:result) reduction(+:result)
    for(long long i = 0; i < count; i++)
    {
        double x = (i + 0.5) / count;
        result += 4 / (x * x + 1);
    }
    result /= count;

    printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
    printf("Real value of pi from cmath library is %.18f\n", M_PI);

    return 0;
}
#include <cstdio>
#include <cstdlib>
#include <cmath>
/*
 * Sequential reference: approximates pi as (1 / count) * sum of
 * 4 / (1 + x_i^2) with x_i = (i + 0.5) / count.
 *
 * Usage: pi_seq.x <count>
 *   count - number of sample points, must be positive
 *
 * Returns 0 on success, 1 on missing or invalid arguments.
 */
int main(int argc, char **argv)
{
    if(argc <= 1)
    {
        fprintf(stderr, "Not enough arguments\n");
        return 1;
    }

    long long count = atoll(argv[1]);
    if(count <= 0)
    {
        // count is used as a divisor below; zero would produce NaN.
        fprintf(stderr, "Point count must be a positive integer\n");
        return 1;
    }

    double result = 0;

    for(long long i = 0; i < count; i++)
    {
        double x = (i + 0.5) / count;
        result += 4 / (x * x + 1);
    }
    result /= count;

    printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
    printf("Real value of pi from cmath library is %.18f\n", M_PI);

    return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment