Skip to content
Snippets Groups Projects
Commit 7992139e authored by Jakub Homola's avatar Jakub Homola
Browse files

omp offload pi timer changes

parent 37f0ca3e
No related branches found
No related tags found
No related merge requests found
......@@ -2,26 +2,22 @@
CLANG=/opt/rocm/llvm/bin/clang++
.PHONY: compile clean run pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x
.PHONY: all clean run seq omp offload_clang offload_aomp hip
all: seq omp offload_clang offload_aomp hip
compile: pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x
clean:
rm -rf *.x
run: all
@echo "Sequential" && /usr/bin/time -f '%e' ./pi_seq.x 1000000000 > /dev/null
@echo "OpenMP" && /usr/bin/time -f '%e' ./pi_omp.x 10000000000 > /dev/null
@echo "OpenMP offloading Clang" && /usr/bin/time -f '%e' ./pi_gpu_clang.x 10000000000 > /dev/null
@echo "OpenMP offloading AOMP" && /usr/bin/time -f '%e' ./pi_gpu_aomp.x 10000000000 > /dev/null
@echo "Classic HIP" && /usr/bin/time -f '%e' ./pi_hip.x 10000000000 > /dev/null
seq: pi_seq.x
omp: pi_omp.x
offload_clang: pi_gpu_clang.x
offload_aomp: pi_gpu_aomp.x
hip: pi_hip.x
run: compile
@echo "Sequential" && ./pi_seq.x 1000000000 && echo
@echo "OpenMP" && ./pi_omp.x 10000000000 && echo
@echo "OpenMP offloading Clang" && ./pi_gpu_clang.x 10000000000 && echo
@echo "OpenMP offloading AOMP" && ./pi_gpu_aomp.x 10000000000 && echo
@echo "Classic HIP" && ./pi_hip.x 10000000000 && echo
pi_seq.x: pi_seq.cpp
......
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include "timer.h"
#include <hip/hip_runtime.h>
__global__ void pi_calc(long long count, double * result)
{
extern __shared__ volatile double sh_results[];
......@@ -29,6 +32,8 @@ __global__ void pi_calc(long long count, double * result)
atomicAdd(result, sh_results[0]);
}
int main(int argc, char **argv)
{
if(argc <= 1)
......@@ -47,14 +52,19 @@ int main(int argc, char **argv)
int bpg = 500;
int tpb = 1024;
int shmem_size = tpb * sizeof(double);
timer t(true);
pi_calc<<< bpg, tpb, shmem_size >>>(count, d_result);
hipDeviceSynchronize();
t.stop();
hipMemcpy(&result, d_result, sizeof(double), hipMemcpyDeviceToHost);
result /= count;
printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
printf("Computation time: %7.3f s\n", t.get_total_time());
return 0;
}
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include "timer.h"
int main(int argc, char **argv)
{
......@@ -14,17 +15,20 @@ int main(int argc, char **argv)
double result = 0;
timer t(true);
#pragma omp parallel for reduction(+:result)
for(long long i = 0; i < count; i++)
{
double x = (i + 0.5) / count;
result += 4 / (x * x + 1);
}
t.stop();
result /= count;
printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
printf("Computation time: %7.3f s\n", t.get_total_time());
return 0;
}
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include "timer.h"
int main(int argc, char **argv)
{
......@@ -14,17 +15,20 @@ int main(int argc, char **argv)
double result = 0;
timer t(true);
#pragma omp target teams distribute parallel for map(to:count) map(tofrom:result) reduction(+:result)
for(long long i = 0; i < count; i++)
{
double x = (i + 0.5) / count;
result += 4 / (x * x + 1);
}
t.stop();
result /= count;
printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
printf("Computation time: %7.3f s\n", t.get_total_time());
return 0;
}
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include "timer.h"
int main(int argc, char **argv)
{
......@@ -14,16 +15,19 @@ int main(int argc, char **argv)
double result = 0;
timer t(true);
for(long long i = 0; i < count; i++)
{
double x = (i + 0.5) / count;
result += 4 / (x * x + 1);
}
t.stop();
result /= count;
printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
printf("Real value of pi from cmath library is %.18f\n", M_PI);
printf("Computation time: %7.3f s\n", t.get_total_time());
return 0;
}
#pragma once
#include <chrono>
/// Accumulating stopwatch built on std::chrono::steady_clock.
///
/// Each start()/stop() pair measures one "lap". The lap length is stored and
/// also added to a running total, so several separate regions can be timed
/// with a single object. reset() clears both the lap and the total.
class timer
{
private:
    using clock_type = std::chrono::steady_clock;

    clock_type::duration total_duration{0};   // sum of all completed laps since the last reset()
    clock_type::duration lap_duration{0};     // length of the most recently completed lap
    clock_type::time_point start_point{};     // time of the most recent start()
    bool running = false;                     // true between start() and the matching stop()

    /// Converts a clock duration to seconds as a double.
    static double to_seconds(clock_type::duration d)
    {
        return std::chrono::duration<double>(d).count();
    }

public:
    /// Creates a timer with zero lap and total time.
    /// @param start  when true, the timer starts measuring immediately.
    timer(bool start = false)
    {
        this->reset();
        if(start)
            this->start();
    }

    /// Begins a new lap. If a lap is already in progress it is restarted
    /// from the current time.
    void start()
    {
        this->start_point = clock_type::now();
        this->running = true;
    }

    /// Ends the current lap, stores its length and adds it to the total.
    ///
    /// Does nothing when no lap is in progress. Without this guard a stop()
    /// that was not preceded by start() would add the time since the clock's
    /// epoch, and a repeated stop() would count the same interval twice.
    void stop()
    {
        const auto stop_point = clock_type::now();
        if(!this->running)
            return;

        this->lap_duration = stop_point - this->start_point;
        this->total_duration += this->lap_duration;
        this->running = false;
    }

    /// Zeroes the lap and total durations. A lap that is currently in
    /// progress keeps running and is recorded by the next stop().
    void reset()
    {
        this->total_duration = clock_type::duration(0);
        this->lap_duration = clock_type::duration(0);
    }

    /// @return length of the most recently completed lap, in clock ticks.
    std::chrono::steady_clock::duration get_lap_duration() const
    {
        return this->lap_duration;
    }

    /// @return sum of all completed laps since the last reset(), in clock ticks.
    std::chrono::steady_clock::duration get_total_duration() const
    {
        return this->total_duration;
    }

    /// @return length of the most recently completed lap, in seconds.
    double get_lap_time() const
    {
        return to_seconds(this->lap_duration);
    }

    /// @return sum of all completed laps since the last reset(), in seconds.
    double get_total_time() const
    {
        return to_seconds(this->total_duration);
    }
};
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment