diff --git a/13_omp_offload_pi/Makefile b/13_omp_offload_pi/Makefile index b7b59bcdcbcacf952f48a847aef25c54fceae106..422296dc7b04d6c97cb24f7cfaea3dbc8601d937 100644 --- a/13_omp_offload_pi/Makefile +++ b/13_omp_offload_pi/Makefile @@ -2,26 +2,22 @@ CLANG=/opt/rocm/llvm/bin/clang++ +.PHONY: compile clean run pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x -.PHONY: all clean run seq omp offload_clang offload_aomp hip -all: seq omp offload_clang offload_aomp hip + +compile: pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x clean: rm -rf *.x -run: all - @echo "Sequential" && /usr/bin/time -f '%e' ./pi_seq.x 1000000000 > /dev/null - @echo "OpenMP" && /usr/bin/time -f '%e' ./pi_omp.x 10000000000 > /dev/null - @echo "OpenMP offloading Clang" && /usr/bin/time -f '%e' ./pi_gpu_clang.x 10000000000 > /dev/null - @echo "OpenMP offloading AOMP" && /usr/bin/time -f '%e' ./pi_gpu_aomp.x 10000000000 > /dev/null - @echo "Classic HIP" && /usr/bin/time -f '%e' ./pi_hip.x 10000000000 > /dev/null - -seq: pi_seq.x -omp: pi_omp.x -offload_clang: pi_gpu_clang.x -offload_aomp: pi_gpu_aomp.x -hip: pi_hip.x +run: compile + @echo "Sequential" && ./pi_seq.x 1000000000 && echo + @echo "OpenMP" && ./pi_omp.x 10000000000 && echo + @echo "OpenMP offloading Clang" && ./pi_gpu_clang.x 10000000000 && echo + @echo "OpenMP offloading AOMP" && ./pi_gpu_aomp.x 10000000000 && echo + @echo "Classic HIP" && ./pi_hip.x 10000000000 && echo + pi_seq.x: pi_seq.cpp diff --git a/13_omp_offload_pi/pi_hip.hip.cpp b/13_omp_offload_pi/pi_hip.hip.cpp index 087689f2cfcb2a1e51cfccd8e4bc6a7671a0b08a..4783491da3df39be76b1859e58f9d17e6a8f17a2 100644 --- a/13_omp_offload_pi/pi_hip.hip.cpp +++ b/13_omp_offload_pi/pi_hip.hip.cpp @@ -1,8 +1,11 @@ #include <cstdio> #include <cstdlib> #include <cmath> +#include "timer.h" #include <hip/hip_runtime.h> + + __global__ void pi_calc(long long count, double * result) { extern __shared__ volatile double sh_results[]; @@ -29,6 +32,8 @@ __global__ void pi_calc(long long count, double * result) atomicAdd(result, sh_results[0]); } + + int main(int argc, char **argv) { if(argc <= 1) @@ -47,14 +52,19 @@ int main(int argc, char **argv) int bpg = 500; int tpb = 1024; int shmem_size = tpb * sizeof(double); + + timer t(true); pi_calc<<< bpg, tpb, shmem_size >>>(count, d_result); + hipDeviceSynchronize(); + t.stop(); hipMemcpy(&result, d_result, sizeof(double), hipMemcpyDeviceToHost); result /= count; printf("Pi was approximated with %12lld points to be %.18f\n", count, result); - printf("Real value of pi from cmath library is %.18f\n", M_PI); + printf("Real value of pi from cmath library is %.18f\n", M_PI); + printf("Computation time: %7.3f s\n", t.get_total_time()); return 0; } diff --git a/13_omp_offload_pi/pi_omp.cpp b/13_omp_offload_pi/pi_omp.cpp index af7e28010aa915e4a592fd4025440db2697b57a0..87a033c7283e447a2adff654e1d8822e64296491 100644 --- a/13_omp_offload_pi/pi_omp.cpp +++ b/13_omp_offload_pi/pi_omp.cpp @@ -1,6 +1,7 @@ #include <cstdio> #include <cstdlib> #include <cmath> +#include "timer.h" int main(int argc, char **argv) { @@ -14,17 +15,20 @@ int main(int argc, char **argv) double result = 0; + timer t(true); #pragma omp parallel for reduction(+:result) for(long long i = 0; i < count; i++) { double x = (i + 0.5) / count; result += 4 / (x * x + 1); } + t.stop(); result /= count; printf("Pi was approximated with %12lld points to be %.18f\n", count, result); - printf("Real value of pi from cmath library is %.18f\n", M_PI); + printf("Real value of pi from cmath library is %.18f\n", M_PI); + printf("Computation time: %7.3f s\n", t.get_total_time()); return 0; } diff --git a/13_omp_offload_pi/pi_omp_offload.cpp b/13_omp_offload_pi/pi_omp_offload.cpp index 23808206ead2c51f5494c3cb19405e9c63f97a7e..cbb020bb9aed7b30bc11e5681adf1b01d6202bdc 100644 --- a/13_omp_offload_pi/pi_omp_offload.cpp +++ b/13_omp_offload_pi/pi_omp_offload.cpp @@ -1,6 +1,7 @@ #include <cstdio> #include <cstdlib> #include <cmath> +#include "timer.h" int main(int argc, char **argv) { @@ -14,17 +15,20 @@ int main(int argc, char **argv) double result = 0; + timer t(true); #pragma omp target teams distribute parallel for map(to:count) map(tofrom:result) reduction(+:result) for(long long i = 0; i < count; i++) { double x = (i + 0.5) / count; result += 4 / (x * x + 1); } + t.stop(); result /= count; printf("Pi was approximated with %12lld points to be %.18f\n", count, result); - printf("Real value of pi from cmath library is %.18f\n", M_PI); + printf("Real value of pi from cmath library is %.18f\n", M_PI); + printf("Computation time: %7.3f s\n", t.get_total_time()); return 0; } diff --git a/13_omp_offload_pi/pi_seq.cpp b/13_omp_offload_pi/pi_seq.cpp index f8b06ac750792b05dd6a1394a2f0dda939436664..1195c1eeaca37d220e3e444aab475bbcb8039985 100644 --- a/13_omp_offload_pi/pi_seq.cpp +++ b/13_omp_offload_pi/pi_seq.cpp @@ -1,6 +1,7 @@ #include <cstdio> #include <cstdlib> #include <cmath> +#include "timer.h" int main(int argc, char **argv) { @@ -14,16 +15,19 @@ int main(int argc, char **argv) double result = 0; + timer t(true); for(long long i = 0; i < count; i++) { double x = (i + 0.5) / count; result += 4 / (x * x + 1); } + t.stop(); result /= count; printf("Pi was approximated with %12lld points to be %.18f\n", count, result); - printf("Real value of pi from cmath library is %.18f\n", M_PI); + printf("Real value of pi from cmath library is %.18f\n", M_PI); + printf("Computation time: %7.3f s\n", t.get_total_time()); return 0; } diff --git a/13_omp_offload_pi/timer.h b/13_omp_offload_pi/timer.h new file mode 100644 index 0000000000000000000000000000000000000000..a4620595cb680561d9bb29ca1ef63fd4732b4aca --- /dev/null +++ b/13_omp_offload_pi/timer.h @@ -0,0 +1,58 @@ +#pragma once + +#include <chrono> + +class timer +{ +private: + std::chrono::steady_clock::duration total_duration; + std::chrono::steady_clock::duration lap_duration; + std::chrono::steady_clock::time_point start_point; + static constexpr double ticks_to_sec = ((double)std::chrono::steady_clock::period::num) / std::chrono::steady_clock::period::den; // duration of one tick +public: + timer(bool start = false) + { + this->reset(); + + if(start) + this->start(); + } + + void start() + { + this->start_point = std::chrono::steady_clock::now(); + } + + void stop() + { + auto stop_point = std::chrono::steady_clock::now(); + this->lap_duration = stop_point - start_point; + this->total_duration += this->lap_duration; + } + + void reset() + { + this->total_duration = std::chrono::steady_clock::duration(0); + this->lap_duration = std::chrono::steady_clock::duration(0); + } + + std::chrono::steady_clock::duration get_lap_duration() const + { + return this->lap_duration; + } + + std::chrono::steady_clock::duration get_total_duration() const + { + return this->total_duration; + } + + double get_lap_time() const + { + return (double)this->lap_duration.count() * ticks_to_sec; + } + + double get_total_time() const + { + return this->total_duration.count() * ticks_to_sec; + } +}; \ No newline at end of file