From 7992139e56f0d24012c65c25160ad54d070e313c Mon Sep 17 00:00:00 2001
From: Jakub Homola <jakub.homola@vsb.cz>
Date: Mon, 10 Jan 2022 16:04:25 +0100
Subject: [PATCH] omp offload pi timer changes

---
 13_omp_offload_pi/Makefile           | 24 +++++-------
 13_omp_offload_pi/pi_hip.hip.cpp     | 12 +++++-
 13_omp_offload_pi/pi_omp.cpp         |  6 ++-
 13_omp_offload_pi/pi_omp_offload.cpp |  6 ++-
 13_omp_offload_pi/pi_seq.cpp         |  6 ++-
 13_omp_offload_pi/timer.h            | 58 ++++++++++++++++++++++++++++
 6 files changed, 94 insertions(+), 18 deletions(-)
 create mode 100644 13_omp_offload_pi/timer.h

diff --git a/13_omp_offload_pi/Makefile b/13_omp_offload_pi/Makefile
index b7b59bc..422296d 100644
--- a/13_omp_offload_pi/Makefile
+++ b/13_omp_offload_pi/Makefile
@@ -2,26 +2,22 @@
 
 CLANG=/opt/rocm/llvm/bin/clang++
 
+.PHONY: compile clean run pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x
 
-.PHONY: all clean run seq omp offload_clang offload_aomp hip
 
-all: seq omp offload_clang offload_aomp hip
+
+compile: pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x
 
 clean:
 	rm -rf *.x
 
-run: all
-	@echo "Sequential" && /usr/bin/time -f '%e' ./pi_seq.x 1000000000 > /dev/null
-	@echo "OpenMP" && /usr/bin/time -f '%e' ./pi_omp.x 10000000000 > /dev/null
-	@echo "OpenMP offloading Clang" && /usr/bin/time -f '%e' ./pi_gpu_clang.x 10000000000 > /dev/null
-	@echo "OpenMP offloading AOMP" && /usr/bin/time -f '%e' ./pi_gpu_aomp.x 10000000000 > /dev/null
-	@echo "Classic HIP" && /usr/bin/time -f '%e' ./pi_hip.x 10000000000 > /dev/null
-
-seq: pi_seq.x
-omp: pi_omp.x
-offload_clang: pi_gpu_clang.x
-offload_aomp: pi_gpu_aomp.x
-hip: pi_hip.x
+run: compile
+	@echo "Sequential" && ./pi_seq.x 1000000000 && echo
+	@echo "OpenMP" && ./pi_omp.x 10000000000 && echo
+	@echo "OpenMP offloading Clang" && ./pi_gpu_clang.x 10000000000 && echo
+	@echo "OpenMP offloading AOMP" && ./pi_gpu_aomp.x 10000000000 && echo
+	@echo "Classic HIP" && ./pi_hip.x 10000000000 && echo
+
 
 
 pi_seq.x: pi_seq.cpp
diff --git a/13_omp_offload_pi/pi_hip.hip.cpp b/13_omp_offload_pi/pi_hip.hip.cpp
index 087689f..4783491 100644
--- a/13_omp_offload_pi/pi_hip.hip.cpp
+++ b/13_omp_offload_pi/pi_hip.hip.cpp
@@ -1,8 +1,11 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
+#include "timer.h"
 #include <hip/hip_runtime.h>
 
+
+
 __global__ void pi_calc(long long count, double * result)
 {
     extern __shared__ volatile double sh_results[];
@@ -29,6 +32,8 @@ __global__ void pi_calc(long long count, double * result)
         atomicAdd(result, sh_results[0]);
 }
 
+
+
 int main(int argc, char **argv)
 {
     if(argc <= 1)
@@ -47,14 +52,19 @@ int main(int argc, char **argv)
     int bpg = 500;
     int tpb = 1024;
     int shmem_size = tpb * sizeof(double);
+
+    timer t(true);
     pi_calc<<< bpg, tpb, shmem_size >>>(count, d_result);
+    hipDeviceSynchronize();
+    t.stop();
 
     hipMemcpy(&result, d_result, sizeof(double), hipMemcpyDeviceToHost);
 
     result /= count;
 
     printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
-    printf("Real value of pi from cmath library is %.18f\n", M_PI);
+    printf("Real value of pi from cmath library is %.18f\n", M_PI);
+    printf("Computation time: %7.3f s\n", t.get_total_time());
 
     return 0;
 }
diff --git a/13_omp_offload_pi/pi_omp.cpp b/13_omp_offload_pi/pi_omp.cpp
index af7e280..87a033c 100644
--- a/13_omp_offload_pi/pi_omp.cpp
+++ b/13_omp_offload_pi/pi_omp.cpp
@@ -1,6 +1,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
+#include "timer.h"
 
 int main(int argc, char **argv)
 {
@@ -14,17 +15,20 @@ int main(int argc, char **argv)
 
     double result = 0;
 
+    timer t(true);
     #pragma omp parallel for reduction(+:result)
     for(long long i = 0; i < count; i++)
     {
         double x = (i + 0.5) / count;
         result += 4 / (x * x + 1);
     }
+    t.stop();
 
     result /= count;
 
     printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
-    printf("Real value of pi from cmath library is %.18f\n", M_PI);
+    printf("Real value of pi from cmath library is %.18f\n", M_PI);
+    printf("Computation time: %7.3f s\n", t.get_total_time());
 
     return 0;
 }
diff --git a/13_omp_offload_pi/pi_omp_offload.cpp b/13_omp_offload_pi/pi_omp_offload.cpp
index 2380820..cbb020b 100644
--- a/13_omp_offload_pi/pi_omp_offload.cpp
+++ b/13_omp_offload_pi/pi_omp_offload.cpp
@@ -1,6 +1,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
+#include "timer.h"
 
 int main(int argc, char **argv)
 {
@@ -14,17 +15,20 @@ int main(int argc, char **argv)
 
     double result = 0;
 
+    timer t(true);
     #pragma omp target teams distribute parallel for map(to:count) map(tofrom:result) reduction(+:result)
     for(long long i = 0; i < count; i++)
     {
         double x = (i + 0.5) / count;
         result += 4 / (x * x + 1);
     }
+    t.stop();
 
     result /= count;
 
     printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
-    printf("Real value of pi from cmath library is %.18f\n", M_PI);
+    printf("Real value of pi from cmath library is %.18f\n", M_PI);
+    printf("Computation time: %7.3f s\n", t.get_total_time());
 
     return 0;
 }
diff --git a/13_omp_offload_pi/pi_seq.cpp b/13_omp_offload_pi/pi_seq.cpp
index f8b06ac..1195c1e 100644
--- a/13_omp_offload_pi/pi_seq.cpp
+++ b/13_omp_offload_pi/pi_seq.cpp
@@ -1,6 +1,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
+#include "timer.h"
 
 int main(int argc, char **argv)
 {
@@ -14,16 +15,19 @@ int main(int argc, char **argv)
 
     double result = 0;
 
+    timer t(true);
     for(long long i = 0; i < count; i++)
     {
         double x = (i + 0.5) / count;
         result += 4 / (x * x + 1);
     }
+    t.stop();
 
     result /= count;
 
     printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
-    printf("Real value of pi from cmath library is %.18f\n", M_PI);
+    printf("Real value of pi from cmath library is %.18f\n", M_PI);
+    printf("Computation time: %7.3f s\n", t.get_total_time());
 
     return 0;
 }
diff --git a/13_omp_offload_pi/timer.h b/13_omp_offload_pi/timer.h
new file mode 100644
index 0000000..a462059
--- /dev/null
+++ b/13_omp_offload_pi/timer.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <chrono>
+
+class timer
+{
+private:
+    std::chrono::steady_clock::duration total_duration;
+    std::chrono::steady_clock::duration lap_duration;
+    std::chrono::steady_clock::time_point start_point;
+    static constexpr double ticks_to_sec = ((double)std::chrono::steady_clock::period::num) / std::chrono::steady_clock::period::den; // duration of one tick
+public:
+    timer(bool start = false)
+    {
+        this->reset();
+
+        if(start)
+            this->start();
+    }
+
+    void start()
+    {
+        this->start_point = std::chrono::steady_clock::now();
+    }
+
+    void stop()
+    {
+        auto stop_point = std::chrono::steady_clock::now();
+        this->lap_duration = stop_point - start_point;
+        this->total_duration += this->lap_duration;
+    }
+
+    void reset()
+    {
+        this->total_duration = std::chrono::steady_clock::duration(0);
+        this->lap_duration = std::chrono::steady_clock::duration(0);
+    }
+
+    std::chrono::steady_clock::duration get_lap_duration() const
+    {
+        return this->lap_duration;
+    }
+
+    std::chrono::steady_clock::duration get_total_duration() const
+    {
+        return this->total_duration;
+    }
+
+    double get_lap_time() const
+    {
+        return (double)this->lap_duration.count() * ticks_to_sec;
+    }
+
+    double get_total_time() const
+    {
+        return this->total_duration.count() * ticks_to_sec;
+    }
+};
\ No newline at end of file
-- 
GitLab