From 7992139e56f0d24012c65c25160ad54d070e313c Mon Sep 17 00:00:00 2001
From: Jakub Homola <jakub.homola@vsb.cz>
Date: Mon, 10 Jan 2022 16:04:25 +0100
Subject: [PATCH] omp offload pi timer changes

---
 13_omp_offload_pi/Makefile           | 24 +++++-------
 13_omp_offload_pi/pi_hip.hip.cpp     | 12 +++++-
 13_omp_offload_pi/pi_omp.cpp         |  6 ++-
 13_omp_offload_pi/pi_omp_offload.cpp |  6 ++-
 13_omp_offload_pi/pi_seq.cpp         |  6 ++-
 13_omp_offload_pi/timer.h            | 58 ++++++++++++++++++++++++++++
 6 files changed, 94 insertions(+), 18 deletions(-)
 create mode 100644 13_omp_offload_pi/timer.h

diff --git a/13_omp_offload_pi/Makefile b/13_omp_offload_pi/Makefile
index b7b59bc..422296d 100644
--- a/13_omp_offload_pi/Makefile
+++ b/13_omp_offload_pi/Makefile
@@ -2,26 +2,22 @@
 CLANG=/opt/rocm/llvm/bin/clang++
 
 
+.PHONY: compile clean run pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x
 
-.PHONY: all clean run seq omp offload_clang offload_aomp hip
 
-all: seq omp offload_clang offload_aomp hip
+
+compile: pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x
 
 clean:
 	rm -rf *.x
 
-run: all
-	@echo "Sequential"              && /usr/bin/time -f '%e' ./pi_seq.x       1000000000  > /dev/null
-	@echo "OpenMP"                  && /usr/bin/time -f '%e' ./pi_omp.x       10000000000 > /dev/null
-	@echo "OpenMP offloading Clang" && /usr/bin/time -f '%e' ./pi_gpu_clang.x 10000000000 > /dev/null
-	@echo "OpenMP offloading AOMP"  && /usr/bin/time -f '%e' ./pi_gpu_aomp.x  10000000000 > /dev/null
-	@echo "Classic HIP"             && /usr/bin/time -f '%e' ./pi_hip.x       10000000000 > /dev/null
-
-seq: pi_seq.x
-omp: pi_omp.x
-offload_clang: pi_gpu_clang.x
-offload_aomp: pi_gpu_aomp.x
-hip: pi_hip.x
+run: compile
+	@echo "Sequential"                &&   ./pi_seq.x       1000000000    &&   echo
+	@echo "OpenMP"                    &&   ./pi_omp.x       10000000000   &&   echo
+	@echo "OpenMP offloading Clang"   &&   ./pi_gpu_clang.x 10000000000   &&   echo
+	@echo "OpenMP offloading AOMP"    &&   ./pi_gpu_aomp.x  10000000000   &&   echo
+	@echo "Classic HIP"               &&   ./pi_hip.x       10000000000   &&   echo
+
 
 
 pi_seq.x: pi_seq.cpp
diff --git a/13_omp_offload_pi/pi_hip.hip.cpp b/13_omp_offload_pi/pi_hip.hip.cpp
index 087689f..4783491 100644
--- a/13_omp_offload_pi/pi_hip.hip.cpp
+++ b/13_omp_offload_pi/pi_hip.hip.cpp
@@ -1,8 +1,11 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
+#include "timer.h"
 #include <hip/hip_runtime.h>
 
+
+
 __global__ void pi_calc(long long count, double * result)
 {
     extern __shared__ volatile double sh_results[];
@@ -29,6 +32,8 @@ __global__ void pi_calc(long long count, double * result)
     atomicAdd(result, sh_results[0]);
 }
 
+
+
 int main(int argc, char **argv)
 {
     if(argc <= 1)
@@ -47,14 +52,19 @@ int main(int argc, char **argv)
     int bpg = 500;
     int tpb = 1024;
     int shmem_size = tpb * sizeof(double);
+    
+    timer t(true);
     pi_calc<<< bpg, tpb, shmem_size >>>(count, d_result);
+    hipDeviceSynchronize();
+    t.stop();
 
     hipMemcpy(&result, d_result, sizeof(double), hipMemcpyDeviceToHost);
 
     result /= count;
 
     printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
-    printf("Real value of pi from cmath library is           %.18f\n", M_PI);
+    printf("Real value of pi from cmath library is             %.18f\n", M_PI);
+    printf("Computation time: %7.3f s\n", t.get_total_time());
 
     return 0;
 }
diff --git a/13_omp_offload_pi/pi_omp.cpp b/13_omp_offload_pi/pi_omp.cpp
index af7e280..87a033c 100644
--- a/13_omp_offload_pi/pi_omp.cpp
+++ b/13_omp_offload_pi/pi_omp.cpp
@@ -1,6 +1,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
+#include "timer.h"
 
 int main(int argc, char **argv)
 {
@@ -14,17 +15,20 @@ int main(int argc, char **argv)
 
     double result = 0;
 
+    timer t(true);
 #pragma omp parallel for reduction(+:result)
     for(long long i = 0; i < count; i++)
     {
         double x = (i + 0.5) / count;
         result += 4 / (x * x + 1);
     }
+    t.stop();
 
     result /= count;
 
     printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
-    printf("Real value of pi from cmath library is           %.18f\n", M_PI);
+    printf("Real value of pi from cmath library is             %.18f\n", M_PI);
+    printf("Computation time: %7.3f s\n", t.get_total_time());
 
     return 0;
 }
diff --git a/13_omp_offload_pi/pi_omp_offload.cpp b/13_omp_offload_pi/pi_omp_offload.cpp
index 2380820..cbb020b 100644
--- a/13_omp_offload_pi/pi_omp_offload.cpp
+++ b/13_omp_offload_pi/pi_omp_offload.cpp
@@ -1,6 +1,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
+#include "timer.h"
 
 int main(int argc, char **argv)
 {
@@ -14,17 +15,20 @@ int main(int argc, char **argv)
 
     double result = 0;
 
+    timer t(true);
 #pragma omp target teams distribute parallel for map(to:count) map(tofrom:result) reduction(+:result)
     for(long long i = 0; i < count; i++)
     {
         double x = (i + 0.5) / count;
         result += 4 / (x * x + 1);
     }
+    t.stop();
 
     result /= count;
 
     printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
-    printf("Real value of pi from cmath library is           %.18f\n", M_PI);
+    printf("Real value of pi from cmath library is             %.18f\n", M_PI);
+    printf("Computation time: %7.3f s\n", t.get_total_time());
 
     return 0;
 }
diff --git a/13_omp_offload_pi/pi_seq.cpp b/13_omp_offload_pi/pi_seq.cpp
index f8b06ac..1195c1e 100644
--- a/13_omp_offload_pi/pi_seq.cpp
+++ b/13_omp_offload_pi/pi_seq.cpp
@@ -1,6 +1,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
+#include "timer.h"
 
 int main(int argc, char **argv)
 {
@@ -14,16 +15,19 @@ int main(int argc, char **argv)
 
     double result = 0;
 
+    timer t(true);
     for(long long i = 0; i < count; i++)
     {
         double x = (i + 0.5) / count;
         result += 4 / (x * x + 1);
     }
+    t.stop();
 
     result /= count;
 
     printf("Pi was approximated with %12lld points to be %.18f\n", count, result);
-    printf("Real value of pi from cmath library is           %.18f\n", M_PI);
+    printf("Real value of pi from cmath library is             %.18f\n", M_PI);
+    printf("Computation time: %7.3f s\n", t.get_total_time());
 
     return 0;
 }
diff --git a/13_omp_offload_pi/timer.h b/13_omp_offload_pi/timer.h
new file mode 100644
index 0000000..a462059
--- /dev/null
+++ b/13_omp_offload_pi/timer.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <chrono>
+
+class timer // stopwatch on std::chrono::steady_clock: keeps the last lap and the sum of all laps
+{
+private:
+	std::chrono::steady_clock::duration total_duration; // sum of every lap recorded by stop() since the last reset()
+	std::chrono::steady_clock::duration lap_duration; // length of the most recent start_point -> stop() interval
+	std::chrono::steady_clock::time_point start_point; // set only by start(); reset() does not touch it
+	static constexpr double ticks_to_sec = ((double)std::chrono::steady_clock::period::num) / std::chrono::steady_clock::period::den; // seconds per steady_clock tick (period::num / period::den)
+public:
+	timer(bool start = false) // zeroes both durations; when start is true the first lap begins immediately
+	{
+		this->reset();
+
+		if(start)
+			this->start();
+	}
+
+	void start() // begins a lap by recording the current time; accumulated durations are left unchanged
+	{
+		this->start_point = std::chrono::steady_clock::now();
+	}
+
+	void stop() // ends a lap: lap_duration = now - start_point, then adds that lap to total_duration
+	{
+		auto stop_point = std::chrono::steady_clock::now();
+		this->lap_duration = stop_point - start_point; // no check that start() was called beforehand
+		this->total_duration += this->lap_duration; // start_point is kept, so a repeated stop() adds an overlapping interval
+	}
+
+	void reset() // sets the lap and total durations back to zero ticks
+	{
+		this->total_duration = std::chrono::steady_clock::duration(0);
+		this->lap_duration = std::chrono::steady_clock::duration(0);
+	}
+
+	std::chrono::steady_clock::duration get_lap_duration() const // last lap as a steady_clock duration
+	{
+		return this->lap_duration;
+	}
+
+	std::chrono::steady_clock::duration get_total_duration() const // accumulated laps as a steady_clock duration
+	{
+		return this->total_duration;
+	}
+
+	double get_lap_time() const // last lap converted to seconds
+	{
+		return (double)this->lap_duration.count() * ticks_to_sec;
+	}
+
+	double get_total_time() const // accumulated laps converted to seconds
+	{
+		return this->total_duration.count() * ticks_to_sec; // tick count is promoted to double by the multiplication
+	}
+};
\ No newline at end of file
-- 
GitLab