From d79e40b3faaf3fe45b2482d3113a5fca20cae5e5 Mon Sep 17 00:00:00 2001
From: Jan Siwiec <jan.siwiec@vsb.cz>
Date: Wed, 17 Jan 2024 13:13:44 +0100
Subject: [PATCH] Update power10.md

---
 docs.it4i/cs/guides/power10.md | 71 ++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/docs.it4i/cs/guides/power10.md b/docs.it4i/cs/guides/power10.md
index 265293336..28c55dad8 100644
--- a/docs.it4i/cs/guides/power10.md
+++ b/docs.it4i/cs/guides/power10.md
@@ -25,6 +25,7 @@ The platform offers both `GNU` based and proprietary IBM toolchains for building
 ## Building Applications
 
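-Our sample application depends on `BLAS`, therefore we start by loading following modules (regardless of which toolchain we want to use):
+Our sample application depends on `BLAS`, therefore we start by loading the following modules (regardless of which toolchain we want to use):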
+
 ```
 ml GCC OpenBLAS
 ```
@@ -32,10 +33,13 @@ ml GCC OpenBLAS
 ### GCC Toolchain
 
 In the case of GCC toolchain we can go ahead and compile the application as usual using either `g++`
+
 ```
 g++ -lopenblas hello.cpp -o hello
 ```
+
 or `gfortran`
+
 ```
 gfortran -lopenblas hello.f90 -o hello
 ```
@@ -44,6 +48,7 @@ as usual.
 ### IBM Toolchain
 
 The IBM toolchain requires additional environment setup as it is installed in `/opt/ibm` and is not exposed as a module
+
 ```
 IBM_ROOT=/opt/ibm
 OPENXLC_ROOT=$IBM_ROOT/openxlC/17.1.1
@@ -57,10 +62,12 @@ export LD_LIBRARY_PATH=$OPENXLF_ROOT/lib:$LD_LIBRARY_PATH
 ```
 
 from there we can use either `ibm-clang++`
+
 ```
 ibm-clang++ -lopenblas hello.cpp -o hello
 ```
 or `xlf`
+
 ```
 xlf -lopenblas hello.f90 -o hello
 ```
@@ -81,10 +88,12 @@ export LD_LIBRARY_PATH=$ESSL_ROOT/lib64:$LD_LIBRARY_PATH
 
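-The simplest way to utilize `ESSL` in application, which already uses `BLAS` or `CBLAS` routines is to link with the provided `libessl.so`. This can be done by replacing `-lopenblas` with `-lessl` or `-lessl -lopenblas` (in case `ESSL` does not provide all required `BLAS` routines).
+The simplest way to utilize `ESSL` in an application that already uses `BLAS` or `CBLAS` routines is to link with the provided `libessl.so`. This can be done by replacing `-lopenblas` with `-lessl` or `-lessl -lopenblas` (in case `ESSL` does not provide all required `BLAS` routines).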
 In practice this can look like
+
 ```
 g++ -L${ESSL_ROOT}/lib64 -lessl -lopenblas hello.cpp -o hello
 ```
 or
+
 ```
 gfortran -L${ESSL_ROOT}/lib64 -lessl -lopenblas hello.f90 -o hello
 ```
@@ -95,6 +104,7 @@ and similarly for IBM compilers (`ibm-clang++` and `xlf`).
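-The `hello world` example application (written in `C++` and `Fortran`) uses simple stationary probability vector estimation to illustrate use of GEMM (BLAS 3 routine).
+The `hello world` example application (written in `C++` and `Fortran`) uses a simple stationary probability vector estimation to illustrate the use of GEMM (a BLAS level 3 routine).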
 
 Stationary probability vector estimation in `C++`:
+
 ```c++
 #include <iostream>
 #include <vector>
@@ -107,45 +117,45 @@ const size_t MATRIX_SIZE = 1024;
 int main(int argc, char *argv[])
 {
     const size_t matrixElements = MATRIX_SIZE*MATRIX_SIZE;
-    
+
     std::vector<float> a(matrixElements, 1.0f / float(MATRIX_SIZE));
-    
+
     for(size_t i = 0; i < MATRIX_SIZE; ++i)
         a[i] = 0.5f / (float(MATRIX_SIZE) - 1.0f);
     a[0] = 0.5f;
-    
+
     std::vector<float> w1(matrixElements, 0.0f);
     std::vector<float> w2(matrixElements, 0.0f);
-    
+
     std::copy(a.begin(), a.end(), w1.begin());
-    
+
     std::vector<float> *t1, *t2;
     t1 = &w1;
     t2 = &w2;
-    
+
     auto c1 = std::chrono::steady_clock::now();
-    
+
     for(size_t i = 0; i < ITERATIONS; ++i)
     {
         std::fill(t2->begin(), t2->end(), 0.0f);
-        
-        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, 
+
+        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE,
                     1.0f, t1->data(), MATRIX_SIZE,
-                    a.data(), MATRIX_SIZE, 
+                    a.data(), MATRIX_SIZE,
                     1.0f, t2->data(), MATRIX_SIZE);
-        
+
         std::swap(t1, t2);
     }
-    
+
     auto c2 = std::chrono::steady_clock::now();
-    
+
     for(size_t i = 0; i < MATRIX_SIZE; ++i)
     {
         std::cout << (*t1)[i*MATRIX_SIZE + i] << " ";
     }
-    
+
     std::cout << std::endl;
-    
+
     std::cout << "Elapsed Time: " << std::chrono::duration<double>(c2 - c1).count() << std::endl;
 
     return 0;
@@ -153,57 +163,58 @@ int main(int argc, char *argv[])
 ```
 
 Stationary probability vector estimation in `Fortran`:
+
 ```fortran
 program main
     implicit none
-    
+
     integer :: matrix_size, iterations
     integer :: i
     real, allocatable, target :: a(:,:), w1(:,:), w2(:,:)
     real, dimension(:,:), contiguous, pointer :: t1, t2, tmp
     real, pointer :: out_data(:), out_diag(:)
     integer :: cr, cm, c1, c2
-    
+
     iterations  = 32
     matrix_size = 1024
-    
+
     call system_clock(count_rate=cr)
     call system_clock(count_max=cm)
-    
+
     allocate(a(matrix_size, matrix_size))
     allocate(w1(matrix_size, matrix_size))
     allocate(w2(matrix_size, matrix_size))
-    
+
     a(:,:) = 1.0 / real(matrix_size)
     a(:,1) = 0.5 / real(matrix_size - 1)
     a(1,1) = 0.5
-    
+
     w1 = a
     w2(:,:) = 0.0
-    
+
     t1 => w1
     t2 => w2
-    
+
     call system_clock(c1)
-    
+
     do i = 0, iterations
         t2(:,:) = 0.0
-        
+
         call sgemm('N', 'N', matrix_size, matrix_size, matrix_size, 1.0, t1, matrix_size, a, matrix_size, 1.0, t2, matrix_size)
-        
+
         tmp => t1
         t1  => t2
         t2  => tmp
     end do
-    
+
     call system_clock(c2)
-    
+
     out_data(1:size(t1)) => t1
     out_diag => out_data(1::matrix_size+1)
-    
+
     print *, out_diag
     print *, "Elapsed Time: ", (c2 - c1) / real(cr)
-    
+
     deallocate(a)
     deallocate(w1)
     deallocate(w2)
-- 
GitLab