Skip to content
Snippets Groups Projects
Commit d79e40b3 authored by Jan Siwiec's avatar Jan Siwiec
Browse files

Update power10.md

parent 51ad1f58
No related branches found
No related tags found
No related merge requests found
Pipeline #36162 passed with warnings
...@@ -25,6 +25,7 @@ The platform offers both `GNU` based and proprietary IBM toolchains for building ...@@ -25,6 +25,7 @@ The platform offers both `GNU` based and proprietary IBM toolchains for building
## Building Applications ## Building Applications
Our sample application depends on `BLAS`, therefore we start by loading the following modules (regardless of which toolchain we want to use): Our sample application depends on `BLAS`, therefore we start by loading the following modules (regardless of which toolchain we want to use):
``` ```
ml GCC OpenBLAS ml GCC OpenBLAS
``` ```
...@@ -32,10 +33,13 @@ ml GCC OpenBLAS ...@@ -32,10 +33,13 @@ ml GCC OpenBLAS
### GCC Toolchain ### GCC Toolchain
In the case of the GCC toolchain, we can go ahead and compile the application as usual using either `g++` In the case of the GCC toolchain, we can go ahead and compile the application as usual using either `g++`
``` ```
g++ -lopenblas hello.cpp -o hello g++ -lopenblas hello.cpp -o hello
``` ```
or `gfortran` or `gfortran`
``` ```
gfortran -lopenblas hello.f90 -o hello gfortran -lopenblas hello.f90 -o hello
``` ```
...@@ -44,6 +48,7 @@ as usual. ...@@ -44,6 +48,7 @@ as usual.
### IBM Toolchain ### IBM Toolchain
The IBM toolchain requires additional environment setup as it is installed in `/opt/ibm` and is not exposed as a module The IBM toolchain requires additional environment setup as it is installed in `/opt/ibm` and is not exposed as a module
``` ```
IBM_ROOT=/opt/ibm IBM_ROOT=/opt/ibm
OPENXLC_ROOT=$IBM_ROOT/openxlC/17.1.1 OPENXLC_ROOT=$IBM_ROOT/openxlC/17.1.1
...@@ -57,10 +62,12 @@ export LD_LIBRARY_PATH=$OPENXLF_ROOT/lib:$LD_LIBRARY_PATH ...@@ -57,10 +62,12 @@ export LD_LIBRARY_PATH=$OPENXLF_ROOT/lib:$LD_LIBRARY_PATH
``` ```
from there we can use either `ibm-clang++` from there we can use either `ibm-clang++`
``` ```
ibm-clang++ -lopenblas hello.cpp -o hello ibm-clang++ -lopenblas hello.cpp -o hello
``` ```
or `xlf` or `xlf`
``` ```
xlf -lopenblas hello.f90 -o hello xlf -lopenblas hello.f90 -o hello
``` ```
...@@ -81,10 +88,12 @@ export LD_LIBRARY_PATH=$ESSL_ROOT/lib64:$LD_LIBRARY_PATH ...@@ -81,10 +88,12 @@ export LD_LIBRARY_PATH=$ESSL_ROOT/lib64:$LD_LIBRARY_PATH
The simplest way to utilize `ESSL` in an application that already uses `BLAS` or `CBLAS` routines is to link with the provided `libessl.so`. This can be done by replacing `-lopenblas` with `-lessl` or `-lessl -lopenblas` (in case `ESSL` does not provide all required `BLAS` routines). The simplest way to utilize `ESSL` in an application that already uses `BLAS` or `CBLAS` routines is to link with the provided `libessl.so`. This can be done by replacing `-lopenblas` with `-lessl` or `-lessl -lopenblas` (in case `ESSL` does not provide all required `BLAS` routines).
In practice this can look like In practice this can look like
``` ```
g++ -L${ESSL_ROOT}/lib64 -lessl -lopenblas hello.cpp -o hello g++ -L${ESSL_ROOT}/lib64 -lessl -lopenblas hello.cpp -o hello
``` ```
or or
``` ```
gfortran -L${ESSL_ROOT}/lib64 -lessl -lopenblas hello.f90 -o hello gfortran -L${ESSL_ROOT}/lib64 -lessl -lopenblas hello.f90 -o hello
``` ```
...@@ -95,6 +104,7 @@ and similarly for IBM compilers (`ibm-clang++` and `xlf`). ...@@ -95,6 +104,7 @@ and similarly for IBM compilers (`ibm-clang++` and `xlf`).
The `hello world` example application (written in `C++` and `Fortran`) uses a simple stationary probability vector estimation to illustrate the use of GEMM (a BLAS level 3 routine). The `hello world` example application (written in `C++` and `Fortran`) uses a simple stationary probability vector estimation to illustrate the use of GEMM (a BLAS level 3 routine).
Stationary probability vector estimation in `C++`: Stationary probability vector estimation in `C++`:
```c++ ```c++
#include <iostream> #include <iostream>
#include <vector> #include <vector>
...@@ -107,45 +117,45 @@ const size_t MATRIX_SIZE = 1024; ...@@ -107,45 +117,45 @@ const size_t MATRIX_SIZE = 1024;
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
const size_t matrixElements = MATRIX_SIZE*MATRIX_SIZE; const size_t matrixElements = MATRIX_SIZE*MATRIX_SIZE;
std::vector<float> a(matrixElements, 1.0f / float(MATRIX_SIZE)); std::vector<float> a(matrixElements, 1.0f / float(MATRIX_SIZE));
for(size_t i = 0; i < MATRIX_SIZE; ++i) for(size_t i = 0; i < MATRIX_SIZE; ++i)
a[i] = 0.5f / (float(MATRIX_SIZE) - 1.0f); a[i] = 0.5f / (float(MATRIX_SIZE) - 1.0f);
a[0] = 0.5f; a[0] = 0.5f;
std::vector<float> w1(matrixElements, 0.0f); std::vector<float> w1(matrixElements, 0.0f);
std::vector<float> w2(matrixElements, 0.0f); std::vector<float> w2(matrixElements, 0.0f);
std::copy(a.begin(), a.end(), w1.begin()); std::copy(a.begin(), a.end(), w1.begin());
std::vector<float> *t1, *t2; std::vector<float> *t1, *t2;
t1 = &w1; t1 = &w1;
t2 = &w2; t2 = &w2;
auto c1 = std::chrono::steady_clock::now(); auto c1 = std::chrono::steady_clock::now();
for(size_t i = 0; i < ITERATIONS; ++i) for(size_t i = 0; i < ITERATIONS; ++i)
{ {
std::fill(t2->begin(), t2->end(), 0.0f); std::fill(t2->begin(), t2->end(), 0.0f);
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE,
1.0f, t1->data(), MATRIX_SIZE, 1.0f, t1->data(), MATRIX_SIZE,
a.data(), MATRIX_SIZE, a.data(), MATRIX_SIZE,
1.0f, t2->data(), MATRIX_SIZE); 1.0f, t2->data(), MATRIX_SIZE);
std::swap(t1, t2); std::swap(t1, t2);
} }
auto c2 = std::chrono::steady_clock::now(); auto c2 = std::chrono::steady_clock::now();
for(size_t i = 0; i < MATRIX_SIZE; ++i) for(size_t i = 0; i < MATRIX_SIZE; ++i)
{ {
std::cout << (*t1)[i*MATRIX_SIZE + i] << " "; std::cout << (*t1)[i*MATRIX_SIZE + i] << " ";
} }
std::cout << std::endl; std::cout << std::endl;
std::cout << "Elapsed Time: " << std::chrono::duration<double>(c2 - c1).count() << std::endl; std::cout << "Elapsed Time: " << std::chrono::duration<double>(c2 - c1).count() << std::endl;
return 0; return 0;
...@@ -153,57 +163,58 @@ int main(int argc, char *argv[]) ...@@ -153,57 +163,58 @@ int main(int argc, char *argv[])
``` ```
Stationary probability vector estimation in `Fortran`: Stationary probability vector estimation in `Fortran`:
```fortran ```fortran
program main program main
implicit none implicit none
integer :: matrix_size, iterations integer :: matrix_size, iterations
integer :: i integer :: i
real, allocatable, target :: a(:,:), w1(:,:), w2(:,:) real, allocatable, target :: a(:,:), w1(:,:), w2(:,:)
real, dimension(:,:), contiguous, pointer :: t1, t2, tmp real, dimension(:,:), contiguous, pointer :: t1, t2, tmp
real, pointer :: out_data(:), out_diag(:) real, pointer :: out_data(:), out_diag(:)
integer :: cr, cm, c1, c2 integer :: cr, cm, c1, c2
iterations = 32 iterations = 32
matrix_size = 1024 matrix_size = 1024
call system_clock(count_rate=cr) call system_clock(count_rate=cr)
call system_clock(count_max=cm) call system_clock(count_max=cm)
allocate(a(matrix_size, matrix_size)) allocate(a(matrix_size, matrix_size))
allocate(w1(matrix_size, matrix_size)) allocate(w1(matrix_size, matrix_size))
allocate(w2(matrix_size, matrix_size)) allocate(w2(matrix_size, matrix_size))
a(:,:) = 1.0 / real(matrix_size) a(:,:) = 1.0 / real(matrix_size)
a(:,1) = 0.5 / real(matrix_size - 1) a(:,1) = 0.5 / real(matrix_size - 1)
a(1,1) = 0.5 a(1,1) = 0.5
w1 = a w1 = a
w2(:,:) = 0.0 w2(:,:) = 0.0
t1 => w1 t1 => w1
t2 => w2 t2 => w2
call system_clock(c1) call system_clock(c1)
do i = 0, iterations do i = 0, iterations
t2(:,:) = 0.0 t2(:,:) = 0.0
call sgemm('N', 'N', matrix_size, matrix_size, matrix_size, 1.0, t1, matrix_size, a, matrix_size, 1.0, t2, matrix_size) call sgemm('N', 'N', matrix_size, matrix_size, matrix_size, 1.0, t1, matrix_size, a, matrix_size, 1.0, t2, matrix_size)
tmp => t1 tmp => t1
t1 => t2 t1 => t2
t2 => tmp t2 => tmp
end do end do
call system_clock(c2) call system_clock(c2)
out_data(1:size(t1)) => t1 out_data(1:size(t1)) => t1
out_diag => out_data(1::matrix_size+1) out_diag => out_data(1::matrix_size+1)
print *, out_diag print *, out_diag
print *, "Elapsed Time: ", (c2 - c1) / real(cr) print *, "Elapsed Time: ", (c2 - c1) / real(cr)
deallocate(a) deallocate(a)
deallocate(w1) deallocate(w1)
deallocate(w2) deallocate(w2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment