Skip to content
Snippets Groups Projects
Commit 78b2d16c authored by Ondrej Meca
Browse files

STEP 5.3: evaluate performance of blocked variant

parent 756a9632
Branches
No related tags found
No related merge requests found
...@@ -18,6 +18,12 @@ target_link_libraries(multiply matrix) ...@@ -18,6 +18,12 @@ target_link_libraries(multiply matrix)
add_executable(print src/app/print.cpp)
target_link_libraries(print matrix)

# Benchmark drivers: one executable per multiplication variant, each linked
# against the matrix library target.
add_executable(bench_simple src/bench/simple.cpp)
target_link_libraries(bench_simple matrix)
add_executable(bench_blocked src/bench/blocked.cpp)
target_link_libraries(bench_blocked matrix)

enable_testing()
add_executable(testBlocked tests/test.cpp)
target_link_libraries(testBlocked matrix)
......
#!/bin/bash
#PBS -S /bin/bash
#PBS -l select=1:ncpus=128:ompthreads=128
#PBS -q qexp
#PBS -N blocked

# Benchmark sweep for the blocked multiplication.
# For every block size 4..64 (powers of two), every KMP_AFFINITY setting and
# every thread count 1..128 (powers of two), runs ./build/bench_blocked on the
# large test matrices and lets it print its own timings.

ml icc

# Run from the directory the job was submitted from; abort instead of
# benchmarking from an unexpected location if the directory is unavailable.
cd "$PBS_O_WORKDIR" || exit 1

for BLOCK in {2..6}
do
    BLOCK_SIZE=$((2**BLOCK))
    echo "BLOCK=$BLOCK_SIZE"
    for AFFINITY in "none" "compact" "scatter" "balanced"
    do
        echo ""
        echo "KMP_AFFINITY=$AFFINITY"
        for THREADS in {0..7}
        do
            NUM_THREADS=$((2**THREADS))
            echo "OMP_NUM_THREADS=$NUM_THREADS"
            # C.bin is overwritten by every run; only the printed timings matter.
            KMP_AFFINITY="$AFFINITY" OMP_NUM_THREADS="$NUM_THREADS" \
                ./build/bench_blocked tests/large/A.bin tests/large/B.bin C.bin "$BLOCK_SIZE"
        done
    done
done
#!/bin/bash
#PBS -S /bin/bash
#PBS -l select=1:ncpus=128:ompthreads=128
#PBS -q qexp
#PBS -N simple

# Benchmark sweep for the simple (non-blocked) multiplication.
# For every KMP_AFFINITY setting and every thread count 1..128 (powers of
# two), runs ./build/bench_simple on the large test matrices and lets it print
# its own timings.

ml icc

# Run from the directory the job was submitted from; abort instead of
# benchmarking from an unexpected location if the directory is unavailable.
cd "$PBS_O_WORKDIR" || exit 1

for AFFINITY in "none" "compact" "scatter" "balanced"
do
    echo ""
    echo "KMP_AFFINITY=$AFFINITY"
    for THREADS in {0..7}
    do
        NUM_THREADS=$((2**THREADS))
        echo "OMP_NUM_THREADS=$NUM_THREADS"
        # C.bin is overwritten by every run; only the printed timings matter.
        KMP_AFFINITY="$AFFINITY" OMP_NUM_THREADS="$NUM_THREADS" \
            ./build/bench_simple tests/large/A.bin tests/large/B.bin C.bin
    done
done
#include "matrix.h"
#include <omp.h>
#include <iostream>
#include <cstdlib>
// Benchmark driver for the blocked matrix multiplication.
//
// Usage: <program> 'A' 'B' 'output_file' 'block_size'
//
// Loads A and B from argv[1] and argv[2], multiplies them with multBlocked()
// using the block size given in argv[4], stores the product to argv[3] and
// prints the wall-clock duration (omp_get_wtime) of the load, multiply and
// store phases as one ';'-separated line.
//
// Returns 1 when the argument count is wrong or block_size is not a positive
// integer that fits in an int.
int main(int argc, char **argv)
{
    if (argc != 5) {
        // argv[0] may be missing when argc is 0; fall back to the target name.
        const char *program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "bench_blocked";
        std::cerr << "use: " << program << " 'A' 'B' 'output_file' 'block_size'\n";
        return 1;
    }

    // Validate the block size before the (potentially long) load. atoi would
    // turn a typo into 0 and pass it on silently; strtol lets us reject
    // non-numeric input, trailing garbage, non-positive and oversized values.
    char *parseEnd = nullptr;
    const long parsedBlock = std::strtol(argv[4], &parseEnd, 10);
    const int blockSize = static_cast<int>(parsedBlock);
    if (parseEnd == argv[4] || *parseEnd != '\0' || parsedBlock <= 0 || blockSize != parsedBlock) {
        std::cerr << "block_size must be a positive integer, got '" << argv[4] << "'\n";
        return 1;
    }

    // Phase 1: load both operands.
    const double sload = omp_get_wtime();
    Matrix C, A, B;
    load(A, argv[1]);
    load(B, argv[2]);
    const double eload = omp_get_wtime();
    printf("Matrices loaded: A[%d, %d], B[%d, %d]\n", A.rows, A.cols, B.rows, B.cols);

    // Preparing C happens outside the timed multiplication window.
    initMult(C, A, B);

    // Phase 2: the measured blocked multiplication.
    const double smult = omp_get_wtime();
    multBlocked(C, A, B, blockSize);
    const double emult = omp_get_wtime();
    printf("Matrices multiplied: C[%d, %d]\n", C.rows, C.cols);

    // Phase 3: write the result.
    const double sstore = omp_get_wtime();
    store(C, argv[3]);
    const double estore = omp_get_wtime();

    printf("Load, Mult, Store; %e; %e; %e;\n", eload - sload, emult - smult, estore - sstore);
    return 0;
}
#include "matrix.h"
#include <omp.h>
#include <iostream>
// Benchmark driver for the simple (non-blocked) matrix multiplication.
//
// Usage: <program> 'A' 'B' 'output_file'
//
// Loads A and B from argv[1] and argv[2], multiplies them with mult(), stores
// the product to argv[3] and prints the wall-clock duration (omp_get_wtime)
// of the load, multiply and store phases as one ';'-separated line.
//
// Returns 1 when the argument count is wrong.
int main(int argc, char **argv)
{
    if (argc != 4) {
        // argv[0] may be missing when argc is 0; fall back to the target name.
        const char *program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "bench_simple";
        std::cerr << "use: " << program << " 'A' 'B' 'output_file'\n";
        // Returning from main is equivalent to exit(1) here and does not
        // depend on <cstdlib> being included.
        return 1;
    }

    // Phase 1: load both operands.
    const double sload = omp_get_wtime();
    Matrix C, A, B;
    load(A, argv[1]);
    load(B, argv[2]);
    const double eload = omp_get_wtime();
    printf("Matrices loaded: A[%d, %d], B[%d, %d]\n", A.rows, A.cols, B.rows, B.cols);

    // Preparing C happens outside the timed multiplication window.
    initMult(C, A, B);

    // Phase 2: the measured multiplication.
    const double smult = omp_get_wtime();
    mult(C, A, B);
    const double emult = omp_get_wtime();
    printf("Matrices multiplied: C[%d, %d]\n", C.rows, C.cols);

    // Phase 3: write the result.
    const double sstore = omp_get_wtime();
    store(C, argv[3]);
    const double estore = omp_get_wtime();

    printf("Load, Mult, Store; %e; %e; %e;\n", eload - sload, emult - smult, estore - sstore);
    return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment