diff --git a/README.md b/README.md index 542346b15ea055169199f945f3aa11ff31574e1c..0fe8b7888894fd98a61585472e3b04ec16e47c18 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,45 @@ -# matmult +# Matrix Multiplication +A simple code for test several matrix multiplication approaches. The simplest one is implemented by algorithm using 3 for loops. More optimal *blocked* variant is based on the idea scratched [here](http://www.netlib.org/utk/papers/autoblock/node2.html). + +It is easy use OpenMP to make both algorithm run in parallel. Distributed memory parallel version can be implemented using systolic arrays described [here](https://www.mcs.anl.gov/~itf/dbpp/text/node45.html#figlasystolic). + +The following executable are provided (in directory `src/app/`): + - `$ generate rows cols A.bin` - generates a random matrix with provided number of rows and cols to binary file A.bin + - `$ print A.bin` - prints matrix A.bin to standard output + - `$ compare A.bin B.bin` - compares matrices A.bin and B.bin. Matrices are considered as equal if norm(A-B) < 1e-6 + - `$ multiply A.bin B.bin C.bin` - multiplies matrices A.bin and B.bin. Output is stored to file C.bin + +All functionality is implemented in the `Matrix` class in the `src/` directory. Directory `tests/` contains tests cases for this project. + +## STEP 0: Create git repository (10%) + +Your code should be forked from this repository and hosted on code.it4i.cz as a private project with access for all teachers. + +## STEP 1: Building the library (10%) + +Provide compilation script for your application (the script should run independently on a current path). Script should load all necessary modules and call `cmake`. + +## STEP 2: Analysis of the application (10%) + +Use `Arm map` (module Forge) to analyze a sequential run of your application with given use case (`tests/large`). Identify the most time consuming regions that can be parallelized by OpenMP. + +## STEP 3: Use OpenMP to run the application in parallel (10%) + +Put OpenMP pragmas to a correct positions with appropriate variables visibility in order to utilize more threads effectively. + +## STEP 4: Test the correctness of the code (10%) + +Create script that automatically check correctness of your application for at least 3 different test cases. Comparison can be implemented as comparison of outputs of sequential and parallel runs. + +## STEP 5: Test the behavior of the code on the Karolina cluster (40%) + +1. Implement time measurement for all parallel regions using omp_get_wtime(). +2. Create script for run strong scalability measurement (PBS script). +3. Evaluate strong scalability of measured regions up to 128 threads and different thread affinity (compact, scatter, balanced, none). +4. Evaluate behaviour for domain specific parameters of your applications (project dependent, maybe none). (Blocking implementation for this project) +5. Prepare charts for all measurements. + +## STEP 6: Presentation of your project (10%) + +Prepare presentation in form of slides (pptx, pdf). The slides should address all topics requested above. diff --git a/src/app/compare.cpp b/src/app/compare.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9183c3756d3edb70d2452ae48a319ab53225c713 --- /dev/null +++ b/src/app/compare.cpp @@ -0,0 +1,23 @@ + +#include "matrix.h" + +#include <iostream> +#include <chrono> + +int main(int argc, char **argv) +{ + if (argc != 3) { + std::cerr << "use: compare 'A' 'B'\n"; + exit(1); + } + + Matrix A, B; + load(A, argv[1]); + load(B, argv[2]); + + if (!compare(A, B)) { + std::cerr << "Matrices are different\n"; + } else { + std::cout << "Matrices are identical\n"; + } +} diff --git a/src/app/generate.cpp b/src/app/generate.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c788cb477eda12eaa579ebe61f7c22d41594cf27 --- /dev/null +++ b/src/app/generate.cpp @@ -0,0 +1,25 @@ + +#include "matrix.h" + +#include <iostream> +#include <sstream> +#include <fstream> + +int main(int argc, char **argv) +{ + if (argc != 4) { + std::cerr << "use: generator 'matrix_rows' 'matrix_columns' 'output_file'\n"; + exit(1); + } + + size_t rows, cols; + std::istringstream ( argv[1] ) >> rows; + std::istringstream ( argv[2] ) >> cols; + +// std::cout << "Generate matrix with size=" << rows << "x" << cols << " to file='" << argv[3] << "'\n"; + + Matrix A(rows, cols); + random(A); + store(A, argv[3]); +} + diff --git a/src/app/multiply.cpp b/src/app/multiply.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f3e59aca49ccba2d7eeaf182529f18f712dd4b9a --- /dev/null +++ b/src/app/multiply.cpp @@ -0,0 +1,25 @@ + +#include "matrix.h" + +#include <iostream> +#include <chrono> + +int main(int argc, char **argv) +{ + if (argc != 4) { + std::cerr << "use: sequential 'A' 'B' 'output_file'\n"; + exit(1); + } + + Matrix C, A, B; + load(A, argv[1]); + load(B, argv[2]); + printf("Matrices loaded: A[%d, %d], B[%d, %d]\n", A.rows, A.cols, B.rows, B.cols); + + initMult(C, A, B); +// mult(C, A, B); + multBlocked(C, A, B, 8); + + printf("Matrices multiplied: C[%d, %d]\n", C.rows, C.cols); + store(C, argv[3]); +} diff --git a/src/app/print.cpp b/src/app/print.cpp new file mode 100644 index 0000000000000000000000000000000000000000..651d4820932b0808c858914c35bcb7ceebf66220 --- /dev/null +++ b/src/app/print.cpp @@ -0,0 +1,17 @@ + +#include "matrix.h" + +#include <iostream> + +int main(int argc, char **argv) +{ + if (argc != 2) { + std::cerr << "use: print 'input_file'\n"; + exit(1); + } + std::cout << "print matrix from file=" << argv[1] << "'\n"; + + Matrix A; + load(A, argv[1]); + print(A); +} diff --git a/src/matrix.cpp b/src/matrix.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b62d9a0c9e595b415b2d8a6ed3e5238ce9e47a27 --- /dev/null +++ b/src/matrix.cpp @@ -0,0 +1,110 @@ + +#include "matrix.h" + +#include <random> +#include <iostream> +#include <fstream> +#include <algorithm> + +bool compare(const Matrix &A, const Matrix &B) +{ + if (A.rows != B.rows || A.cols != B.cols) { + return false; + } + + double norm = 0; + for (int r = 0; r < A.rows; ++r) { + for (int c = 0; c < A.cols; ++c) { + norm += std::fabs(A(r, c) - B(r, c)); + } + } + return norm < 1e-6; +} + +void initMult(Matrix &C, const Matrix &A, const Matrix &B) +{ + if (A.cols != B.rows) { + std::cerr << "Matrices have invalid dimensions\n"; + } + C.resize(A.rows, B.cols); + std::fill(C.vals, C.vals + C.rows * C.cols, 0); +} + +void mult(Matrix &C, const Matrix &A, const Matrix &B) +{ + for (int i = 0; i < A.rows; ++i) { + for (int j = 0; j < B.cols; ++j) { + for (int k = 0; k < A.cols; ++k) { + C(i, j) += A(i, k) * B(k, j); + } + } + } +} + +void multBlocked(Matrix &C, const Matrix &A, const Matrix &B, int blockSize) +{ + int CRBlocks = C.rows / blockSize + C.rows % blockSize; + int CCBlocks = C.cols / blockSize + C.cols % blockSize; + int CKBlocks = A.cols / blockSize + A.cols % blockSize; + + for (int ci = 0; ci < CRBlocks; ++ci) { + int rbegin = ci * blockSize, rend = std::min((ci + 1) * blockSize, C.rows); + for (int cj = 0; cj < CCBlocks; ++cj) { + int cbegin = cj * blockSize, cend = std::min((cj + 1) * blockSize, C.cols); + for (int ck = 0; ck < CKBlocks; ++ck) { + int kbegin = ck * blockSize, kend = std::min((ck + 1) * blockSize, A.cols); + + for (int i = rbegin; i < rend; ++i) { + for (int j = cbegin; j < cend; ++j) { + for (int k = kbegin; k < kend; ++k) { + C(i, j) += A(i, k) * B(k, j); + } + } + } + } + } + } +} + +void random(Matrix &m) { + std::random_device rd; + std::mt19937 generator(rd()); + std::uniform_real_distribution<double> distribution(1.0, 2.0); + + for (int r = 0; r < m.rows; ++r) { + for (int c = 0; c < m.cols; ++c) { + m.vals[r * m.cols + c] = distribution(generator); + } + } +} + +void print(Matrix &m) +{ + for (int r = 0; r < m.rows; ++r) { + for (int c = 0; c < m.cols; ++c) { + printf(" %+f", m.vals[r * m.cols + c]); + } + printf("\n"); + } +} + +void load(Matrix &m, const char *file) +{ + std::ifstream os(file, std::ifstream::binary); + int rows, cols; + os.read(reinterpret_cast<char*>(&rows), sizeof(int)); + os.read(reinterpret_cast<char*>(&cols), sizeof(int)); + m.resize(rows, cols); + os.read(reinterpret_cast<char*>(m.vals), sizeof(double) * m.rows * m.cols); + os.close(); +} + +void store(const Matrix &m, const char *file) +{ + std::ofstream os(file, std::ofstream::binary); + os.write(reinterpret_cast<const char*>(&m.rows), sizeof(int)); + os.write(reinterpret_cast<const char*>(&m.cols), sizeof(int)); + os.write(reinterpret_cast<const char*>(m.vals), sizeof(double) * m.rows * m.cols); + os.close(); +} + diff --git a/src/matrix.h b/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..ee2f26fed2dc3456479e591646396ecc6557469f --- /dev/null +++ b/src/matrix.h @@ -0,0 +1,41 @@ + +#ifndef SRC_MATRIX_H_ +#define SRC_MATRIX_H_ + +struct Matrix { + Matrix(): rows{}, cols{}, vals{} {} + Matrix(int rows, int cols): rows(rows), cols(cols), vals(new double[rows * cols]) {} + ~Matrix() { delete[] vals; } + + Matrix(const Matrix &m) = delete; + Matrix(const Matrix &&m) = delete; + Matrix& operator=(const Matrix &m) = delete; + Matrix& operator=(const Matrix &&m) = delete; + + const double& operator()(int r, int c) const { return vals[r * cols + c]; } + double& operator()(int r, int c) { return vals[r * cols + c]; } + + void resize(int rows, int cols) + { + if (vals) delete vals; + this->rows = rows; + this->cols = cols; + this->vals = new double[rows * cols]; + } + + int rows, cols; + double *vals; +}; + +void random(Matrix &m); +void print(Matrix &m); +void load(Matrix &m, const char *file); +void store(const Matrix &m, const char *file); +bool compare(const Matrix &A, const Matrix &B); +void initMult(Matrix &C, const Matrix &A, const Matrix &B); + +void mult(Matrix &C, const Matrix &A, const Matrix &B); +void multBlocked(Matrix &C, const Matrix &A, const Matrix &B, int blockSize); +void multBlockedUnrolled(Matrix &C, const Matrix &A, const Matrix &B); + +#endif /* SRC_MATRIX_H_ */ diff --git a/tests/large/A.bin b/tests/large/A.bin new file mode 100644 index 0000000000000000000000000000000000000000..81b8b496a2e337da95e257f088913e9efc3e1481 Binary files /dev/null and b/tests/large/A.bin differ diff --git a/tests/large/B.bin b/tests/large/B.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1659f4063a00e441788e56def3d7bde88a2e9dc Binary files /dev/null and b/tests/large/B.bin differ diff --git a/tests/large/C.bin b/tests/large/C.bin new file mode 100644 index 0000000000000000000000000000000000000000..7ef624fc36924d7c9c5259dcbb4fb49958e8e38e Binary files /dev/null and b/tests/large/C.bin differ diff --git a/tests/primes/A.bin b/tests/primes/A.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a927f0ef7b8e90e8ec66838cded170ad6719531 Binary files /dev/null and b/tests/primes/A.bin differ diff --git a/tests/primes/B.bin b/tests/primes/B.bin new file mode 100644 index 0000000000000000000000000000000000000000..16f1cdb9bda1fda08317fb39546bad62f7010f61 Binary files /dev/null and b/tests/primes/B.bin differ diff --git a/tests/primes/C.bin b/tests/primes/C.bin new file mode 100644 index 0000000000000000000000000000000000000000..4828ce863b6beccb002b2cd26d8e546928ca67ae Binary files /dev/null and b/tests/primes/C.bin differ diff --git a/tests/small/A.bin b/tests/small/A.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0acb950a26f4eadade3aa3b06e260c20971b56b Binary files /dev/null and b/tests/small/A.bin differ diff --git a/tests/small/B.bin b/tests/small/B.bin new file mode 100644 index 0000000000000000000000000000000000000000..2da753f15bda1b3b029db9af2792379e393d933f Binary files /dev/null and b/tests/small/B.bin differ diff --git a/tests/small/C.bin b/tests/small/C.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9e6bcfb5acef318000412066325dfeebab850ca Binary files /dev/null and b/tests/small/C.bin differ diff --git a/tests/tiny/A.bin b/tests/tiny/A.bin new file mode 100644 index 0000000000000000000000000000000000000000..339f540711dca06a0e2761fad176ea471b184f20 Binary files /dev/null and b/tests/tiny/A.bin differ diff --git a/tests/tiny/B.bin b/tests/tiny/B.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9d28b01f9ab592ee2ae25cef960d216fb9cff0b Binary files /dev/null and b/tests/tiny/B.bin differ diff --git a/tests/tiny/C.bin b/tests/tiny/C.bin new file mode 100644 index 0000000000000000000000000000000000000000..5eee4b5bc98d6c27123be027892d29389fb9bb18 Binary files /dev/null and b/tests/tiny/C.bin differ