Skip to content
Snippets Groups Projects
Commit c1247137 authored by Jan Siwiec's avatar Jan Siwiec
Browse files

Update amd.md

parent c2a7bf94
No related branches found
No related tags found
No related merge requests found
Pipeline #32352 passed with warnings
...@@ -10,7 +10,7 @@ salloc -N 1 -c 64 -A PROJECT-ID -p p03-amd --gres=gpu:4 --time=08:00:00 ...@@ -10,7 +10,7 @@ salloc -N 1 -c 64 -A PROJECT-ID -p p03-amd --gres=gpu:4 --time=08:00:00
where: where:
- `-N 1` means allocating one server, - `-N 1` means allocating one server,
- `-c 64` means allocating 64 cores, - `-c 64` means allocating 64 cores,
- `-A` is your project, - `-A` is your project,
- `-p p03-amd` is AMD partition, - `-p p03-amd` is AMD partition,
- `--gres=gpu:4` means allocating all 4 GPUs of the node, - `--gres=gpu:4` means allocating all 4 GPUs of the node,
...@@ -96,7 +96,7 @@ int main() ...@@ -96,7 +96,7 @@ int main()
for(long long i = 0; i < count; i++) for(long long i = 0; i < count; i++)
printf(" %7.2f", h_y[i]); printf(" %7.2f", h_y[i]);
printf("\n"); printf("\n");
// allocation of memory on the GPU device // allocation of memory on the GPU device
float * d_x; float * d_x;
float * d_y; float * d_y;
...@@ -162,7 +162,7 @@ Y: 0.00 110.00 220.00 330.00 440.00 550.00 660.00 770.00 880.00 990. ...@@ -162,7 +162,7 @@ Y: 0.00 110.00 220.00 330.00 440.00 550.00 660.00 770.00 880.00 990.
## HIP and ROCm Libraries ## HIP and ROCm Libraries
The list of official AMD libraries can be found here: [https://docs.amd.com/category/libraries][2]. The list of official AMD libraries can be found here: [https://docs.amd.com/category/libraries][2].
The libraries are installed in the same directory as ROCm The libraries are installed in the same directory as ROCm
...@@ -211,7 +211,7 @@ For this example we use `hipblas.hip.cpp`. ...@@ -211,7 +211,7 @@ For this example we use `hipblas.hip.cpp`.
int main() int main()
{ {
srand(9600); srand(9600);
int width = 10; int width = 10;
...@@ -241,7 +241,7 @@ int main() ...@@ -241,7 +241,7 @@ int main()
for(int i = 0; i < width; i++) for(int i = 0; i < width; i++)
printf("%6.3f ", h_x[i]); printf("%6.3f ", h_x[i]);
printf("\n"); printf("\n");
float * h_y; float * h_y;
hipHostMalloc(&h_y, height * sizeof(*h_y)); hipHostMalloc(&h_y, height * sizeof(*h_y));
for(int i = 0; i < height; i++) for(int i = 0; i < height; i++)
...@@ -251,7 +251,7 @@ int main() ...@@ -251,7 +251,7 @@ int main()
printf("%6.3f ", h_x[i]); printf("%6.3f ", h_x[i]);
printf("\n"); printf("\n");
// initialization of data in GPU memory // initialization of data in GPU memory
float * d_A; float * d_A;
...@@ -263,7 +263,7 @@ int main() ...@@ -263,7 +263,7 @@ int main()
float * d_x; float * d_x;
hipMalloc(&d_x, width * sizeof(*d_x)); hipMalloc(&d_x, width * sizeof(*d_x));
hipMemcpy(d_x, h_x, width * sizeof(*d_x), hipMemcpyHostToDevice); hipMemcpy(d_x, h_x, width * sizeof(*d_x), hipMemcpyHostToDevice);
float * d_y; float * d_y;
hipMalloc(&d_y, height * sizeof(*d_y)); hipMalloc(&d_y, height * sizeof(*d_y));
hipMemcpy(d_y, h_y, height * sizeof(*d_y), hipMemcpyHostToDevice); hipMemcpy(d_y, h_y, height * sizeof(*d_y), hipMemcpyHostToDevice);
...@@ -282,8 +282,8 @@ int main() ...@@ -282,8 +282,8 @@ int main()
for(int i = 0; i < height; i++) for(int i = 0; i < height; i++)
printf("%6.3f ", h_y[i]); printf("%6.3f ", h_y[i]);
printf("\n"); printf("\n");
// calculation of the result on the GPU using the hipBLAS library // calculation of the result on the GPU using the hipBLAS library
hipblasHandle_t blas_handle; hipblasHandle_t blas_handle;
...@@ -293,7 +293,7 @@ int main() ...@@ -293,7 +293,7 @@ int main()
hipDeviceSynchronize(); hipDeviceSynchronize();
hipblasDestroy(blas_handle); hipblasDestroy(blas_handle);
// copy the GPU result to CPU memory and print it // copy the GPU result to CPU memory and print it
hipMemcpy(h_y, d_y, height * sizeof(*d_y), hipMemcpyDeviceToHost); hipMemcpy(h_y, d_y, height * sizeof(*d_y), hipMemcpyDeviceToHost);
...@@ -356,8 +356,8 @@ int main() ...@@ -356,8 +356,8 @@ int main()
for(int c = 0; c < size; c++) for(int c = 0; c < size; c++)
printf("%6.3f ", h_A[r * h_A_ld + c]); printf("%6.3f ", h_A[r * h_A_ld + c]);
printf("\n"); printf("\n");
} }
std::vector<float> h_b(size); std::vector<float> h_b(size);
for(int i = 0; i < size; i++) for(int i = 0; i < size; i++)
h_b[i] = (10.0 * rand()) / RAND_MAX; h_b[i] = (10.0 * rand()) / RAND_MAX;
...@@ -378,7 +378,7 @@ int main() ...@@ -378,7 +378,7 @@ int main()
float * d_b; float * d_b;
hipMalloc(&d_b, size * sizeof(float)); hipMalloc(&d_b, size * sizeof(float));
float * d_x; float * d_x;
hipMalloc(&d_x, size * sizeof(float)); hipMalloc(&d_x, size * sizeof(float));
...@@ -390,7 +390,7 @@ int main() ...@@ -390,7 +390,7 @@ int main()
hipMemcpy2D(d_A, d_A_pitch, h_A.data(), h_A_pitch, size * sizeof(float), size, hipMemcpyHostToDevice); hipMemcpy2D(d_A, d_A_pitch, h_A.data(), h_A_pitch, size * sizeof(float), size, hipMemcpyHostToDevice);
hipMemcpy(d_b, h_b.data(), size * sizeof(float), hipMemcpyHostToDevice); hipMemcpy(d_b, h_b.data(), size * sizeof(float), hipMemcpyHostToDevice);
// solving the system using hipSOLVER // solving the system using hipSOLVER
...@@ -403,7 +403,7 @@ int main() ...@@ -403,7 +403,7 @@ int main()
float * workspace; float * workspace;
int wss = std::max(wss_trf, wss_trs); int wss = std::max(wss_trf, wss_trs);
hipMalloc(&workspace, wss * sizeof(float)); hipMalloc(&workspace, wss * sizeof(float));
hipsolverSgetrf(solverHandle, size, size, d_A, d_A_ld, workspace, wss, d_piv, info); hipsolverSgetrf(solverHandle, size, size, d_A, d_A_ld, workspace, wss, d_piv, info);
hipsolverSgetrs(solverHandle, HIPSOLVER_OP_N, size, 1, d_A, d_A_ld, d_piv, d_b, size, workspace, wss, info); hipsolverSgetrs(solverHandle, HIPSOLVER_OP_N, size, 1, d_A, d_A_ld, d_piv, d_b, size, workspace, wss, info);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment