Commit 4b8a2426 authored by Pavel Gajdušek's avatar Pavel Gajdušek
Browse files

change structure, bad links

parent 0371675b
......@@ -91,92 +91,92 @@ Expected output of the deviceQuery example executed on a node with Tesla K20m is
In this section we provide a basic CUDA based vector addition code example. You can directly copy and paste the code to test it.
```console
```cpp
$ vim test.cu
#define N (2048*2048)
#define THREADS_PER_BLOCK 512
#include <stdio.h>
#include <stdlib.h>
// GPU kernel function to add two vectors
__global__ void add_gpu( int *a, int *b, int *c, int n){
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n)
c[index] = a[index] + b[index];
#define N (2048*2048)
#define THREADS_PER_BLOCK 512
#include <stdio.h>
#include <stdlib.h>
// GPU kernel function to add two vectors
__global__ void add_gpu( int *a, int *b, int *c, int n){
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n)
c[index] = a[index] + b[index];
}
// CPU function to add two vectors
void add_cpu (int *a, int *b, int *c, int n) {
for (int i=0; i < n; i++)
c[i] = a[i] + b[i];
}
// CPU function to generate a vector of random integers
void random_ints (int *a, int n) {
for (int i = 0; i < n; i++)
a[i] = rand() % 10000; // random number between 0 and 9999
}
// CPU function to compare two vectors
int compare_ints( int *a, int *b, int n ){
int pass = 0;
for (int i = 0; i < N; i++){
if (a[i] != b[i]) {
printf("Value mismatch at location %d, values %d and %dn",i, a[i], b[i]);
pass = 1;
}
}
if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn");
return pass;
}
// CPU function to add two vectors
void add_cpu (int *a, int *b, int *c, int n) {
for (int i=0; i < n; i++)
c[i] = a[i] + b[i];
}
int main( void ) {
// CPU function to generate a vector of random integers
void random_ints (int *a, int n) {
for (int i = 0; i < n; i++)
a[i] = rand() % 10000; // random number between 0 and 9999
}
int *a, *b, *c; // host copies of a, b, c
int *dev_a, *dev_b, *dev_c; // device copies of a, b, c
int size = N * sizeof( int ); // we need space for N integers
// CPU function to compare two vectors
int compare_ints( int *a, int *b, int n ){
int pass = 0;
for (int i = 0; i < N; i++){
if (a[i] != b[i]) {
printf("Value mismatch at location %d, values %d and %dn",i, a[i], b[i]);
pass = 1;
}
}
if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn");
return pass;
}
int main( void ) {
int *a, *b, *c; // host copies of a, b, c
int *dev_a, *dev_b, *dev_c; // device copies of a, b, c
int size = N * sizeof( int ); // we need space for N integers
// Allocate GPU/device copies of dev_a, dev_b, dev_c
cudaMalloc( (void**)&dev_a, size );
cudaMalloc( (void**)&dev_b, size );
cudaMalloc( (void**)&dev_c, size );
// Allocate GPU/device copies of dev_a, dev_b, dev_c
cudaMalloc( (void**)&dev_a, size );
cudaMalloc( (void**)&dev_b, size );
cudaMalloc( (void**)&dev_c, size );
// Allocate CPU/host copies of a, b, c
a = (int*)malloc( size );
b = (int*)malloc( size );
c = (int*)malloc( size );
// Allocate CPU/host copies of a, b, c
a = (int*)malloc( size );
b = (int*)malloc( size );
c = (int*)malloc( size );
// Fill input vectors with random integer numbers
random_ints( a, N );
random_ints( b, N );
// Fill input vectors with random integer numbers
random_ints( a, N );
random_ints( b, N );
// copy inputs to device
cudaMemcpy( dev_a, a, size, cudaMemcpyHostToDevice );
cudaMemcpy( dev_b, b, size, cudaMemcpyHostToDevice );
// copy inputs to device
cudaMemcpy( dev_a, a, size, cudaMemcpyHostToDevice );
cudaMemcpy( dev_b, b, size, cudaMemcpyHostToDevice );
// launch add_gpu() kernel with blocks and threads
add_gpu<<< N/THREADS_PER_BLOCK, THREADS_PER_BLOCK >>( dev_a, dev_b, dev_c, N );
// launch add_gpu() kernel with blocks and threads
add_gpu<<< N/THREADS_PER_BLOCK, THREADS_PER_BLOCK >>( dev_a, dev_b, dev_c, N );
// copy device result back to host copy of c
cudaMemcpy( c, dev_c, size, cudaMemcpyDeviceToHost );
// copy device result back to host copy of c
cudaMemcpy( c, dev_c, size, cudaMemcpyDeviceToHost );
//Check the results with CPU implementation
int *c_h; c_h = (int*)malloc( size );
add_cpu (a, b, c_h, N);
compare_ints(c, c_h, N);
//Check the results with CPU implementation
int *c_h; c_h = (int*)malloc( size );
add_cpu (a, b, c_h, N);
compare_ints(c, c_h, N);
// Clean CPU memory allocations
free( a ); free( b ); free( c ); free (c_h);
// Clean CPU memory allocations
free( a ); free( b ); free( c ); free (c_h);
// Clean GPU memory allocations
cudaFree( dev_a );
cudaFree( dev_b );
cudaFree( dev_c );
// Clean GPU memory allocations
cudaFree( dev_a );
cudaFree( dev_b );
cudaFree( dev_c );
return 0;
}
return 0;
}
```
This code can be compiled using following command
......@@ -204,81 +204,81 @@ The NVIDIA CUDA Basic Linear Algebra Subroutines (cuBLAS) library is a GPU-accel
SAXPY function multiplies the vector x by the scalar alpha and adds it to the vector y overwriting the latest vector with the result. The description of the cuBLAS function can be found in [NVIDIA CUDA documentation](http://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-axpy "Nvidia CUDA documentation "). Code can be pasted in the file and compiled without any modification.
```cpp
/* Includes, system */
#include <stdio.h>
#include <stdlib.h>
/* Includes, cuda */
#include <cuda_runtime.h>
#include <cublas_v2.h>
/* Vector size */
#define N (32)
/* Host implementation of a simple version of saxpi */
void saxpy(int n, float alpha, const float *x, float *y)
/* Includes, system */
#include <stdio.h>
#include <stdlib.h>
/* Includes, cuda */
#include <cuda_runtime.h>
#include <cublas_v2.h>
/* Vector size */
#define N (32)
/* Host implementation of a simple version of saxpi */
void saxpy(int n, float alpha, const float *x, float *y)
{
for (int i = 0; i < n; ++i)
y[i] = alpha*x[i] + y[i];
}
/* Main */
int main(int argc, char **argv)
{
float *h_X, *h_Y, *h_Y_ref;
float *d_X = 0;
float *d_Y = 0;
const float alpha = 1.0f;
int i;
cublasHandle_t handle;
/* Initialize CUBLAS */
printf("simpleCUBLAS test running..n");
cublasCreate(&handle);
/* Allocate host memory for the matrices */
h_X = (float *)malloc(N * sizeof(h_X[0]));
h_Y = (float *)malloc(N * sizeof(h_Y[0]));
h_Y_ref = (float *)malloc(N * sizeof(h_Y_ref[0]));
/* Fill the matrices with test data */
for (i = 0; i < N; i++)
{
for (int i = 0; i < n; ++i)
y[i] = alpha*x[i] + y[i];
h_X[i] = rand() / (float)RAND_MAX;
h_Y[i] = rand() / (float)RAND_MAX;
h_Y_ref[i] = h_Y[i];
}
/* Main */
int main(int argc, char **argv)
{
float *h_X, *h_Y, *h_Y_ref;
float *d_X = 0;
float *d_Y = 0;
const float alpha = 1.0f;
int i;
cublasHandle_t handle;
/* Allocate device memory for the matrices */
cudaMalloc((void **)&d_X, N * sizeof(d_X[0]));
cudaMalloc((void **)&d_Y, N * sizeof(d_Y[0]));
/* Initialize CUBLAS */
printf("simpleCUBLAS test running..n");
cublasCreate(&handle);
/* Initialize the device matrices with the host matrices */
cublasSetVector(N, sizeof(h_X[0]), h_X, 1, d_X, 1);
cublasSetVector(N, sizeof(h_Y[0]), h_Y, 1, d_Y, 1);
/* Allocate host memory for the matrices */
h_X = (float *)malloc(N * sizeof(h_X[0]));
h_Y = (float *)malloc(N * sizeof(h_Y[0]));
h_Y_ref = (float *)malloc(N * sizeof(h_Y_ref[0]));
/* Performs operation using plain C code */
saxpy(N, alpha, h_X, h_Y_ref);
/* Fill the matrices with test data */
for (i = 0; i < N; i++)
{
h_X[i] = rand() / (float)RAND_MAX;
h_Y[i] = rand() / (float)RAND_MAX;
h_Y_ref[i] = h_Y[i];
}
/* Performs operation using cublas */
cublasSaxpy(handle, N, &alpha, d_X, 1, d_Y, 1);
/* Allocate device memory for the matrices */
cudaMalloc((void **)&d_X, N * sizeof(d_X[0]));
cudaMalloc((void **)&d_Y, N * sizeof(d_Y[0]));
/* Read the result back */
cublasGetVector(N, sizeof(h_Y[0]), d_Y, 1, h_Y, 1);
/* Initialize the device matrices with the host matrices */
cublasSetVector(N, sizeof(h_X[0]), h_X, 1, d_X, 1);
cublasSetVector(N, sizeof(h_Y[0]), h_Y, 1, d_Y, 1);
/* Check result against reference */
for (i = 0; i < N; ++i)
printf("CPU res = %f t GPU res = %f t diff = %f n", h_Y_ref[i], h_Y[i], h_Y_ref[i] - h_Y[i]);
/* Performs operation using plain C code */
saxpy(N, alpha, h_X, h_Y_ref);
/* Memory clean up */
free(h_X); free(h_Y); free(h_Y_ref);
cudaFree(d_X); cudaFree(d_Y);
/* Performs operation using cublas */
cublasSaxpy(handle, N, &alpha, d_X, 1, d_Y, 1);
/* Read the result back */
cublasGetVector(N, sizeof(h_Y[0]), d_Y, 1, h_Y, 1);
/* Check result against reference */
for (i = 0; i < N; ++i)
printf("CPU res = %f t GPU res = %f t diff = %f n", h_Y_ref[i], h_Y[i], h_Y_ref[i] - h_Y[i]);
/* Memory clean up */
free(h_X); free(h_Y); free(h_Y_ref);
cudaFree(d_X); cudaFree(d_Y);
/* Shutdown */
cublasDestroy(handle);
}
/* Shutdown */
cublasDestroy(handle);
}
```
!!! note
......
# NWChem
## Introduction
NWChem aims to provide its users with computational chemistry tools that are scalable both in their ability to treat large scientific computational chemistry problems efficiently, and in their use of available parallel computing resources from high-performance parallel supercomputers to conventional workstation clusters.
[Homepage](http://www.nwchem-sw.org/index.php/Main_Page)
## Installed Versions
For a current list of installed versions, execute:
```console
$ ml av NWChem
```
## Running
NWChem is compiled for parallel MPI execution. Normal procedure for MPI jobs applies. Sample jobscript (for Salomon on 24 threads):
```bash
#PBS -A IT4I-0-0
#PBS -q qprod
#PBS -l select=1:ncpus=24:mpiprocs=24
cd $PBS_O_WORKDIR
module add NWChem
mpirun nwchem h2o.nw
```
## Options
Please refer to [the documentation](http://www.nwchem-sw.org/index.php/Release62:Top-level) and in the input file set the following directives :
* MEMORY : controls the amount of memory NWChem will use
* SCRATCH_DIR : set this to a directory in [SCRATCH filesystem](../../storage/storage/) (or run the calculation completely in a scratch directory). For certain calculations, it might be advisable to reduce I/O by forcing "direct" mode, eg. "scf direct"
# COMSOL Multiphysics
## Introduction
[COMSOL](http://www.comsol.com) is a powerful environment for modelling and solving various engineering and scientific problems based on partial differential equations. COMSOL is designed to solve coupled or multiphysics phenomena. For many standard engineering problems COMSOL provides add-on products such as electrical, mechanical, fluid flow, and chemical applications.
* [Structural Mechanics Module](http://www.comsol.com/structural-mechanics-module),
* [Heat Transfer Module](http://www.comsol.com/heat-transfer-module),
* [CFD Module](http://www.comsol.com/cfd-module),
* [Acoustics Module](http://www.comsol.com/acoustics-module),
* and [many others](http://www.comsol.com/products)
COMSOL also allows an interface support for equation-based modelling of partial differential equations.
## Execution
On the clusters COMSOL is available in the latest stable version. There are two variants of the release:
* **Non commercial** or so called **EDU variant**, which can be used for research and educational purposes.
* **Commercial** or so called **COM variant**, which can used also for commercial activities. **COM variant** has only subset of features compared to the **EDU variant** available. More about licensing [here](licensing-and-available-versions).
To load the of COMSOL load the module
```console
$ ml COMSOL
```
By default the **EDU variant** will be loaded. If user needs other version or variant, load the particular version. To obtain the list of available versions use
```console
$ ml av COMSOL
```
If user needs to prepare COMSOL jobs in the interactive mode it is recommend to use COMSOL on the compute nodes via PBS Pro scheduler. In order run the COMSOL Desktop GUI on Windows is recommended to use the [Virtual Network Computing (VNC)](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/).
Example for Salomon:
```console
$ xhost +
$ qsub -I -X -A PROJECT_ID -q qprod -l select=1:ppn=24
$ ml COMSOL
$ comsol
```
To run COMSOL in batch mode, without the COMSOL Desktop GUI environment, user can utilized the default (comsol.pbs) job script and execute it via the qsub command.
```bash
#!/bin/bash
#PBS -l select=3:ppn=24
#PBS -q qprod
#PBS -N JOB_NAME
#PBS -A PROJECT_ID
cd /scratch/work/user/$USER/ || exit # on Anselm use: /scratch/$USER
echo Time is `date`
echo Directory is `pwd`
echo '**PBS_NODEFILE***START*******'
cat $PBS_NODEFILE
echo '**PBS_NODEFILE***END*********'
text_nodes < cat $PBS_NODEFILE
module load COMSOL
# module load COMSOL/51-EDU
ntask=$(wc -l $PBS_NODEFILE)
comsol -nn ${ntask} batch -configuration /tmp –mpiarg –rmk –mpiarg pbs -tmpdir /scratch/.../$USER/ -inputfile name_input_f.mph -outputfile name_output_f.mph -batchlog name_log_f.log
```
Working directory has to be created before sending the (comsol.pbs) job script into the queue. Input file (name_input_f.mph) has to be in working directory or full path to input file has to be specified. The appropriate path to the temp directory of the job has to be set by command option (-tmpdir).
## LiveLink for MATLAB
COMSOL is the software package for the numerical solution of the partial differential equations. LiveLink for MATLAB allows connection to the COMSOL API (Application Programming Interface) with the benefits of the programming language and computing environment of the MATLAB.
LiveLink for MATLAB is available in both **EDU** and **COM** **variant** of the COMSOL release. On the clusters 1 commercial (**COM**) license and the 5 educational (**EDU**) licenses of LiveLink for MATLAB (please see the [ISV Licenses](../../../anselm/software/isv_licenses/)) are available. Following example shows how to start COMSOL model from MATLAB via LiveLink in the interactive mode (on Anselm use 16 threads).
```console
$ xhost +
$ qsub -I -X -A PROJECT_ID -q qexp -l select=1:ppn=24
$ ml MATLAB
$ ml COMSOL
$ comsol server MATLAB
```
At the first time to launch the LiveLink for MATLAB (client-MATLAB/server-COMSOL connection) the login and password is requested and this information is not requested again.
To run LiveLink for MATLAB in batch mode with (comsol_matlab.pbs) job script you can utilize/modify the following script and execute it via the qsub command.
```bash
#!/bin/bash
#PBS -l select=3:ppn=24
#PBS -q qprod
#PBS -N JOB_NAME
#PBS -A PROJECT_ID
cd /scratch/work/user/$USER || exit # on Anselm use: /scratch/$USER
echo Time is `date`
echo Directory is `pwd`
echo '**PBS_NODEFILE***START*******'
cat $PBS_NODEFILE
echo '**PBS_NODEFILE***END*********'
text_nodes < cat $PBS_NODEFILE
module load MATLAB
module load COMSOL/51-EDU
ntask=$(wc -l $PBS_NODEFILE)
comsol -nn ${ntask} server -configuration /tmp -mpiarg -rmk -mpiarg pbs -tmpdir /scratch/work/user/$USER/work &
cd /apps/cae/COMSOL/51/mli
matlab -nodesktop -nosplash -r "mphstart; addpath /scratch/work/user/$USER/work; test_job"
```
This example shows how to run Livelink for MATLAB with following configuration: 3 nodes and 24 cores per node. Working directory has to be created before submitting (comsol_matlab.pbs) job script into the queue. Input file (test_job.m) has to be in working directory or full path to input file has to be specified. The Matlab command option (-r ”mphstart”) created a connection with a COMSOL server using the default port number.
# Licensing and Available Versions
## Comsol Licence Can Be Used By:
* all persons in the carrying out of the CE IT4Innovations Project (In addition to the primary licensee, which is VSB - Technical University of Ostrava, users are CE IT4Innovations third parties - CE IT4Innovations project partners, particularly the University of Ostrava, the Brno University of Technology - Faculty of Informatics, the Silesian University in Opava, Institute of Geonics AS CR.)
* all persons who have a valid license
* students of the Technical University
## Comsol EDU Network Licence
The licence intended to be used for science and research, publications, students’ projects, teaching (academic licence).
## Comsol COM Network Licence
The licence intended to be used for science and research, publications, students’ projects, commercial research with no commercial use restrictions. Enables the solution of at least one job by one user in one program start.
## Available Versions
* ver. 51
# ISV Licenses
## Guide to Managing Independent Software Vendor Licenses
On Anselm cluster there are also installed commercial software applications, also known as ISV (Independent Software Vendor), which are subjects to licensing. The licenses are limited and their usage may be restricted only to some users or user groups.
Currently Flex License Manager based licensing is supported on the cluster for products ANSYS, Comsol and MATLAB. More information about the applications can be found in the general software section.
If an ISV application was purchased for educational (research) purposes and also for commercial purposes, then there are always two separate versions maintained and suffix "edu" is used in the name of the non-commercial version.
## Overview of the Licenses Usage
!!! note
The overview is generated every minute and is accessible from web or command line interface.
### Web Interface
For each license there is a table, which provides the information about the name, number of available (purchased/licensed), number of used and number of free license features <https://extranet.it4i.cz/anselm/licenses>
### Text Interface
For each license there is a unique text file, which provides the information about the name, number of available (purchased/licensed), number of used and number of free license features. The text files are accessible from the Anselm command prompt.
| Product | File with license state | Note |
| ---------- | ------------------------------------------------- | ------------------- |
| ansys | /apps/user/licenses/ansys_features_state.txt | Commercial |
| comsol | /apps/user/licenses/comsol_features_state.txt | Commercial |
| comsol-edu | /apps/user/licenses/comsol-edu_features_state.txt | Non-commercial only |
| matlab | /apps/user/licenses/matlab_features_state.txt | Commercial |
| matlab-edu | /apps/user/licenses/matlab-edu_features_state.txt | Non-commercial only |
The file has a header which serves as a legend. All the info in the legend starts with a hash (#) so it can be easily filtered when parsing the file via a script.
Example of the Commercial Matlab license state:
```console
$ cat /apps/user/licenses/matlab_features_state.txt
# matlab
# -------------------------------------------------
# FEATURE TOTAL USED AVAIL
# -------------------------------------------------
MATLAB 1 1 0
SIMULINK 1 0 1
Curve_Fitting_Toolbox 1 0 1
Signal_Blocks 1 0 1
GADS_Toolbox 1 0 1
Image_Toolbox 1 0 1
Compiler 1 0 1
Neural_Network_Toolbox 1 0 1
Optimization_Toolbox 1 0 1
Signal_Toolbox 1 0 1
Statistics_Toolbox 1 0 1
```
## License Tracking in PBS Pro Scheduler and Users Usage
Each feature of each license is accounted and checked by the scheduler of PBS Pro. If you ask for certain licenses, the scheduler won't start the job until the asked licenses are free (available). This prevents to crash batch jobs, just because of unavailability of the needed licenses.
The general format of the name is `feature__APP__FEATURE`.
Names of applications (APP):
* ansys
* comsol
* comsol-edu
* matlab
* matlab-edu
To get the FEATUREs of a license take a look into the corresponding state file ([see above](isv_licenses/#Licence)), or use:
### Application and List of Provided Features
* **ansys** $ grep -v "#" /apps/user/licenses/ansys_features_state.txt | cut -f1 -d' '
* **comsol** $ grep -v "#" /apps/user/licenses/comsol_features_state.txt | cut -f1 -d' '
* **comsol-ed** $ grep -v "#" /apps/user/licenses/comsol-edu_features_state.txt | cut -f1 -d' '
* **matlab** $ grep -v "#" /apps/user/licenses/matlab_features_state.txt | cut -f1 -d' '
* **matlab-edu** $ grep -v "#" /apps/user/licenses/matlab-edu_features_state.txt | cut -f1 -d' '
Example of PBS Pro resource name, based on APP and FEATURE name:
| Application | Feature | PBS Pro resource name |
| ----------- | -------------------------- | ----------------------------------------------- |
| ansys | acfd | feature_ansys_acfd |
| ansys | aa_r | feature_ansys_aa_r |
| comsol | COMSOL | feature_comsol_COMSOL |
| comsol | HEATTRANSFER | feature_comsol_HEATTRANSFER |
| comsol-edu | COMSOLBATCH | feature_comsol-edu_COMSOLBATCH |
| comsol-edu | STRUCTURALMECHANICS | feature_comsol-edu_STRUCTURALMECHANICS |
| matlab | MATLAB | feature_matlab_MATLAB |
| matlab | Image_Toolbox | feature_matlab_Image_Toolbox |
| matlab-edu | MATLAB_Distrib_Comp_Engine | feature_matlab-edu_MATLAB_Distrib_Comp_Engine |
| matlab-edu | Image_Acquisition_Toolbox | feature_matlab-edu_Image_Acquisition_Toolbox\\ |
!!! Warnig
Resource names in PBS Pro are case sensitive.
### Example of qsub Statement
Run an interactive PBS job with 1 Matlab EDU license, 1 Distributed Computing Toolbox and 32 Distributed Computing Engines (running on 32 cores):
```console
$ qsub -I -q qprod -A PROJECT_ID -l select=2:ncpus=16 -l feature__matlab-edu__MATLAB=1 -l feature__matlab-edu__Distrib_Computing_Toolbox=1 -l feature__matlab-edu__MATLAB_Distrib_Comp_Engine=32
```
The license is used and accounted only with the real usage of the product. So in this example, the general Matlab is used after Matlab is run by the user and not at the time, when the shell of the interactive job is started. Also the Distributed Computing licenses are used at the time, when the user uses the distributed parallel computation in Matlab (e. g. issues pmode start, matlabpool, etc.).