diff --git a/.spelling b/.spelling index dbf08ddc0112fda8c57d67f854e437c571bad0ed..1ada6e51ac87d673b2e8c0fe6a4913dc157f2e8f 100644 --- a/.spelling +++ b/.spelling @@ -41,6 +41,7 @@ TotalView Valgrind ParaView OpenFOAM +MAX_FAIRSHARE MPI4Py MPICH2 PETSc @@ -94,6 +95,202 @@ AnyConnect X11 backfilling backfilled +SCP +Lustre +QDR +TFLOP +ncpus +myjob +pernode +mpiprocs +ompthreads +qprace +runtime +SVS +ppn +Multiphysics +aeroacoustics +turbomachinery +CFD +LS-DYNA +APDL +MAPDL +multiphysics +AUTODYN +RSM +Molpro +initio +parallelization +NWChem +SCF +ISV +profiler +Pthreads +profilers +OTF +PAPI +PCM +uncore +pre-processing +prepend +CXX +prepended +POMP2 +Memcheck +unaddressable +OTF2 +GPI-2 +GASPI +GPI +MKL +IPP +TBB +GSL +Omics +VNC +Scalasca +IFORT +interprocedural +IDB +cloop +qcow +qcow2 +vmdk +vdi +virtio +paravirtualized +Gbit +tap0 +UDP +TCP +preload +qfat +Rmpi +DCT +datasets +dataset +preconditioners +partitioners +PARDISO +PaStiX +SuiteSparse +SuperLU +ExodusII +NetCDF +ParMETIS +multigrid +HYPRE +SPAI +Epetra +EpetraExt +Tpetra +64-bit +Belos +GMRES +Amesos +IFPACK +preconditioner +Teuchos +Makefiles +SAXPY +NVCC +VCF +HGMD +HUMSAVAR +ClinVar +indels +CIBERER +exomes +tmp +SSHFS +RSYNC +unmount +Cygwin +CygwinX +RFB +TightVNC +TigerVNC +GUIs +XLaunch +UTF-8 +numpad +PuTTYgen +OpenSSH +IE11 +x86 +r21u01n577 +7120P +interprocessor +IPN +toolchains +toolchain +APIs +easyblocks +GM200 +GeForce +GTX +IRUs +ASIC +backplane +ICEX +IRU +PFLOP +T950B +ifconfig +inet +addr +checkbox +appfile +programmatically +http +https +filesystem +phono3py +HDF +splitted +automize +llvm +PGI +GUPC +BUPC +IBV +Aislinn +nondeterminism +stdout +stderr +i.e. +pthreads +uninitialised +broadcasted +ITAC +hotspots +Bioinformatics +semiempirical +DFT +polyfill +ES6 +HTML5Rocks +minifiers +CommonJS +PhantomJS +bundlers +Browserify +versioning +isflowing +ispaused +NPM +sublicense +Streams2 +Streams3 +blogpost +GPG +mississippi +Uint8Arrays +Uint8Array +endianness +styleguide +noop +MkDocs - docs.it4i/anselm-cluster-documentation/environment-and-modules.md MODULEPATH bashrc @@ -127,6 +324,7 @@ Rmax E5-2665 E5-2470 P5110 +isw - docs.it4i/anselm-cluster-documentation/introduction.md RedHat - docs.it4i/anselm-cluster-documentation/job-priority.md @@ -134,6 +332,8 @@ walltime qexp _List.fairshare _time +_FAIRSHARE +1E6 - docs.it4i/anselm-cluster-documentation/job-submission-and-execution.md 15209.srv11 qsub @@ -154,6 +354,15 @@ jobscript cn108 cn109 cn110 +Name0 +cn17 +_NODEFILE +_O +_WORKDIR +mympiprog.x +_JOBID +myprog.x +openmpi - docs.it4i/anselm-cluster-documentation/network.md ib0 - docs.it4i/anselm-cluster-documentation/prace.md @@ -161,14 +370,19 @@ PRACE qfree it4ifree it4i.portal.clients +prace +1h - docs.it4i/anselm-cluster-documentation/shell-and-data-access.md VPN - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-cfx.md ANSYS CFX cfx.pbs +_r +ane3fl - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.md mapdl.pbs +_dy - docs.it4i/anselm-cluster-documentation/software/ansys/ls-dyna.md HPC lsdyna.pbs @@ -183,9 +397,25 @@ Makefile - docs.it4i/anselm-cluster-documentation/software/gpi2.md gcc cn79 +helloworld +_gpi.c +ibverbs +gaspi +_logger - docs.it4i/anselm-cluster-documentation/software/intel-suite/intel-compilers.md Haswell CPUs +ipo +O3 +vec +xAVX +omp +simd +ivdep +pragmas +openmp +xCORE-AVX2 +axCORE-AVX2 - docs.it4i/anselm-cluster-documentation/software/kvirtualization.md rc.local runlevel @@ -197,6 +427,8 @@ VDE smb.conf TMPDIR run.bat. 
+slirp +NATs - docs.it4i/anselm-cluster-documentation/software/mpi/mpi4py-mpi-for-python.md NumPy - docs.it4i/anselm-cluster-documentation/software/numerical-languages/matlab_1314.md @@ -205,33 +437,73 @@ matlabcode.m output.out matlabcodefile sched +_feature - docs.it4i/anselm-cluster-documentation/software/numerical-languages/matlab.md UV2000 +maxNumCompThreads +SalomonPBSPro - docs.it4i/anselm-cluster-documentation/software/numerical-languages/octave.md _THREADS +_NUM - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/trilinos.md CMake-aware Makefile.export +_PACKAGE +_CXX +_COMPILER +_INCLUDE +_DIRS +_LIBRARY - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.md ansysdyna.pbs - docs.it4i/anselm-cluster-documentation/software/ansys/ansys.md svsfem.cz +_ - docs.it4i/anselm-cluster-documentation/software/debuggers/valgrind.md libmpiwrap-amd64-linux +O0 +valgrind +malloc +_PRELOAD - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.md cn204 +_LIBS +MAGMAROOT +_magma +_server +_anselm +_from +_mic.sh +_dgetrf +_mic +_03.pdf - docs.it4i/anselm-cluster-documentation/software/paraview.md cn77 localhost +v4.0.1 - docs.it4i/anselm-cluster-documentation/storage.md ssh.du1.cesnet.cz Plzen ssh.du2.cesnet.cz ssh.du3.cesnet.cz +tier1 +_home +_cache +_tape - docs.it4i/salomon/environment-and-modules.md icc +ictce +ifort +imkl +intel +gompi +goolf +BLACS +iompi +iccifort - docs.it4i/salomon/hardware-overview.md HW +E5-4627v2 - docs.it4i/salomon/job-submission-and-execution.md 15209.isrv5 r21u01n577 @@ -256,6 +528,7 @@ mkdir mympiprog.x mpiexec myprog.x +r4i7n0.ib0.smc.salomon.it4i.cz - docs.it4i/salomon/7d-enhanced-hypercube.md cns1 cns576 @@ -264,9 +537,266 @@ r4i7n17 cns577 cns1008 r37u31n1008 +7D - docs.it4i/anselm-cluster-documentation/resources-allocation-policy.md qsub it4ifree it4i.portal.clients x86 -x64 \ No newline at end of file +x64 + - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-fluent.md +anslic +_admin + - docs.it4i/anselm-cluster-documentation/software/chemistry/nwchem.md +_DIR + - docs.it4i/anselm-cluster-documentation/software/comsol-multiphysics.md +EDU +comsol +_matlab.pbs +_job.m +mphstart + - docs.it4i/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.md +perf-report +perf +txt +html +mympiprog +_32p + - docs.it4i/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.md +Hotspots + - docs.it4i/anselm-cluster-documentation/software/debuggers/scalasca.md +scorep + - docs.it4i/anselm-cluster-documentation/software/isv_licenses.md +edu +ansys +_features +_state.txt +f1 +matlab +acfd +_ansys +_acfd +_aa +_comsol +HEATTRANSFER +_HEATTRANSFER +COMSOLBATCH +_COMSOLBATCH +STRUCTURALMECHANICS +_STRUCTURALMECHANICS +_matlab +_Toolbox +_Image +_Distrib +_Comp +_Engine +_Acquisition +pmode +matlabpool + - docs.it4i/anselm-cluster-documentation/software/mpi/mpi.md +mpirun +BLAS1 +FFT +KMP +_AFFINITY +GOMP +_CPU +bullxmpi-1 +mpich2 + - docs.it4i/anselm-cluster-documentation/software/mpi/Running_OpenMPI.md +bysocket +bycore + - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/fftw.md +gcc3.3.3 +pthread +fftw3 +lfftw3 +_threads-lfftw3 +_omp +icc3.3.3 +FFTW2 +gcc2.1.5 +fftw2 +lfftw +_threads +icc2.1.5 +fftw-mpi3 +_mpi +fftw3-mpi +fftw2-mpi +IntelMPI + - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/gsl.md +dwt.c +mkl +lgsl + - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/hdf5.md +icc +hdf5 +_INC 
+_SHLIB +_CPP +_LIB +_F90 +gcc49 + - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/petsc.md +_Dist + - docs.it4i/anselm-cluster-documentation/software/nvidia-cuda.md +lcublas + - docs.it4i/anselm-cluster-documentation/software/operating-system.md +6.x + - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.md +startxwin +cygwin64binXWin.exe +tcp + - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.md +Xming +XWin.exe. + - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.md +_rsa.ppk + - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.md +_keys +organization.example.com +_rsa + - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md +vpnui.exe + - docs.it4i/salomon/ib-single-plane-topology.md +36-port +Mcell.pdf +r21-r38 +nodes.pdf + - docs.it4i/salomon/introduction.md +E5-2680v3 + - docs.it4i/salomon/network.md +r4i1n0 +r4i1n1 +r4i1n2 +r4i1n3 +ip + - docs.it4i/salomon/software/ansys/setting-license-preferences.md +ansys161 + - docs.it4i/salomon/software/ansys/workbench.md +mpifile.txt +solvehandlers.xml + - docs.it4i/salomon/software/chemistry/phono3py.md +vasprun.xml +disp-XXXXX +disp +_fc3.yaml +ir +_grid +_points.yaml +gofree-cond1 + - docs.it4i/salomon/software/compilers.md +HPF + - docs.it4i/salomon/software/comsol/licensing-and-available-versions.md +ver + - docs.it4i/salomon/software/debuggers/aislinn.md +test.cpp + - docs.it4i/salomon/software/debuggers/intel-vtune-amplifier.md +vtune +_update1 + - docs.it4i/salomon/software/debuggers/valgrind.md +EBROOTVALGRIND + - docs.it4i/salomon/software/intel-suite/intel-advisor.md +O2 + - docs.it4i/salomon/software/intel-suite/intel-compilers.md +UV1 + - docs.it4i/salomon/software/numerical-languages/octave.md +octcode.m +mkoctfile + - docs.it4i/software/orca.md +pdf + - node_modules/es6-promise/README.md +rsvp.js +es6-promise +es6-promise-min +Node.js +testem + - node_modules/spawn-sync/lib/json-buffer/README.md +node.js + - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/doc/wg-meetings/2015-01-30.md +WG +domenic +mikeal +io.js +sam +calvin +whatwg +compat +mathias +isaac +chris + - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/core-util-is/README.md +core-util-is +v0.12. 
- node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/isarray/README.md +isarray +Gruber +julian +juliangruber.com +NONINFRINGEMENT + - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/process-nextick-args/license.md +Metcalf + - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/process-nextick-args/readme.md +process-nextick-args +process.nextTick + - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/string_decoder/README.md +_decoder.js +Joyent +joyent +repo + - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/util-deprecate/History.md +kumavis +jsdocs + - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/util-deprecate/README.md +util-deprecate +Rajlich + - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/README.md +v7.0.0 +userland +chrisdickinson +christopher.s.dickinson +gmail.com +9554F04D7259F04124DE6B476D5A82AC7E37093B +calvinmetcalf +calvin.metcalf +F3EF5F62A87FC27A22E643F714CE4FF5015AA242 +Vagg +rvagg +vagg.org +DD8F2338BAE7501E3DD5AC78C273792F7D83545D +sonewman +newmansam +outlook.com +Buus +mafintosh +mathiasbuus +Denicola +domenic.me +Matteo +Collina +mcollina +matteo.collina +3ABC01543F22DD2239285CDD818674489FBC127E + - node_modules/spawn-sync/node_modules/concat-stream/readme.md +concat-stream +concat +cb + - node_modules/spawn-sync/node_modules/os-shim/README.md +0.10.x +os.tmpdir +os.endianness +os.EOL +os.platform +os.arch +0.4.x +Aparicio +Adesis +Netlife +S.L + - node_modules/spawn-sync/node_modules/try-thread-sleep/node_modules/thread-sleep/README.md +node-pre-gyp +npm + - node_modules/spawn-sync/README.md +iojs
diff --git a/README.md index 7ca5d57a12709555af7dac7b80f093118926f22d..97c9e526bb0a69696a2bfbf9d10d6a22490b05bb 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # User documentation
-This is project contain IT4Innovation user documentation source.
+This project contains the IT4Innovations user documentation source.
 ## Environments
@@ -42,11 +42,11 @@ $$
 To enable the MathJX on page you need to enable it by adding line ```---8<--- "mathjax.md"``` at the end of file.
-## Developemnt Environment
+## Development Environment
 ### MkDocs
-Documentation pages are build with [MkDocs](http://www.mkdocs.org/), [MkDocs at GitHub](https://github.com/mkdocs/mkdocs/). You need to install mkdocs loacally so that you can build the pages and run developement web server.
+Documentation pages are built with [MkDocs](http://www.mkdocs.org/), [MkDocs at GitHub](https://github.com/mkdocs/mkdocs/). You need to install MkDocs locally so that you can build the pages and run a development web server.
 ```bash
 pip install mkdocs pygments pymdown-extensions
diff --git a/docs.it4i/anselm/environment-and-modules.md index e28a3405f831409b7bc84edfe410d4b226bb3a57..1b6fd9a6485a169d8e3a5c5180a736afed23b29a 100644
--- a/docs.it4i/anselm/environment-and-modules.md
+++ b/docs.it4i/anselm/environment-and-modules.md
@@ -26,7 +26,7 @@ fi
 ```
 !!! note
- Do not run commands outputting to standard output (echo, module list, etc) in .bashrc for non-interactive SSH sessions. It breaks fundamental functionality (scp, PBS) of your account! Conside utilization of SSH session interactivity for such commands as stated in the previous example.
+ Do not run commands outputting to standard output (echo, module list, etc) in .bashrc for non-interactive SSH sessions. It breaks fundamental functionality (SCP, PBS) of your account! Consider utilization of SSH session interactivity for such commands as stated in the previous example.

 ## Application Modules

diff --git a/docs.it4i/anselm/job-submission-and-execution.md index fc916199f6f98c90f19383c5f058aa53a9023272..72359d718e7719cc3d6b037be27b8812bc5b862b 100644
--- a/docs.it4i/anselm/job-submission-and-execution.md
+++ b/docs.it4i/anselm/job-submission-and-execution.md
@@ -323,7 +323,7 @@ cd $SCRDIR || exit
 cp $PBS_O_WORKDIR/input .
 cp $PBS_O_WORKDIR/mympiprog.x .
-# load the mpi module
+# load the MPI module
 ml OpenMPI
 # execute the calculation
@@ -361,8 +361,8 @@ Example jobscript for an MPI job with preloaded inputs and executables, options
 SCRDIR=/scratch/$USER/myjob
 cd $SCRDIR || exit
-# load the mpi module
+# load the MPI module
 ml OpenMPI
 # execute the calculation
 mpirun ./mympiprog.x
diff --git a/docs.it4i/anselm/software/debuggers/allinea-ddt.md new file mode 100644 index 0000000000000000000000000000000000000000..a5dd069ba07728662ce817cb713eeb7325002998
--- /dev/null
+++ b/docs.it4i/anselm/software/debuggers/allinea-ddt.md
@@ -0,0 +1,94 @@
+# Allinea Forge (DDT, MAP)
+
+Allinea Forge consists of two tools - the debugger DDT and the profiler MAP.
+
+Allinea DDT is a commercial debugger primarily for debugging parallel MPI or OpenMP programs. It also supports GPU (CUDA) and Intel Xeon Phi accelerators. DDT provides all the standard debugging features (stack trace, breakpoints, watches, view variables, threads etc.) for every thread running as part of your program, or for every process - even if these processes are distributed across a cluster using an MPI implementation.
+
+Allinea MAP is a profiler for C/C++/Fortran HPC codes. It is designed for profiling parallel code, which uses Pthreads, OpenMP or MPI.
+
+## License and Limitations for Anselm Users
+
+On Anselm users can debug OpenMP or MPI code that runs up to 64 parallel processes. In case of debugging GPU or Xeon Phi accelerated codes the limit is 8 accelerators. These limitations mean that:
+
+* 1 user can debug up to 64 processes, or
+* 32 users can debug 2 processes, etc.
+
+In case of debugging on accelerators:
+
+* 1 user can debug on up to 8 accelerators, or
+* 8 users can debug on a single accelerator.
+
+## Compiling Code to Run With DDT
+
+### Modules
+
+Load all necessary modules to compile the code. For example:
+
+```bash
+ $ module load intel
+ $ module load impi ... or ... module load openmpi/X.X.X-icc
+```
+
+Load the Allinea DDT module:
+
+```bash
+ $ module load Forge
+```
+
+Compile the code:
+
+```bash
+$ mpicc -g -O0 -o test_debug test.c
+
+$ mpif90 -g -O0 -o test_debug test.f
+```
+
+### Compiler Flags
+
+Before debugging, you need to compile your code with these flags:
+
+!!! note
+    \* **g** : Generates extra debugging information usable by GDB. -g3 includes even more debugging information. This option is available for GNU and INTEL C/C++ and Fortran compilers.
+    \* **O0** : Suppress all optimizations.
+
+## Starting a Job With DDT
+
+Be sure to log in with X window forwarding enabled. This could mean using the -X option with ssh:
+
+```bash
+ $ ssh -X username@anselm.it4i.cz
+```
+
+Another option is to access the login node using VNC.
Please see the detailed information on how to [use graphic user interface on Anselm](/general/accessing-the-clusters/graphical-user-interface/x-window-system/) + +From the login node an interactive session **with X windows forwarding** (-X option) can be started by following command: + +```bash + $ qsub -I -X -A NONE-0-0 -q qexp -lselect=1:ncpus=16:mpiprocs=16,walltime=01:00:00 +``` + +Then launch the debugger with the ddt command followed by the name of the executable to debug: + +```bash + $ ddt test_debug +``` + +A submission window that appears have a prefilled path to the executable to debug. You can select the number of MPI processors and/or OpenMP threads on which to run and press run. Command line arguments to a program can be entered to the "Arguments " box. + + + +To start the debugging directly without the submission window, user can specify the debugging and execution parameters from the command line. For example the number of MPI processes is set by option "-np 4". Skipping the dialog is done by "-start" option. To see the list of the "ddt" command line parameters, run "ddt --help". + +```bash + ddt -start -np 4 ./hello_debug_impi +``` + +## Documentation + +Users can find original User Guide after loading the DDT module: + +```bash + $DDTPATH/doc/userguide.pdf +``` + +[1] Discipline, Magic, Inspiration and Science: Best Practice Debugging with Allinea DDT, Workshop conducted at LLNL by Allinea on May 10, 2013, [link](https://computing.llnl.gov/tutorials/allineaDDT/index.html) diff --git a/docs.it4i/anselm/software/debuggers/allinea-performance-reports.md b/docs.it4i/anselm/software/debuggers/allinea-performance-reports.md new file mode 100644 index 0000000000000000000000000000000000000000..7b519b85c094122ff7866ad8a48961ee89dbb4b9 --- /dev/null +++ b/docs.it4i/anselm/software/debuggers/allinea-performance-reports.md @@ -0,0 +1,59 @@ +# Allinea Performance Reports + +## Introduction + +Allinea Performance Reports characterize the performance of HPC application runs. After executing your application through the tool, a synthetic HTML report is generated automatically, containing information about several metrics along with clear behavior statements and hints to help you improve the efficiency of your runs. + +The Allinea Performance Reports is most useful in profiling MPI programs. + +Our license is limited to 64 MPI processes. + +## Modules + +Allinea Performance Reports version 6.0 is available + +```bash + $ module load PerformanceReports/6.0 +``` + +The module sets up environment variables, required for using the Allinea Performance Reports. This particular command loads the default module, which is performance reports version 4.2. + +## Usage + +!!! note + Use the the perf-report wrapper on your (MPI) program. + +Instead of [running your MPI program the usual way](../mpi/), use the the perf report wrapper: + +```bash + $ perf-report mpirun ./mympiprog.x +``` + +The MPI program will run as usual. The perf-report creates two additional files, in \*.txt and \*.html format, containing the performance report. Note that [demanding MPI codes should be run within the queue system](../../job-submission-and-execution/). + +## Example + +In this example, we will be profiling the mympiprog.x MPI program, using Allinea performance reports. 
Assume that the code is compiled with Intel compilers and linked against Intel MPI library: + +First, we allocate some nodes via the express queue: + +```bash + $ qsub -q qexp -l select=2:ncpus=16:mpiprocs=16:ompthreads=1 -I + qsub: waiting for job 262197.dm2 to start + qsub: job 262197.dm2 ready +``` + +Then we load the modules and run the program the usual way: + +```bash + $ module load intel impi allinea-perf-report/4.2 + $ mpirun ./mympiprog.x +``` + +Now lets profile the code: + +```bash + $ perf-report mpirun ./mympiprog.x +``` + +Performance report files [mympiprog_32p\*.txt](../../../src/mympiprog_32p_2014-10-15_16-56.txt) and [mympiprog_32p\*.html](../../../src/mympiprog_32p_2014-10-15_16-56.html) were created. We can see that the code is very efficient on MPI and is CPU bounded. diff --git a/docs.it4i/anselm/software/intel-suite/intel-compilers.md b/docs.it4i/anselm/software/intel-suite/intel-compilers.md new file mode 100644 index 0000000000000000000000000000000000000000..9cc1f1e0af6abf09cc00d39f7c7dd1c37d78c9e1 --- /dev/null +++ b/docs.it4i/anselm/software/intel-suite/intel-compilers.md @@ -0,0 +1,36 @@ +# Intel Compilers + +The Intel compilers version 13.1.1 are available, via module Intel. The compilers include the ICC C and C++ compiler and the IFORT Fortran 77/90/95 compiler. + +```bash + $ module load intel + $ icc -v + $ ifort -v +``` + +The Intel compilers provide for vectorization of the code, via the AVX instructions and support threading parallelization via OpenMP + +For maximum performance on the Anselm cluster, compile your programs using the AVX instructions, with reporting where the vectorization was used. We recommend following compilation options for high performance + +```bash + $ icc -ipo -O3 -vec -xAVX -vec-report1 myprog.c mysubroutines.c -o myprog.x + $ ifort -ipo -O3 -vec -xAVX -vec-report1 myprog.f mysubroutines.f -o myprog.x +``` + +In this example, we compile the program enabling interprocedural optimizations between source files (-ipo), aggressive loop optimizations (-O3) and vectorization (-vec -xAVX) + +The compiler recognizes the omp, simd, vector and ivdep pragmas for OpenMP parallelization and AVX vectorization. Enable the OpenMP parallelization by the **-openmp** compiler switch. + +```bash + $ icc -ipo -O3 -vec -xAVX -vec-report1 -openmp myprog.c mysubroutines.c -o myprog.x + $ ifort -ipo -O3 -vec -xAVX -vec-report1 -openmp myprog.f mysubroutines.f -o myprog.x +``` + +Read more at <http://software.intel.com/sites/products/documentation/doclib/stdxe/2013/composerxe/compiler/cpp-lin/index.htm> + +## Sandy Bridge/Haswell Binary Compatibility + +Anselm nodes are currently equipped with Sandy Bridge CPUs, while Salomon will use Haswell architecture. >The new processors are backward compatible with the Sandy Bridge nodes, so all programs that ran on the Sandy Bridge processors, should also run on the new Haswell nodes. >To get optimal performance out of the Haswell processors a program should make use of the special AVX2 instructions for this processor. One can do this by recompiling codes with the compiler flags >designated to invoke these instructions. For the Intel compiler suite, there are two ways of doing this: + +* Using compiler flag (both for Fortran and C): -xCORE-AVX2. This will create a binary with AVX2 instructions, specifically for the Haswell processors. Note that the executable will not run on Sandy Bridge nodes. +* Using compiler flags (both for Fortran and C): -xAVX -axCORE-AVX2. 
This will generate multiple, feature specific auto-dispatch code paths for Intel® processors, if there is a performance benefit. So this binary will run both on Sandy Bridge and Haswell processors. During runtime it will be decided which path to follow, dependent on which processor you are running on. In general this will result in larger binaries. diff --git a/docs.it4i/anselm/software/intel-suite/intel-debugger.md b/docs.it4i/anselm/software/intel-suite/intel-debugger.md new file mode 100644 index 0000000000000000000000000000000000000000..d3acd2c51a364c9a48a7b26c0fcfb96256e0bc5f --- /dev/null +++ b/docs.it4i/anselm/software/intel-suite/intel-debugger.md @@ -0,0 +1,73 @@ +# Intel Debugger + +## Debugging Serial Applications + +The intel debugger version 13.0 is available, via module intel. The debugger works for applications compiled with C and C++ compiler and the ifort fortran 77/90/95 compiler. The debugger provides java GUI environment. Use X display for running the GUI. + +```bash + $ module load intel + $ idb +``` + +The debugger may run in text mode. To debug in text mode, use + +```bash + $ idbc +``` + +To debug on the compute nodes, module intel must be loaded. The GUI on compute nodes may be accessed using the same way as in the GUI section + +Example: + +```bash + $ qsub -q qexp -l select=1:ncpus=16 -X -I + qsub: waiting for job 19654.srv11 to start + qsub: job 19654.srv11 ready + + $ module load intel + $ module load java + $ icc -O0 -g myprog.c -o myprog.x + $ idb ./myprog.x +``` + +In this example, we allocate 1 full compute node, compile program myprog.c with debugging options -O0 -g and run the idb debugger interactively on the myprog.x executable. The GUI access is via X11 port forwarding provided by the PBS workload manager. + +## Debugging Parallel Applications + +Intel debugger is capable of debugging multithreaded and MPI parallel programs as well. + +### Small Number of MPI Ranks + +For debugging small number of MPI ranks, you may execute and debug each rank in separate xterm terminal (do not forget the X display. Using Intel MPI, this may be done in following way: + +```bash + $ qsub -q qexp -l select=2:ncpus=16 -X -I + qsub: waiting for job 19654.srv11 to start + qsub: job 19655.srv11 ready + + $ module load intel impi + $ mpirun -ppn 1 -hostfile $PBS_NODEFILE --enable-x xterm -e idbc ./mympiprog.x +``` + +In this example, we allocate 2 full compute node, run xterm on each node and start idb debugger in command line mode, debugging two ranks of mympiprog.x application. The xterm will pop up for each rank, with idb prompt ready. The example is not limited to use of Intel MPI + +### Large Number of MPI Ranks + +Run the idb debugger from within the MPI debug option. This will cause the debugger to bind to all ranks and provide aggregated outputs across the ranks, pausing execution automatically just after startup. You may then set break points and step the execution manually. Using Intel MPI: + +```bash + $ qsub -q qexp -l select=2:ncpus=16 -X -I + qsub: waiting for job 19654.srv11 to start + qsub: job 19655.srv11 ready + + $ module load intel impi + $ mpirun -n 32 -idb ./mympiprog.x +``` + +### Debugging Multithreaded Application + +Run the idb debugger in GUI mode. The menu Parallel contains number of tools for debugging multiple threads. One of the most useful tools is the **Serialize Execution** tool, which serializes execution of concurrent threads for easy orientation and identification of concurrency related bugs. 
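+
+A minimal multithreaded debugging session might look as follows (a sketch only; the source file name and thread count are assumptions, the compiler flags match the serial example above):
+
+```bash
+ $ qsub -q qexp -l select=1:ncpus=16 -X -I
+ $ module load intel
+ $ module load java
+ $ icc -openmp -g -O0 myprog.c -o myprog.x
+ $ export OMP_NUM_THREADS=16
+ $ idb ./myprog.x
+```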
+ +## Further Information + +Exhaustive manual on IDB features and usage is published at [Intel website](http://software.intel.com/sites/products/documentation/doclib/stdxe/2013/composerxe/debugger/user_guide/index.htm) diff --git a/docs.it4i/anselm/software/mpi/mpi.md b/docs.it4i/anselm/software/mpi/mpi.md new file mode 100644 index 0000000000000000000000000000000000000000..08be4bce682e507968cbbc0a03b32854193d3352 --- /dev/null +++ b/docs.it4i/anselm/software/mpi/mpi.md @@ -0,0 +1,148 @@ +# MPI + +## Setting Up MPI Environment + +The Anselm cluster provides several implementations of the MPI library: + +| MPI Library | Thread support | +| ---------------------------------------------------- | --------------------------------------------------------------- | +| The highly optimized and stable **bullxmpi 1.2.4.1** | Partial thread support up to MPI_THREAD_SERIALIZED | +| The **Intel MPI 4.1** | Full thread support up to MPI_THREAD_MULTIPLE | +| The [OpenMPI 1.6.5](href="http://www.open-mpi.org) | Full thread support up to MPI_THREAD_MULTIPLE, BLCR c/r support | +| The OpenMPI 1.8.1 | Full thread support up to MPI_THREAD_MULTIPLE, MPI-3.0 support | +| The **mpich2 1.9** | Full thread support up to MPI_THREAD_MULTIPLE, BLCR c/r support | + +MPI libraries are activated via the environment modules. + +Look up section modulefiles/mpi in module avail + +```bash + $ module avail + ------------------------- /opt/modules/modulefiles/mpi ------------------------- + bullxmpi/bullxmpi-1.2.4.1 mvapich2/1.9-icc + impi/4.0.3.008 openmpi/1.6.5-gcc(default) + impi/4.1.0.024 openmpi/1.6.5-gcc46 + impi/4.1.0.030 openmpi/1.6.5-icc + impi/4.1.1.036(default) openmpi/1.8.1-gcc + openmpi/1.8.1-gcc46 + mvapich2/1.9-gcc(default) openmpi/1.8.1-gcc49 + mvapich2/1.9-gcc46 openmpi/1.8.1-icc +``` + +There are default compilers associated with any particular MPI implementation. The defaults may be changed, the MPI libraries may be used in conjunction with any compiler. The defaults are selected via the modules in following way + +| Module | MPI | Compiler suite | +| ------------ | ---------------- | ------------------------------------------------------------------------------ | +| PrgEnv-gnu | bullxmpi-1.2.4.1 | bullx GNU 4.4.6 | +| PrgEnv-intel | Intel MPI 4.1.1 | Intel 13.1.1 | +| bullxmpi | bullxmpi-1.2.4.1 | none, select via module | +| impi | Intel MPI 4.1.1 | none, select via module | +| openmpi | OpenMPI 1.6.5 | GNU compilers 4.8.1, GNU compilers 4.4.6, Intel Compilers | +| openmpi | OpenMPI 1.8.1 | GNU compilers 4.8.1, GNU compilers 4.4.6, GNU compilers 4.9.0, Intel Compilers | +| mvapich2 | MPICH2 1.9 | GNU compilers 4.8.1, GNU compilers 4.4.6, Intel Compilers | + +Examples: + +```bash + $ module load openmpi +``` + +In this example, we activate the latest openmpi with latest GNU compilers + +To use openmpi with the intel compiler suite, use + +```bash + $ module load intel + $ module load openmpi/1.6.5-icc +``` + +In this example, the openmpi 1.6.5 using intel compilers is activated + +## Compiling MPI Programs + +!!! 
note + After setting up your MPI environment, compile your program using one of the mpi wrappers + +```bash + $ mpicc -v + $ mpif77 -v + $ mpif90 -v +``` + +Example program: + +```cpp + // helloworld_mpi.c + #include <stdio.h> + + #include<mpi.h> + + int main(int argc, char **argv) { + + int len; + int rank, size; + char node[MPI_MAX_PROCESSOR_NAME]; + + // Initiate MPI + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD,&rank); + MPI_Comm_size(MPI_COMM_WORLD,&size); + + // Get hostame and print + MPI_Get_processor_name(node,&len); + printf("Hello world! from rank %d of %d on host %sn",rank,size,node); + + // Finalize and exit + MPI_Finalize(); + + return 0; + } +``` + +Compile the above example with + +```bash + $ mpicc helloworld_mpi.c -o helloworld_mpi.x +``` + +## Running MPI Programs + +!!! note + The MPI program executable must be compatible with the loaded MPI module. + Always compile and execute using the very same MPI module. + +It is strongly discouraged to mix MPI implementations. Linking an application with one MPI implementation and running mpirun/mpiexec form other implementation may result in unexpected errors. + +The MPI program executable must be available within the same path on all nodes. This is automatically fulfilled on the /home and /scratch file system. You need to preload the executable, if running on the local scratch /lscratch file system. + +### Ways to Run MPI Programs + +Optimal way to run an MPI program depends on its memory requirements, memory access pattern and communication pattern. + +!!! note + Consider these ways to run an MPI program: + + 1. One MPI process per node, 16 threads per process + 2. Two MPI processes per node, 8 threads per process + 3. 16 MPI processes per node, 1 thread per process. + +**One MPI** process per node, using 16 threads, is most useful for memory demanding applications, that make good use of processor cache memory and are not memory bound. This is also a preferred way for communication intensive applications as one process per node enjoys full bandwidth access to the network interface. + +**Two MPI** processes per node, using 8 threads each, bound to processor socket is most useful for memory bandwidth bound applications such as BLAS1 or FFT, with scalable memory demand. However, note that the two processes will share access to the network interface. The 8 threads and socket binding should ensure maximum memory access bandwidth and minimize communication, migration and NUMA effect overheads. + +!!! note + Important! Bind every OpenMP thread to a core! + +In the previous two cases with one or two MPI processes per node, the operating system might still migrate OpenMP threads between cores. You want to avoid this by setting the KMP_AFFINITY or GOMP_CPU_AFFINITY environment variables. + +**16 MPI** processes per node, using 1 thread each bound to processor core is most suitable for highly scalable applications with low communication demand. + +### Running OpenMPI + +The **bullxmpi-1.2.4.1** and [**OpenMPI 1.6.5**](http://www.open-mpi.org/) are both based on OpenMPI. Read more on [how to run OpenMPI](Running_OpenMPI/) based MPI. + +### Running MPICH2 + +The **Intel MPI** and **mpich2 1.9** are MPICH2 based implementations. Read more on [how to run MPICH2](running-mpich2/) based MPI. + +The Intel MPI may run on the Intel Xeon Phi accelerators as well. Read more on [how to run Intel MPI on accelerators](../intel-xeon-phi/). 
diff --git a/docs.it4i/anselm/software/numerical-languages/matlab.md b/docs.it4i/anselm/software/numerical-languages/matlab.md new file mode 100644 index 0000000000000000000000000000000000000000..cfc958b7e93cde09f34e424d92717f53915097d4 --- /dev/null +++ b/docs.it4i/anselm/software/numerical-languages/matlab.md @@ -0,0 +1,281 @@ +# Matlab + +## Introduction + +Matlab is available in versions R2015a and R2015b. There are always two variants of the release: + +* Non commercial or so called EDU variant, which can be used for common research and educational purposes. +* Commercial or so called COM variant, which can used also for commercial activities. The licenses for commercial variant are much more expensive, so usually the commercial variant has only subset of features compared to the EDU available. + +To load the latest version of Matlab load the module + +```bash + $ module load MATLAB +``` + +By default the EDU variant is marked as default. If you need other version or variant, load the particular version. To obtain the list of available versions use + +```bash + $ module avail MATLAB +``` + +If you need to use the Matlab GUI to prepare your Matlab programs, you can use Matlab directly on the login nodes. But for all computations use Matlab on the compute nodes via PBS Pro scheduler. + +If you require the Matlab GUI, please follow the general information about [running graphical applications](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/). + +Matlab GUI is quite slow using the X forwarding built in the PBS (qsub -X), so using X11 display redirection either via SSH or directly by xauth (please see the "GUI Applications on Compute Nodes over VNC" part [here](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-system/)) is recommended. + +To run Matlab with GUI, use + +```bash + $ matlab +``` + +To run Matlab in text mode, without the Matlab Desktop GUI environment, use + +```bash + $ matlab -nodesktop -nosplash +``` + +plots, images, etc... will be still available. + +## Running Parallel Matlab Using Distributed Computing Toolbox / Engine + +!!! note + Distributed toolbox is available only for the EDU variant + +The MPIEXEC mode available in previous versions is no longer available in MATLAB 2015. Also, the programming interface has changed. Refer to [Release Notes](http://www.mathworks.com/help/distcomp/release-notes.html#buanp9e-1). + +Delete previously used file mpiLibConf.m, we have observed crashes when using Intel MPI. + +To use Distributed Computing, you first need to setup a parallel profile. We have provided the profile for you, you can either import it in MATLAB command line: + +```bash + >> parallel.importProfile('/apps/all/MATLAB/2015a-EDU/SalomonPBSPro.settings') + + ans = + + SalomonPBSPro +``` + +Or in the GUI, go to tab HOME -> Parallel -> Manage Cluster Profiles..., click Import and navigate to: + +/apps/all/MATLAB/2015a-EDU/SalomonPBSPro.settings + +With the new mode, MATLAB itself launches the workers via PBS, so you can either use interactive mode or a batch mode on one node, but the actual parallel processing will be done in a separate job started by MATLAB itself. Alternatively, you can use "local" mode to run parallel code on just a single node. + +!!! note + The profile is confusingly named Salomon, but you can use it also on Anselm. + +### Parallel Matlab Interactive Session + +Following example shows how to start interactive session with support for Matlab GUI. 
For more information about GUI based applications on Anselm see [this page](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-system/). + +```bash + $ xhost + + $ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A NONE-0-0 -q qexp -l select=1 -l walltime=00:30:00 + -l feature__matlab__MATLAB=1 +``` + +This qsub command example shows how to run Matlab on a single node. + +The second part of the command shows how to request all necessary licenses. In this case 1 Matlab-EDU license and 48 Distributed Computing Engines licenses. + +Once the access to compute nodes is granted by PBS, user can load following modules and start Matlab: + +```bash + r1i0n17$ module load MATLAB/2015b-EDU + r1i0n17$ matlab & +``` + +### Parallel Matlab Batch Job in Local Mode + +To run matlab in batch mode, write an matlab script, then write a bash jobscript and execute via the qsub command. By default, matlab will execute one matlab worker instance per allocated core. + +```bash + #!/bin/bash + #PBS -A PROJECT ID + #PBS -q qprod + #PBS -l select=1:ncpus=16:mpiprocs=16:ompthreads=1 + + # change to shared scratch directory + SCR=/scratch/work/user/$USER/$PBS_JOBID + mkdir -p $SCR ; cd $SCR || exit + + # copy input file to scratch + cp $PBS_O_WORKDIR/matlabcode.m . + + # load modules + module load MATLAB/2015a-EDU + + # execute the calculation + matlab -nodisplay -r matlabcode > output.out + + # copy output file to home + cp output.out $PBS_O_WORKDIR/. +``` + +This script may be submitted directly to the PBS workload manager via the qsub command. The inputs and matlab script are in matlabcode.m file, outputs in output.out file. Note the missing .m extension in the matlab -r matlabcodefile call, **the .m must not be included**. Note that the **shared /scratch must be used**. Further, it is **important to include quit** statement at the end of the matlabcode.m script. + +Submit the jobscript using qsub + +```bash + $ qsub ./jobscript +``` + +### Parallel Matlab Local Mode Program Example + +The last part of the configuration is done directly in the user Matlab script before Distributed Computing Toolbox is started. + +```bash + cluster = parcluster('local') +``` + +This script creates scheduler object "cluster" of type "local" that starts workers locally. + +!!! note + Every Matlab script that needs to initialize/use matlabpool has to contain these three lines prior to calling parpool(sched, ...) function. + +The last step is to start matlabpool with "cluster" object and correct number of workers. We have 24 cores per node, so we start 24 workers. + +```bash + parpool(cluster,16); + + + ... parallel code ... + + + parpool close +``` + +The complete example showing how to use Distributed Computing Toolbox in local mode is shown here. + +```bash + cluster = parcluster('local'); + cluster + + parpool(cluster,24); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') + + T = W*x; % Calculation performed on labs, in parallel. + % T and W are both codistributed arrays here. + end + T; + whos % T and W are both distributed arrays here. + + parpool close + quit +``` + +You can copy and paste the example in a .m file and execute. Note that the parpool size should correspond to **total number of cores** available on allocated nodes. + +### Parallel Matlab Batch Job Using PBS Mode (Workers Spawned in a Separate Job) + +This mode uses PBS scheduler to launch the parallel pool. 
It uses the SalomonPBSPro profile that needs to be imported to Cluster Manager, as mentioned before. This methodod uses MATLAB's PBS Scheduler interface - it spawns the workers in a separate job submitted by MATLAB using qsub. + +This is an example of m-script using PBS mode: + +```bash + cluster = parcluster('SalomonPBSPro'); + set(cluster, 'SubmitArguments', '-A OPEN-0-0'); + set(cluster, 'ResourceTemplate', '-q qprod -l select=10:ncpus=16'); + set(cluster, 'NumWorkers', 160); + + pool = parpool(cluster, 160); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') + + T = W*x; % Calculation performed on labs, in parallel. + % T and W are both codistributed arrays here. + end + whos % T and W are both distributed arrays here. + + % shut down parallel pool + delete(pool) +``` + +Note that we first construct a cluster object using the imported profile, then set some important options, namely: SubmitArguments, where you need to specify accounting id, and ResourceTemplate, where you need to specify number of nodes to run the job. + +You can start this script using batch mode the same way as in Local mode example. + +### Parallel Matlab Batch With Direct Launch (Workers Spawned Within the Existing Job) + +This method is a "hack" invented by us to emulate the mpiexec functionality found in previous MATLAB versions. We leverage the MATLAB Generic Scheduler interface, but instead of submitting the workers to PBS, we launch the workers directly within the running job, thus we avoid the issues with master script and workers running in separate jobs (issues with license not available, waiting for the worker's job to spawn etc.) + +!!! warning + This method is experimental. + +For this method, you need to use SalomonDirect profile, import it using [the same way as SalomonPBSPro](matlab/#running-parallel-matlab-using-distributed-computing-toolbox---engine) + +This is an example of m-script using direct mode: + +```bash + parallel.importProfile('/apps/all/MATLAB/2015a-EDU/SalomonDirect.settings') + cluster = parcluster('SalomonDirect'); + set(cluster, 'NumWorkers', 48); + + pool = parpool(cluster, 48); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') + + T = W*x; % Calculation performed on labs, in parallel. + % T and W are both codistributed arrays here. + end + whos % T and W are both distributed arrays here. + + % shut down parallel pool + delete(pool) +``` + +### Non-Interactive Session and Licenses + +If you want to run batch jobs with Matlab, be sure to request appropriate license features with the PBS Pro scheduler, at least the `-l _feature_matlab_MATLAB=1` for EDU variant of Matlab. More information about how to check the license features states and how to request them with PBS Pro, please [look here](../isv_licenses/). + +In case of non-interactive session please read the [following information](../isv_licenses/) on how to modify the qsub command to test for available licenses prior getting the resource allocation. + +### Matlab Distributed Computing Engines Start Up Time + +Starting Matlab workers is an expensive process that requires certain amount of time. 
+ For your information please see the following table:
+
+| compute nodes | number of workers | start-up time[s] |
+| ------------- | ----------------- | ---------------- |
+| 16 | 384 | 831 |
+| 8 | 192 | 807 |
+| 4 | 96 | 483 |
+| 2 | 48 | 16 |
+
+## MATLAB on UV2000
+
+The UV2000 machine, available in the queue "qfat", can be used for MATLAB computations. This is an SMP NUMA machine with a large amount of RAM, which can be beneficial for certain types of MATLAB jobs. CPU cores are allocated in chunks of 8 for this machine.
+
+You can use MATLAB on UV2000 in two parallel modes:
+
+### Threaded Mode
+
+Since this is an SMP machine, you can completely avoid using Parallel Toolbox and use only MATLAB's threading. MATLAB will automatically detect the number of cores you have allocated and will set maxNumCompThreads accordingly, and certain operations, such as `fft`, `eig`, `svd` etc. will be automatically run in threads. The advantage of this mode is that you don't need to modify your existing sequential codes.
+
+### Local Cluster Mode
+
+You can also use Parallel Toolbox on UV2000. Use [local cluster mode](matlab/#parallel-matlab-batch-job-in-local-mode); the "SalomonPBSPro" profile will not work.
diff --git a/docs.it4i/anselm/storage.md index 3a3d85f50b2a4d7dd78e3bc96f7a58b480b53be7..579a58d170ef5e91be7d65c2566218b658b614c1 100644
--- a/docs.it4i/anselm/storage.md
+++ b/docs.it4i/anselm/storage.md
@@ -1,6 +1,6 @@
 # Storage
-There are two main shared file systems on Anselm cluster, the [HOME](#home) and [SCRATCH](#scratch). All login and compute nodes may access same data on shared file systems. Compute nodes are also equipped with local (non-shared) scratch, ramdisk and tmp file systems.
+There are two main shared file systems on Anselm cluster, the [HOME](#home) and [SCRATCH](#scratch). All login and compute nodes may access same data on shared file systems. Compute nodes are also equipped with local (non-shared) scratch, RAM disk and tmp file systems.
 ## Archiving
@@ -352,7 +352,7 @@ First, create the mount point
 $ mkdir cesnet
 ```
-Mount the storage. Note that you can choose among the ssh.du1.cesnet.cz (Plzen), ssh.du2.cesnet.cz (Jihlava), ssh.du3.cesnet.cz (Brno) Mount tier1_home **(only 5120M !)**:
+Mount the storage. Note that you can choose among the ssh.du1.cesnet.cz (Plzen), ssh.du2.cesnet.cz (Jihlava), ssh.du3.cesnet.cz (Brno) Mount tier1_home **(only 5120 MB !)**:
 ```console
 $ sshfs username@ssh.du1.cesnet.cz:. cesnet/
@@ -384,16 +384,23 @@ Once done, please remember to unmount the storage
 $ fusermount -u cesnet
 ```
 ### Rsync Access
 !!! note
     Rsync provides delta transfer for best performance, can resume interrupted transfers
-Rsync is a fast and extraordinarily versatile file copying tool. It is famous for its delta-transfer algorithm, which reduces the amount of data sent over the network by sending only the differences between the source files and the existing files in the destination. Rsync is widely used for backups and mirroring and as an improved copy command for everyday use.
-Rsync finds files that need to be transferred using a "quick check" algorithm (by default) that looks for files that have changed in size or in last-modified time.
Any changes in the other preserved attributes (as requested by options) are made on the destination file directly when the quick check indicates that the file's data does not need to be updated. +RSYNC is a fast and extraordinarily versatile file copying tool. It is famous for its delta-transfer algorithm, which reduces the amount of data sent over the network by sending only the differences between the source files and the existing files in the destination. RSYNC is widely used for backups and mirroring and as an improved copy command for everyday use. -[More about Rsync](https://du.cesnet.cz/en/navody/rsync/start#pro_bezne_uzivatele) +RSYNC finds files that need to be transferred using a "quick check" algorithm (by default) that looks for files that have changed in size or in last-modified time. Any changes in the other preserved attributes (as requested by options) are made on the destination file directly when the quick check indicates that the file's data does not need to be updated. + +[More about RSYNC](https://du.cesnet.cz/en/navody/rsync/start#pro_bezne_uzivatele) Transfer large files to/from CESNET storage, assuming membership in the Storage VO diff --git a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md index 6263d2c68cea9f4551365a96386e3fcdf79a4f9c..fd24099552c23b8c56be5b0c995d0f0995b19744 100644 --- a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md +++ b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md @@ -122,7 +122,7 @@ However this method does not seem to work with recent Linux distributions and yo ## Gnome on Windows -Use Xlaunch to start the Xming server or run the XWin.exe. Select the "One window" mode. +Use XLaunch to start the Xming server or run the XWin.exe. Select the "One window" mode. Log in to the cluster, using PuTTY. On the cluster, run the gnome-session command. diff --git a/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md b/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md index 6f45c5a38f65df2f190eb5834ae8903164ae7057..003ac83a0236254faf259f4bfcaf7a369d41a866 100644 --- a/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md +++ b/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md @@ -107,4 +107,4 @@ In this example, we add an additional public key, stored in file additional_key. ## How to Remove Your Own Key -Removing your key from authorized_keys can be done simply by deleting the corresponding public key which can be identified by a comment at the end of line (eg. `username@organization.example.com`). +Removing your key from authorized_keys can be done simply by deleting the corresponding public key which can be identified by a comment at the end of line (e.g. _username@organization.example.com_). diff --git a/docs.it4i/salomon/environment-and-modules.md b/docs.it4i/salomon/environment-and-modules.md index d52d0e2b56fc20ca5cc8576487ce74de33780b9e..56a095c7e359aeae48186c28a494e8995d542178 100644 --- a/docs.it4i/salomon/environment-and-modules.md +++ b/docs.it4i/salomon/environment-and-modules.md @@ -24,7 +24,7 @@ fi ``` !!! note - Do not run commands outputting to standard output (echo, module list, etc) in .bashrc for non-interactive SSH sessions. It breaks fundamental functionality (scp, PBS) of your account! 
Take care for SSH session interactivity for such commands as stated in the previous example. + Do not run commands outputting to standard output (echo, module list, etc) in .bashrc for non-interactive SSH sessions. It breaks fundamental functionality (SCP, PBS) of your account! Take care for SSH session interactivity for such commands as stated in the previous example. ### Application Modules diff --git a/docs.it4i/salomon/introduction.md b/docs.it4i/salomon/introduction.md index 83ff79221fc01aadbf0cfa1258220778bc275308..bc466a8d89a4fb78292e076744ca35563511a646 100644 --- a/docs.it4i/salomon/introduction.md +++ b/docs.it4i/salomon/introduction.md @@ -1,6 +1,6 @@ # Introduction -Welcome to Salomon supercomputer cluster. The Salomon cluster consists of 1008 compute nodes, totaling 24192 compute cores with 129 TB RAM and giving over 2 Pflop/s theoretical peak performance. Each node is a powerful x86-64 computer, equipped with 24 cores, at least 128 GB RAM. Nodes are interconnected by 7D Enhanced hypercube InfiniBand network and equipped with Intel Xeon E5-2680v3 processors. The Salomon cluster consists of 576 nodes without accelerators and 432 nodes equipped with Intel Xeon Phi MIC accelerators. Read more in [Hardware Overview](hardware-overview/). +Welcome to Salomon supercomputer cluster. The Salomon cluster consists of 1008 compute nodes, totaling 24192 compute cores with 129 TB RAM and giving over 2 PFLOP/s theoretical peak performance. Each node is a powerful x86-64 computer, equipped with 24 cores, at least 128 GB RAM. Nodes are interconnected by 7D Enhanced hypercube InfiniBand network and equipped with Intel Xeon E5-2680v3 processors. The Salomon cluster consists of 576 nodes without accelerators and 432 nodes equipped with Intel Xeon Phi MIC accelerators. Read more in [Hardware Overview](hardware-overview/). The cluster runs [CentOS Linux](http://www.bull.com/bullx-logiciels/systeme-exploitation.html) operating system, which is compatible with the RedHat [Linux family.](http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg) diff --git a/docs.it4i/salomon/job-submission-and-execution.md b/docs.it4i/salomon/job-submission-and-execution.md index 20fc147e7c43be7a344302dbce663218b65870d1..8330fdd3e44ab1dd59dd7ad12658809a75d02152 100644 --- a/docs.it4i/salomon/job-submission-and-execution.md +++ b/docs.it4i/salomon/job-submission-and-execution.md @@ -459,7 +459,7 @@ cd $SCRDIR || exit cp $PBS_O_WORKDIR/input . cp $PBS_O_WORKDIR/mympiprog.x . -# load the mpi module +# load the MPI module module load OpenMPI # execute the calculation @@ -497,7 +497,7 @@ Example jobscript for an MPI job with preloaded inputs and executables, options SCRDIR=/scratch/work/user/$USER/myjob cd $SCRDIR || exit -# load the mpi module +# load the MPI module module load OpenMPI # execute the calculation diff --git a/docs.it4i/salomon/software/ansys/ansys-cfx.md b/docs.it4i/salomon/software/ansys/ansys-cfx.md new file mode 100644 index 0000000000000000000000000000000000000000..2cf29101dd4ab162de9c7f52e2b19a58cc715f42 --- /dev/null +++ b/docs.it4i/salomon/software/ansys/ansys-cfx.md @@ -0,0 +1,55 @@ +# ANSYS CFX + +[ANSYS CFX](http://www.ansys.com/products/fluids/ansys-cfx) software is a high-performance, general purpose fluid dynamics program that has been applied to solve wide-ranging fluid flow problems for over 20 years. At the heart of ANSYS CFX is its advanced solver technology, the key to achieving reliable and accurate solutions quickly and robustly. 
The modern, highly parallelized solver is the foundation for an abundant choice of physical models to capture virtually any type of phenomena related to fluid flow. The solver and its many physical models are wrapped in a modern, intuitive, and flexible GUI and user environment, with extensive capabilities for customization and automation using session files, scripting and a powerful expression language. + +To run ANSYS CFX in batch mode you can utilize/modify the default cfx.pbs script and execute it via the qsub command. + +```bash +#!/bin/bash +#PBS -l nodes=2:ppn=16 +#PBS -q qprod +#PBS -N $USER-CFX-Project +#PBS -A XX-YY-ZZ + +#! Mail to user when job terminate or abort +#PBS -m ae + +#!change the working directory (default is home directory) +#cd <working directory> (working directory must exists) +WORK_DIR="/scratch/$USER/work" +cd $WORK_DIR + +echo Running on host `hostname` +echo Time is `date` +echo Directory is `pwd` +echo This jobs runs on the following processors: +echo `cat $PBS_NODEFILE` + +module load ansys + +#### Set number of processors per host listing +#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2) +procs_per_host=1 +#### Create host list +hl="" +for host in `cat $PBS_NODEFILE` +do + if ["$hl" = "" ] + then hl="$host:$procs_per_host" + else hl="${hl}:$host:$procs_per_host" + fi +done + +echo Machines: $hl + +#-dev input.def includes the input of CFX analysis in DEF format +#-P the name of prefered license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial)) +/ansys_inc/v145/CFX/bin/cfx5solve -def input.def -size 4 -size-ni 4x -part-large -start-method "Platform MPI Distributed Parallel" -par-dist $hl -P aa_r +``` + +Header of the pbs file (above) is common and description can be find on [this site](../../job-submission-and-execution/). SVS FEM recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources. + +Working directory has to be created before sending PBS job into the queue. Input file should be in working directory or full path to input file has to be specified. >Input file has to be defined by common CFX def file which is attached to the CFX solver via parameter -def + +**License** should be selected by parameter -P (Big letter **P**). Licensed products are the following: aa_r (ANSYS **Academic** Research), ane3fl (ANSYS Multiphysics)-**Commercial**. +[More about licensing here](licensing/) diff --git a/docs.it4i/salomon/software/ansys/ansys-fluent.md b/docs.it4i/salomon/software/ansys/ansys-fluent.md new file mode 100644 index 0000000000000000000000000000000000000000..f4867b5c75bcaa7b3e52400b601dd1d0cce43fe3 --- /dev/null +++ b/docs.it4i/salomon/software/ansys/ansys-fluent.md @@ -0,0 +1,162 @@ +# ANSYS Fluent + +[ANSYS Fluent](http://www.ansys.com/products/fluids/ansys-fluent) +software contains the broad physical modeling capabilities needed to model flow, turbulence, heat transfer, and reactions for industrial applications ranging from air flow over an aircraft wing to combustion in a furnace, from bubble columns to oil platforms, from blood flow to semiconductor manufacturing, and from clean room design to wastewater treatment plants. Special models that give the software the ability to model in-cylinder combustion, aeroacoustics, turbomachinery, and multiphase systems have served to broaden its reach. + +1. 
Common way to run Fluent over PBS file + +To run ANSYS Fluent in batch mode you can utilize/modify the default fluent.pbs script and execute it via the qsub command. + +```bash +#!/bin/bash +#PBS -S /bin/bash +#PBS -l nodes=2:ppn=16 +#PBS -q qprod +#PBS -N $USER-Fluent-Project +#PBS -A XX-YY-ZZ + +#! Mail to user when job terminate or abort +#PBS -m ae + +#!change the working directory (default is home directory) +#cd <working directory> (working directory must exists) +WORK_DIR="/scratch/$USER/work" +cd $WORK_DIR + +echo Running on host `hostname` +echo Time is `date` +echo Directory is `pwd` +echo This jobs runs on the following processors: +echo `cat $PBS_NODEFILE` + +#### Load ansys module so that we find the cfx5solve command +module load ansys + +# Use following line to specify MPI for message-passing instead +NCORES=`wc -l $PBS_NODEFILE |awk '{print $1}'` + +/ansys_inc/v145/fluent/bin/fluent 3d -t$NCORES -cnf=$PBS_NODEFILE -g -i fluent.jou +``` + +Header of the pbs file (above) is common and description can be find on [this site](../../resources-allocation-policy/). [SVS FEM](http://www.svsfem.cz) recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources. + +Working directory has to be created before sending pbs job into the queue. Input file should be in working directory or full path to input file has to be specified. Input file has to be defined by common Fluent journal file which is attached to the Fluent solver via parameter -i fluent.jou + +Journal file with definition of the input geometry and boundary conditions and defined process of solution has e.g. the following structure: + +```bash + /file/read-case aircraft_2m.cas.gz + /solve/init + init + /solve/iterate + 10 + /file/write-case-dat aircraft_2m-solution + /exit yes +``` + +The appropriate dimension of the problem has to be set by parameter (2d/3d). + +1. Fast way to run Fluent from command line + +```bash +fluent solver_version [FLUENT_options] -i journal_file -pbs +``` + +This syntax will start the ANSYS FLUENT job under PBS Professional using the qsub command in a batch manner. When resources are available, PBS Professional will start the job and return a job ID, usually in the form of _job_ID.hostname_. This job ID can then be used to query, control, or stop the job using standard PBS Professional commands, such as qstat or qdel. The job will be run out of the current working directory, and all output will be written to the file fluent.o _job_ID_. + +1. Running Fluent via user's config file + +The sample script uses a configuration file called pbs_fluent.conf if no command line arguments are present. This configuration file should be present in the directory from which the jobs are submitted (which is also the directory in which the jobs are executed). The following is an example of what the content of pbs_fluent.conf can be: + +```bash +input="example_small.flin" +case="Small-1.65m.cas" +fluent_args="3d -pmyrinet" +outfile="fluent_test.out" +mpp="true" +``` + +The following is an explanation of the parameters: + +input is the name of the input file. + +case is the name of the .cas file that the input file will utilize. + +fluent_args are extra ANSYS FLUENT arguments. As shown in the previous example, you can specify the interconnect by using the -p interconnect command. 
The available interconnects include ethernet (the default), myrinet, infiniband, vendor, altix, and crayx. The MPI is selected automatically, based on the specified interconnect.
+
+outfile is the name of the file to which the standard output will be sent.
+
+mpp="true" will tell the job script to execute the job across multiple processors.
+
+To run ANSYS Fluent in batch mode with a user's config file, you can utilize/modify the following script and execute it via the qsub command.
+
+```bash
+#!/bin/sh
+#PBS -l nodes=2:ppn=4
+#PBS -q qprod
+#PBS -N $USER-Fluent-Project
+#PBS -A XX-YY-ZZ
+
+ cd $PBS_O_WORKDIR
+
+ #We assume that if they didn't specify arguments then they should use the
+ #config file
+ if [ "xx${input}${case}${mpp}${fluent_args}zz" = "xxzz" ]; then
+   if [ -f pbs_fluent.conf ]; then
+     . pbs_fluent.conf
+   else
+     printf "No command line arguments specified, "
+     printf "and no configuration file found. Exiting \n"
+   fi
+ fi
+
+ #Augment the ANSYS FLUENT command line arguments
+ case "$mpp" in
+   true)
+     #MPI job execution scenario
+     num_nodes=`cat $PBS_NODEFILE | sort -u | wc -l`
+     cpus=`expr $num_nodes \* $NCPUS`
+     #Default arguments for mpp jobs, these should be changed to suit your
+     #needs.
+     fluent_args="-t${cpus} $fluent_args -cnf=$PBS_NODEFILE"
+     ;;
+   *)
+     #SMP case
+     #Default arguments for smp jobs, should be adjusted to suit your
+     #needs.
+     fluent_args="-t$NCPUS $fluent_args"
+     ;;
+ esac
+ #Default arguments for all jobs
+ fluent_args="-ssh -g -i $input $fluent_args"
+
+ echo "---------- Going to start a fluent job with the following settings:
+ Input: $input
+ Case: $case
+ Output: $outfile
+ Fluent arguments: $fluent_args"
+
+ #run the solver
+ /ansys_inc/v145/fluent/bin/fluent $fluent_args > $outfile
+```
+
+It runs the jobs out of the directory from which they are submitted (PBS_O_WORKDIR).
+
+1. Running Fluent in parallel
+
+Fluent can be run in parallel only under the Academic Research license. To do so, the ANSYS Academic Research license must be placed before the ANSYS CFD license in the user preferences. To make this change, run the anslic_admin utility:
+
+```bash
+/ansys_inc/shared_les/licensing/lic_admin/anslic_admin
+```
+
+The ANSLIC_ADMIN utility will start. The ANSYS Academic Research license should be moved up to the top of the list.
+
diff --git a/docs.it4i/salomon/software/ansys/ansys-ls-dyna.md b/docs.it4i/salomon/software/ansys/ansys-ls-dyna.md
new file mode 100644
index 0000000000000000000000000000000000000000..89f53c988715cee93da0af78988b42a49438f6e1
--- /dev/null
+++ b/docs.it4i/salomon/software/ansys/ansys-ls-dyna.md
@@ -0,0 +1,59 @@
+# ANSYS LS-DYNA
+
+**[ANSYS LS-DYNA](http://www.ansys.com/products/structures/ansys-ls-dyna)** software provides convenient and easy-to-use access to the technology-rich, time-tested explicit solver without the need to contend with the complex input requirements of this sophisticated program. Introduced in 1996, ANSYS LS-DYNA capabilities have helped customers in numerous industries to resolve highly intricate design issues. ANSYS Mechanical users have been able to take advantage of complex explicit solutions for a long time utilizing the traditional ANSYS Parametric Design Language (APDL) environment. These explicit capabilities are available to ANSYS Workbench users as well. The Workbench platform is a powerful, comprehensive, easy-to-use environment for engineering simulation.
CAD import from all sources, geometry cleanup, automatic meshing, solution, parametric optimization, result visualization and comprehensive report generation are all available within a single fully interactive modern graphical user environment.
+
+To run ANSYS LS-DYNA in batch mode, you can utilize/modify the default ansysdyna.pbs script and execute it via the qsub command.
+
+```bash
+#!/bin/bash
+#PBS -l nodes=2:ppn=16
+#PBS -q qprod
+#PBS -N $USER-DYNA-Project
+#PBS -A XX-YY-ZZ
+
+#! Mail to user when job terminates or aborts
+#PBS -m ae
+
+#!change the working directory (default is home directory)
+#cd <working directory>
+WORK_DIR="/scratch/$USER/work"
+cd $WORK_DIR
+
+echo Running on host `hostname`
+echo Time is `date`
+echo Directory is `pwd`
+echo This job runs on the following processors:
+echo `cat $PBS_NODEFILE`
+
+#! Counts the number of processors
+NPROCS=`wc -l < $PBS_NODEFILE`
+
+echo This job has allocated $NPROCS processors
+
+module load ansys
+
+#### Set number of processors per host listing
+#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2)
+procs_per_host=1
+#### Create host list
+hl=""
+for host in `cat $PBS_NODEFILE`
+do
+ if [ "$hl" = "" ]
+ then hl="$host:$procs_per_host"
+ else hl="${hl}:$host:$procs_per_host"
+ fi
+done
+
+echo Machines: $hl
+
+/ansys_inc/v145/ansys/bin/ansys145 -dis -lsdynampp i=input.k -machines $hl
+```
+
+The header of the PBS file (above) is common and its description can be found on [this site](../../job-submission-and-execution/). [SVS FEM](http://www.svsfem.cz) recommends specifying resources via the nodes and ppn keywords, which directly set the number of nodes (computers) and cores per node (ppn) used by the job. The rest of the script assumes that resources were allocated in this way.
+
+The working directory has to be created before the PBS job is submitted to the queue. The input file should be in the working directory, or the full path to the input file has to be specified. The input file has to be a common LS-DYNA .**k** file, which is passed to the ANSYS solver via the i= parameter.
diff --git a/docs.it4i/salomon/software/ansys/ansys-mechanical-apdl.md b/docs.it4i/salomon/software/ansys/ansys-mechanical-apdl.md
new file mode 100644
index 0000000000000000000000000000000000000000..0bde6f3a1c8b5d729f0de715863186b189beb74d
--- /dev/null
+++ b/docs.it4i/salomon/software/ansys/ansys-mechanical-apdl.md
@@ -0,0 +1,56 @@
+# ANSYS MAPDL
+
+**[ANSYS Multiphysics](http://www.ansys.com/products/multiphysics)**
+software offers a comprehensive product solution for both multiphysics and single-physics analysis. The product includes structural, thermal, fluid and both high- and low-frequency electromagnetic analysis. The product also contains solutions for both direct and sequentially coupled physics problems including direct coupled-field elements and the ANSYS multi-field solver.
+
+To run ANSYS MAPDL in batch mode, you can utilize/modify the default mapdl.pbs script and execute it via the qsub command.
+
+```bash
+#!/bin/bash
+#PBS -l nodes=2:ppn=16
+#PBS -q qprod
+#PBS -N $USER-ANSYS-Project
+#PBS -A XX-YY-ZZ
+
+#! Mail to user when job terminates or aborts
+#PBS -m ae
+
+#!change the working directory (default is home directory)
+#cd <working directory> (working directory must exist)
+WORK_DIR="/scratch/$USER/work"
+cd $WORK_DIR
+
+echo Running on host `hostname`
+echo Time is `date`
+echo Directory is `pwd`
+echo This job runs on the following processors:
+echo `cat $PBS_NODEFILE`
+
+module load ansys
+
+#### Set number of processors per host listing
+#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2)
+procs_per_host=1
+#### Create host list
+hl=""
+for host in `cat $PBS_NODEFILE`
+do
+ if [ "$hl" = "" ]
+ then hl="$host:$procs_per_host"
+ else hl="${hl}:$host:$procs_per_host"
+ fi
+done
+
+echo Machines: $hl
+
+#-i input.dat includes the input of the analysis in APDL format
+#-o file.out is the output file from ansys to which all text output will be redirected
+#-p the name of the license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial), aa_r_dy=Academic AUTODYN)
+/ansys_inc/v145/ansys/bin/ansys145 -b -dis -p aa_r -i input.dat -o file.out -machines $hl -dir $WORK_DIR
+```
+
+The header of the PBS file (above) is common and its description can be found on [this site](../../resources-allocation-policy/). [SVS FEM](http://www.svsfem.cz) recommends specifying resources via the nodes and ppn keywords, which directly set the number of nodes (computers) and cores per node (ppn) used by the job. The rest of the script assumes that resources were allocated in this way.
+
+The working directory has to be created before the PBS job is submitted to the queue. The input file should be in the working directory, or the full path to the input file has to be specified. The input file has to be a common APDL file, which is passed to the ANSYS solver via the -i parameter.
+
+**License** should be selected by the parameter -p. Licensed products are the following: aa_r (ANSYS **Academic** Research), ane3fl (ANSYS Multiphysics)-**Commercial**, aa_r_dy (ANSYS **Academic** AUTODYN). [More about licensing here](licensing/)
diff --git a/docs.it4i/salomon/software/ansys/ansys.md b/docs.it4i/salomon/software/ansys/ansys.md
new file mode 100644
index 0000000000000000000000000000000000000000..a5cac322dbc77eb794401e4dd964dd79b9fbbc23
--- /dev/null
+++ b/docs.it4i/salomon/software/ansys/ansys.md
@@ -0,0 +1,15 @@
+# Overview of ANSYS Products
+
+**[SVS FEM](http://www.svsfem.cz/)**, as the **[ANSYS Channel partner](http://www.ansys.com/)** for the Czech Republic, provided all ANSYS licenses for the Anselm cluster and provides support for all ANSYS products (Multiphysics, Mechanical, MAPDL, CFX, Fluent, Maxwell, LS-DYNA...) to IT staff and ANSYS users. If you run into a problem with ANSYS functionality, please contact [hotline@svsfem.cz](mailto:hotline@svsfem.cz?subject=Ostrava%20-%20ANSELM)
+
+Anselm provides both commercial and academic variants. Academic variants are distinguished by the word "**Academic...**" in the license name or by the two-letter prefix "**aa\_**" in the license feature name. The license is selected on the command line or directly in the user's PBS file (see the individual products). [More about licensing here](licensing/)
+
+To load the latest version of any ANSYS product (Mechanical, Fluent, CFX, MAPDL,...) load the module:
+
+```bash
+ $ module load ansys
+```
+
+ANSYS supports an interactive mode, but since it is assumed to be used for extremely demanding tasks, interactive use is not recommended.
+
+If a user needs to work interactively, we recommend configuring the RSM service on the client machine, which allows the solution to be forwarded to Anselm directly from the client's Workbench project (see ANSYS RSM service).
diff --git a/docs.it4i/salomon/software/chemistry/nwchem.md b/docs.it4i/salomon/software/chemistry/nwchem.md
new file mode 100644
index 0000000000000000000000000000000000000000..465adf2042cca69f801af48a2eb104f215a44187
--- /dev/null
+++ b/docs.it4i/salomon/software/chemistry/nwchem.md
@@ -0,0 +1,43 @@
+# NWChem
+
+## Introduction
+
+NWChem aims to provide its users with computational chemistry tools that are scalable both in their ability to treat large scientific computational chemistry problems efficiently, and in their use of available parallel computing resources from high-performance parallel supercomputers to conventional workstation clusters.
+
+[Homepage](http://www.nwchem-sw.org/index.php/Main_Page)
+
+## Installed Versions
+
+The following versions are currently installed:
+
+* NWChem/6.3.revision2-2013-10-17-Python-2.7.8, current release. Compiled with Intel compilers, MKL and Intel MPI
+* NWChem/6.5.revision26243-intel-2015b-2014-09-10-Python-2.7.8
+
+For a current list of installed versions, execute:
+
+```bash
+ module avail NWChem
+```
+
+We recommend using version 6.5. Version 6.3 fails on Salomon nodes with accelerators, because it attempts to communicate over the scif0 interface. In 6.5 this is avoided by setting ARMCI_OPENIB_DEVICE=mlx4_0; this setting is included in the module.
+
+## Running
+
+NWChem is compiled for parallel MPI execution. The normal procedure for MPI jobs applies. Sample jobscript:
+
+```bash
+ #PBS -A IT4I-0-0
+ #PBS -q qprod
+ #PBS -l select=1:ncpus=24:mpiprocs=24
+
+ cd $PBS_O_WORKDIR
+ module add NWChem/6.5.revision26243-intel-2015b-2014-09-10-Python-2.7.8
+ mpirun nwchem h2o.nw
+```
+
+## Options
+
+Please refer to [the documentation](http://www.nwchem-sw.org/index.php/Release62:Top-level) and set the following directives in the input file:
+
+* MEMORY : controls the amount of memory NWChem will use
+* SCRATCH_DIR : set this to a directory in the [SCRATCH filesystem](../../storage/storage/) (or run the calculation completely in a scratch directory). For certain calculations, it might be advisable to reduce I/O by forcing "direct" mode, e.g. `scf direct`
diff --git a/docs.it4i/salomon/software/debuggers/valgrind.md b/docs.it4i/salomon/software/debuggers/valgrind.md
new file mode 100644
index 0000000000000000000000000000000000000000..5e6a2c8e8246888a60e7026bb04ab433b921f658
--- /dev/null
+++ b/docs.it4i/salomon/software/debuggers/valgrind.md
@@ -0,0 +1,265 @@
+# Valgrind
+
+## About Valgrind
+
+Valgrind is an open-source tool, used mainly for debugging memory-related problems, such as memory leaks, use of uninitialized memory, etc. in C/C++ applications. The toolchain has, however, been extended over time with more functionality, such as debugging of threaded applications and cache profiling, and is not limited only to C/C++.
+
+Valgrind is an extremely useful tool for debugging memory errors such as [off-by-one](http://en.wikipedia.org/wiki/Off-by-one_error). Valgrind uses a virtual machine and dynamic recompilation of binary code; because of that, you can expect that programs being debugged by Valgrind run 5-100 times slower.
+
+The main tools available in Valgrind are:
+
+* **Memcheck**, the original, most used and default tool.
Verifies memory access in your program and can detect use of uninitialized memory, out of bounds memory access, memory leaks, double free, etc.
+* **Massif**, a heap profiler.
+* **Helgrind** and **DRD** can detect race conditions in multi-threaded applications.
+* **Cachegrind**, a cache profiler.
+* **Callgrind**, a callgraph analyzer.
+* For a full list and detailed documentation, please refer to the [official Valgrind documentation](http://valgrind.org/docs/).
+
+## Installed Versions
+
+There are several versions of Valgrind available on the cluster.
+
+* Version 3.8.1, installed by the operating system vendor in /usr/bin/valgrind. This version is available by default, without the need to load any module. This version, however, does not provide additional MPI support. Also, it does not support AVX2 instructions; debugging of an AVX2-enabled executable with this version will fail.
+* Version 3.11.0 built by ICC with support for Intel MPI, available in module Valgrind/3.11.0-intel-2015b. After loading the module, this version replaces the default valgrind.
+* Version 3.11.0 built by GCC with support for Open MPI, module Valgrind/3.11.0-foss-2015b
+
+## Usage
+
+Compile the application which you want to debug as usual. It is advisable to add the compilation flags -g (to add debugging information to the binary so that you will see original source code lines in the output) and -O0 (to disable compiler optimizations).
+
+For example, let's look at this C code, which has two problems:
+
+```cpp
+ #include <stdlib.h>
+
+ void f(void)
+ {
+    int* x = malloc(10 * sizeof(int));
+    x[10] = 0; // problem 1: heap block overrun
+ }             // problem 2: memory leak -- x not freed
+
+ int main(void)
+ {
+    f();
+    return 0;
+ }
+```
+
+Now, compile it with the Intel compiler:
+
+```bash
+ $ module add intel
+ $ icc -g valgrind-example.c -o valgrind-example
+```
+
+Now, let's run it with Valgrind. The syntax is:
+
+valgrind [valgrind options] <your program binary> [your program options]
+
+If no Valgrind options are specified, Valgrind defaults to running the Memcheck tool. Please refer to the Valgrind documentation for a full description of the command line options.
+
+```bash
+ $ valgrind ./valgrind-example
+ ==12652== Memcheck, a memory error detector
+ ==12652== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
+ ==12652== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==12652== Command: ./valgrind-example + ==12652== + ==12652== Invalid write of size 4 + ==12652== at 0x40053E: f (valgrind-example.c:6) + ==12652== by 0x40054E: main (valgrind-example.c:11) + ==12652== Address 0x5861068 is 0 bytes after a block of size 40 alloc'd + ==12652== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==12652== by 0x400528: f (valgrind-example.c:5) + ==12652== by 0x40054E: main (valgrind-example.c:11) + ==12652== + ==12652== + ==12652== HEAP SUMMARY: + ==12652== in use at exit: 40 bytes in 1 blocks + ==12652== total heap usage: 1 allocs, 0 frees, 40 bytes allocated + ==12652== + ==12652== LEAK SUMMARY: + ==12652== definitely lost: 40 bytes in 1 blocks + ==12652== indirectly lost: 0 bytes in 0 blocks + ==12652== possibly lost: 0 bytes in 0 blocks + ==12652== still reachable: 0 bytes in 0 blocks + ==12652== suppressed: 0 bytes in 0 blocks + ==12652== Rerun with --leak-check=full to see details of leaked memory + ==12652== + ==12652== For counts of detected and suppressed errors, rerun with: -v + ==12652== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 6 from 6) +``` + +In the output we can see that Valgrind has detected both errors - the off-by-one memory access at line 5 and a memory leak of 40 bytes. If we want a detailed analysis of the memory leak, we need to run Valgrind with --leak-check=full option: + +```bash + $ valgrind --leak-check=full ./valgrind-example + ==23856== Memcheck, a memory error detector + ==23856== Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al. + ==23856== Using Valgrind-3.6.0 and LibVEX; rerun with -h for copyright info + ==23856== Command: ./valgrind-example + ==23856== + ==23856== Invalid write of size 4 + ==23856== at 0x40067E: f (valgrind-example.c:6) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== Address 0x66e7068 is 0 bytes after a block of size 40 alloc'd + ==23856== at 0x4C26FDE: malloc (vg_replace_malloc.c:236) + ==23856== by 0x400668: f (valgrind-example.c:5) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== + ==23856== + ==23856== HEAP SUMMARY: + ==23856== in use at exit: 40 bytes in 1 blocks + ==23856== total heap usage: 1 allocs, 0 frees, 40 bytes allocated + ==23856== + ==23856== 40 bytes in 1 blocks are definitely lost in loss record 1 of 1 + ==23856== at 0x4C26FDE: malloc (vg_replace_malloc.c:236) + ==23856== by 0x400668: f (valgrind-example.c:5) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== + ==23856== LEAK SUMMARY: + ==23856== definitely lost: 40 bytes in 1 blocks + ==23856== indirectly lost: 0 bytes in 0 blocks + ==23856== possibly lost: 0 bytes in 0 blocks + ==23856== still reachable: 0 bytes in 0 blocks + ==23856== suppressed: 0 bytes in 0 blocks + ==23856== + ==23856== For counts of detected and suppressed errors, rerun with: -v + ==23856== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 6 from 6) +``` + +Now we can see that the memory leak is due to the malloc() at line 6. + +## Usage With MPI + +Although Valgrind is not primarily a parallel debugger, it can be used to debug parallel applications as well. When launching your parallel applications, prepend the valgrind command. 
For example: + +```bash + $ mpirun -np 4 valgrind myapplication +``` + +The default version without MPI support will however report a large number of false errors in the MPI library, such as: + +```bash + ==30166== Conditional jump or move depends on uninitialised value(s) + ==30166== at 0x4C287E8: strlen (mc_replace_strmem.c:282) + ==30166== by 0x55443BD: I_MPI_Processor_model_number (init_interface.c:427) + ==30166== by 0x55439E0: I_MPI_Processor_arch_code (init_interface.c:171) + ==30166== by 0x558D5AE: MPID_nem_impi_init_shm_configuration (mpid_nem_impi_extensions.c:1091) + ==30166== by 0x5598F4C: MPID_nem_init_ckpt (mpid_nem_init.c:566) + ==30166== by 0x5598B65: MPID_nem_init (mpid_nem_init.c:489) + ==30166== by 0x539BD75: MPIDI_CH3_Init (ch3_init.c:64) + ==30166== by 0x5578743: MPID_Init (mpid_init.c:193) + ==30166== by 0x554650A: MPIR_Init_thread (initthread.c:539) + ==30166== by 0x553369F: PMPI_Init (init.c:195) + ==30166== by 0x4008BD: main (valgrind-example-mpi.c:18) +``` + +so it is better to use the MPI-enabled valgrind from module. The MPI versions requires library: + +$EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so + +which must be included in the LD_PRELOAD environment variable. + +Lets look at this MPI example: + +```cpp + #include <stdlib.h> + #include <mpi.h> + + int main(int argc, char *argv[]) + { + int *data = malloc(sizeof(int)*99); + + MPI_Init(&argc, &argv); + MPI_Bcast(data, 100, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Finalize(); + + return 0; + } +``` + +There are two errors - use of uninitialized memory and invalid length of the buffer. Lets debug it with valgrind : + +```bash + $ module add intel impi + $ mpiicc -g valgrind-example-mpi.c -o valgrind-example-mpi + $ module add Valgrind/3.11.0-intel-2015b + $ mpirun -np 2 -env LD_PRELOAD $EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so valgrind ./valgrind-example-mpi +``` + +Prints this output : (note that there is output printed for every launched MPI process) + +```bash + ==31318== Memcheck, a memory error detector + ==31318== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. + ==31318== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==31318== Command: ./valgrind-example-mpi + ==31318== + ==31319== Memcheck, a memory error detector + ==31319== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. 
+ ==31319== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==31319== Command: ./valgrind-example-mpi + ==31319== + valgrind MPI wrappers 31319: Active for pid 31319 + valgrind MPI wrappers 31319: Try MPIWRAP_DEBUG=help for possible options + valgrind MPI wrappers 31318: Active for pid 31318 + valgrind MPI wrappers 31318: Try MPIWRAP_DEBUG=help for possible options + ==31319== Unaddressable byte(s) found during client check request + ==31319== at 0x4E35974: check_mem_is_addressable_untyped (libmpiwrap.c:960) + ==31319== by 0x4E5D0FE: PMPI_Bcast (libmpiwrap.c:908) + ==31319== by 0x400911: main (valgrind-example-mpi.c:20) + ==31319== Address 0x69291cc is 0 bytes after a block of size 396 alloc'd + ==31319== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31319== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31319== + ==31318== Uninitialised byte(s) found during client check request + ==31318== at 0x4E3591D: check_mem_is_defined_untyped (libmpiwrap.c:952) + ==31318== by 0x4E5D06D: PMPI_Bcast (libmpiwrap.c:908) + ==31318== by 0x400911: main (valgrind-example-mpi.c:20) + ==31318== Address 0x6929040 is 0 bytes inside a block of size 396 alloc'd + ==31318== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31318== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31318== + ==31318== Unaddressable byte(s) found during client check request + ==31318== at 0x4E3591D: check_mem_is_defined_untyped (libmpiwrap.c:952) + ==31318== by 0x4E5D06D: PMPI_Bcast (libmpiwrap.c:908) + ==31318== by 0x400911: main (valgrind-example-mpi.c:20) + ==31318== Address 0x69291cc is 0 bytes after a block of size 396 alloc'd + ==31318== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31318== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31318== + ==31318== + ==31318== HEAP SUMMARY: + ==31318== in use at exit: 3,172 bytes in 67 blocks + ==31318== total heap usage: 191 allocs, 124 frees, 81,203 bytes allocated + ==31318== + ==31319== + ==31319== HEAP SUMMARY: + ==31319== in use at exit: 3,172 bytes in 67 blocks + ==31319== total heap usage: 175 allocs, 108 frees, 48,435 bytes allocated + ==31319== + ==31318== LEAK SUMMARY: + ==31318== definitely lost: 408 bytes in 3 blocks + ==31318== indirectly lost: 256 bytes in 1 blocks + ==31318== possibly lost: 0 bytes in 0 blocks + ==31318== still reachable: 2,508 bytes in 63 blocks + ==31318== suppressed: 0 bytes in 0 blocks + ==31318== Rerun with --leak-check=full to see details of leaked memory + ==31318== + ==31318== For counts of detected and suppressed errors, rerun with: -v + ==31318== Use --track-origins=yes to see where uninitialised values come from + ==31318== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 4 from 4) + ==31319== LEAK SUMMARY: + ==31319== definitely lost: 408 bytes in 3 blocks + ==31319== indirectly lost: 256 bytes in 1 blocks + ==31319== possibly lost: 0 bytes in 0 blocks + ==31319== still reachable: 2,508 bytes in 63 blocks + ==31319== suppressed: 0 bytes in 0 blocks + ==31319== Rerun with --leak-check=full to see details of leaked memory + ==31319== + ==31319== For counts of detected and suppressed errors, rerun with: -v + ==31319== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4) +``` + +We can see that Valgrind has reported use of uninitialised memory on the master process (which reads the array to be broadcasted) and use of unaddressable memory on both processes. 
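+
+The summary above already points at the next step: to see where the uninitialised value originates and to get a full breakdown of the leaked blocks, the same wrapped run can be repeated with additional Memcheck options. A minimal sketch, reusing the module and LD_PRELOAD setup from the example above (note that origin tracking slows the run down further):
+
+```bash
+ # rerun the MPI example with full leak details and origin tracking for uninitialised values
+ $ mpirun -np 2 -env LD_PRELOAD $EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so valgrind --leak-check=full --track-origins=yes ./valgrind-example-mpi
+```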
diff --git a/docs.it4i/salomon/software/numerical-languages/octave.md b/docs.it4i/salomon/software/numerical-languages/octave.md
new file mode 100644
index 0000000000000000000000000000000000000000..7484d97a349b25178c75acb08c2f52db2595f4cc
--- /dev/null
+++ b/docs.it4i/salomon/software/numerical-languages/octave.md
@@ -0,0 +1,56 @@
+# Octave
+
+GNU Octave is a high-level interpreted language, primarily intended for numerical computations. It provides capabilities for the numerical solution of linear and nonlinear problems, and for performing other numerical experiments. It also provides extensive graphics capabilities for data visualization and manipulation. Octave is normally used through its interactive command line interface, but it can also be used to write non-interactive programs. The Octave language is quite similar to Matlab so that most programs are easily portable. Read more on <http://www.gnu.org/software/octave/>
+
+The following version of Octave is available on the cluster, via module:
+
+| Status | Version | module |
+| ---------- | ------------ | ------ |
+| **Stable** | Octave 3.8.2 | Octave |
+
+```bash
+ $ module load Octave
+```
+
+Octave on the cluster is linked against the highly optimized MKL mathematical library. This provides threaded parallelization to many Octave kernels, notably the linear algebra subroutines. Octave runs these heavy calculation kernels without any penalty. By default, Octave parallelizes to 24 threads. You may control the number of threads by setting the OMP_NUM_THREADS environment variable.
+
+To run Octave interactively, log in with the ssh -X parameter for X11 forwarding. Run Octave:
+
+```bash
+ $ octave
+```
+
+To run Octave in batch mode, write an Octave script, then write a bash jobscript and execute it via the qsub command. By default, Octave will use 24 threads when running MKL kernels.
+
+```bash
+ #!/bin/bash
+
+ # change to local scratch directory
+ mkdir -p /scratch/work/user/$USER/$PBS_JOBID
+ cd /scratch/work/user/$USER/$PBS_JOBID || exit
+
+ # copy input file to scratch
+ cp $PBS_O_WORKDIR/octcode.m .
+
+ # load octave module
+ module load Octave
+
+ # execute the calculation
+ octave -q --eval octcode > output.out
+
+ # copy output file to home
+ cp output.out $PBS_O_WORKDIR/.
+
+ #exit
+ exit
+```
+
+This script may be submitted directly to the PBS workload manager via the qsub command. The inputs are in the octcode.m file, the outputs in the output.out file. See the single node jobscript example in the [Job execution section](../../).
+
+The Octave C compiler mkoctfile calls GNU gcc 4.8.1 for compiling native C code. This is very useful for running native C subroutines in the Octave environment.
+
+```bash
+ $ mkoctfile -v
+```
+
+Octave may use MPI for inter-process communication. This functionality is currently not supported on the cluster. In case you require the Octave interface to MPI, please contact our [cluster support](https://support.it4i.cz/rt/).
diff --git a/docs.it4i/software/bio/omics-master/diagnostic-component-team.md b/docs.it4i/software/bio/omics-master/diagnostic-component-team.md
index ab091fd1e1615e820d0b660424b8fab39a36642f..f54fe184ada0258782d69e629ed23d58bea39958 100644
--- a/docs.it4i/software/bio/omics-master/diagnostic-component-team.md
+++ b/docs.it4i/software/bio/omics-master/diagnostic-component-team.md
@@ -11,7 +11,7 @@ TEAM is available at the [following address](http://omics.it4i.cz/team/)
 VCF files are scanned by this diagnostic tool for known diagnostic disease-associated variants.
When no diagnostic mutation is found, the file can be sent to the disease-causing gene discovery tool to see whether new disease associated variants can be found. -TEAM (27) is an intuitive and easy-to-use web tool that fills the gap between the predicted mutations and the final diagnostic in targeted enrichment sequencing analysis. The tool searches for known diagnostic mutations, corresponding to a disease panel, among the predicted patient’s variants. Diagnostic variants for the disease are taken from four databases of disease-related variants (HGMD-public, HUMSAVAR , ClinVar and COSMIC) If no primary diagnostic variant is found, then a list of secondary findings that can help to establish a diagnostic is produced. TEAM also provides with an interface for the definition of and customization of panels, by means of which, genes and mutations can be added or discarded to adjust panel definitions. +TEAM (27) is an intuitive and easy-to-use web tool that fills the gap between the predicted mutations and the final diagnostic in targeted enrichment sequencing analysis. The tool searches for known diagnostic mutations, corresponding to a disease panel, among the predicted patient’s variants. Diagnostic variants for the disease are taken from four databases of disease-related variants (HGMD, HUMSAVAR , ClinVar and COSMIC) If no primary diagnostic variant is found, then a list of secondary findings that can help to establish a diagnostic is produced. TEAM also provides with an interface for the definition of and customization of panels, by means of which, genes and mutations can be added or discarded to adjust panel definitions.  diff --git a/docs.it4i/software/cae/comsol/comsol-multiphysics.md b/docs.it4i/software/cae/comsol/comsol-multiphysics.md index 4827d2d4a97ec3f141b0cbf3a18e25ee85ead9b6..7a5d7d11fc39ee9b2e07ccb2326e0953b209c5b7 100644 --- a/docs.it4i/software/cae/comsol/comsol-multiphysics.md +++ b/docs.it4i/software/cae/comsol/comsol-multiphysics.md @@ -117,4 +117,4 @@ cd /apps/cae/COMSOL/51/mli matlab -nodesktop -nosplash -r "mphstart; addpath /scratch/work/user/$USER/work; test_job" ``` -This example shows how to run Livelink for MATLAB with following configuration: 3 nodes and 24 cores per node. Working directory has to be created before submitting (comsol_matlab.pbs) job script into the queue. Input file (test_job.m) has to be in working directory or full path to input file has to be specified. The Matlab command option (-r ”mphstart”) created a connection with a COMSOL server using the default port number. +This example shows how to run LiveLink for MATLAB with following configuration: 3 nodes and 16 cores per node. Working directory has to be created before submitting (comsol_matlab.pbs) job script into the queue. Input file (test_job.m) has to be in working directory or full path to input file has to be specified. The Matlab command option (-r ”mphstart”) created a connection with a COMSOL server using the default port number. 
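+
+As a minimal sketch of the preparation steps described above (the jobscript name comsol_matlab.pbs, the input file test_job.m and the working directory path are taken from this example and should be adapted to your case):
+
+```bash
+ # create the working directory referenced by the jobscript and stage the LiveLink input file
+ $ mkdir -p /scratch/work/user/$USER/work
+ $ cp test_job.m /scratch/work/user/$USER/work/
+ # submit the jobscript to the queue
+ $ qsub comsol_matlab.pbs
+```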
diff --git a/docs.it4i/software/cae/comsol/licensing-and-available-versions.md b/docs.it4i/software/cae/comsol/licensing-and-available-versions.md index 4358b930fedbfcdf3ea9277d2fa5c89e8a74ca37..be5a15f251c6b32dd3d432da682ef9651c3683ce 100644 --- a/docs.it4i/software/cae/comsol/licensing-and-available-versions.md +++ b/docs.it4i/software/cae/comsol/licensing-and-available-versions.md @@ -1,6 +1,6 @@ # Licensing and Available Versions -## Comsol Licence Can Be Used By: +## Comsol License Can Be Used By: * all persons in the carrying out of the CE IT4Innovations Project (In addition to the primary licensee, which is VSB - Technical University of Ostrava, users are CE IT4Innovations third parties - CE IT4Innovations project partners, particularly the University of Ostrava, the Brno University of Technology - Faculty of Informatics, the Silesian University in Opava, Institute of Geonics AS CR.) * all persons who have a valid license diff --git a/docs.it4i/software/chemistry/phono3py.md b/docs.it4i/software/chemistry/phono3py.md index 884f25cbf89a4b43afa3afc9a02fdbec171c9383..9aa2faa7d95b6d9a9bee6f1135efa37b9577f8fa 100644 --- a/docs.it4i/software/chemistry/phono3py.md +++ b/docs.it4i/software/chemistry/phono3py.md @@ -90,7 +90,7 @@ Once all jobs are finished and vasprun.xml is created in each disp-XXXXX directo $ phono3py --cf3 disp-{00001..00111}/vasprun.xml ``` -and `disp_fc2.yaml, FORCES_FC2`, `FORCES_FC3` and disp_fc3.yaml should appear and put into the hdf format by +and `disp_fc2.yaml, FORCES_FC2`, `FORCES_FC3` and disp_fc3.yaml should appear and put into the HDF format by ```console $ phono3py --dim="2 2 2" -c POSCAR diff --git a/docs.it4i/software/compilers.md b/docs.it4i/software/compilers.md index e264a458cb482a9bf0cd2ad684e227a01ef83987..f35b01f254d2fd873ba9452f1522036e0b26efb4 100644 --- a/docs.it4i/software/compilers.md +++ b/docs.it4i/software/compilers.md @@ -12,7 +12,7 @@ There are several compilers for different programming languages available on the The C/C++ and Fortran compilers are provided by: -Opensource: +Open source: * GNU GCC * Clang/LLVM @@ -83,11 +83,11 @@ For more information about the possibilities of the compilers, please see the ma UPC is supported by two compiler/runtime implementations: * GNU - SMP/multi-threading support only -* Berkley - multi-node support as well as SMP/multi-threading support +* Berkeley - multi-node support as well as SMP/multi-threading support ### GNU UPC Compiler -To use the GNU UPC compiler and run the compiled binaries use the module gupc +To use the GNU UPC compiler and run the compiled binaries use the module GUPC ```console $ module add gupc @@ -128,18 +128,18 @@ $ ./count.upc.x -fupc-threads-5 For more information see the man pages. -### Berkley UPC Compiler +### Berkeley UPC Compiler -To use the Berkley UPC compiler and runtime environment to run the binaries use the module bupc +To use the Berkeley UPC compiler and runtime environment to run the binaries use the module BUPC ```console $ module add BerkeleyUPC/2.16.2-gompi-2015b # on Anselm: ml bupc $ upcc -version ``` -As default UPC network the "smp" is used. This is very quick and easy way for testing/debugging, but limited to one node only. +As default UPC network the "SMP" is used. This is very quick and easy way for testing/debugging, but limited to one node only. -For production runs, it is recommended to use the native InfiniBand implementation of UPC network "ibv". For testing/debugging using multiple nodes, the "mpi" UPC network is recommended. 
+For production runs, it is recommended to use the native InfiniBand implementation of UPC network "IBV". For testing/debugging using multiple nodes, the "MPI" UPC network is recommended. !!! warning Selection of the network is done at the compile time and not at runtime (as expected)! @@ -163,7 +163,7 @@ int main() { } ``` -To compile the example with the "ibv" UPC network use +To compile the example with the "IBV" UPC network use ```console $ upcc -network=ibv -o hello.upc.x hello.upc diff --git a/docs.it4i/software/debuggers/allinea-performance-reports.md b/docs.it4i/software/debuggers/allinea-performance-reports.md index 14451cdb0a25a5704fcafb61b9b0a558b6df882c..530c00c5b028786e8833c72a24ae8ceae1db1c77 100644 --- a/docs.it4i/software/debuggers/allinea-performance-reports.md +++ b/docs.it4i/software/debuggers/allinea-performance-reports.md @@ -28,7 +28,7 @@ Instead of [running your MPI program the usual way](../mpi/mpi/), use the the pe $ perf-report mpirun ./mympiprog.x ``` -The mpi program will run as usual. The perf-report creates two additional files, in \*.txt and \*.html format, containing the performance report. Note that demanding MPI codes should be run within [the queue system](../../anselm/job-submission-and-execution/). +The MPI program will run as usual. The perf-report creates two additional files, in \*.txt and \*.html format, containing the performance report. Note that demanding MPI codes should be run within [the queue system](../../job-submission-and-execution/). ## Example diff --git a/docs.it4i/software/debuggers/cube.md b/docs.it4i/software/debuggers/cube.md index 4edf6ea02a445e633315ae4448c72e2c74a72fae..680a4e5053d088e68ed823ab4eb66a01224d0c45 100644 --- a/docs.it4i/software/debuggers/cube.md +++ b/docs.it4i/software/debuggers/cube.md @@ -30,7 +30,7 @@ CUBE is a graphical application. Refer to Graphical User Interface documentation !!! note Analyzing large data sets can consume large amount of CPU and RAM. Do not perform large analysis on login nodes. -After loading the appropriate module, simply launch cube command, or alternatively you can use scalasca -examine command to launch the GUI. Note that for Scalasca datasets, if you do not analyze the data with scalasca -examine before to opening them with CUBE, not all performance data will be available. +After loading the appropriate module, simply launch cube command, or alternatively you can use Scalasca -examine command to launch the GUI. Note that for Scalasca data sets, if you do not analyze the data with `scalasca -examine` before to opening them with CUBE, not all performance data will be available. References 1\. <http://www.scalasca.org/software/cube-4.x/download.html> diff --git a/docs.it4i/software/debuggers/intel-vtune-amplifier.md b/docs.it4i/software/debuggers/intel-vtune-amplifier.md index 8003916b32bb87ef712b2b412349388389e74d3e..8842e4f47d6b948a2263cc458f7ef78f39034ce1 100644 --- a/docs.it4i/software/debuggers/intel-vtune-amplifier.md +++ b/docs.it4i/software/debuggers/intel-vtune-amplifier.md @@ -22,7 +22,7 @@ $ qsub -q qexp -A OPEN-0-0 -I -l select=1,vtune=2016_update1 !!! 
warning Temporarily disabled - bug in the kernel ([intel-vtune-amplifier-impact-of-recent-os-security-updates](https://software.intel.com/en-us/articles/intel-vtune-amplifier-impact-of-recent-os-security-updates))
 
-After that, you can verify the modules sep\*, pax and vtsspp are present in the kernel :
+After that, you can verify the modules `sep*`, `pax` and `vtsspp` are present in the kernel :
 
 ```console
 $ lsmod | grep -e sep -e pax -e vtsspp
@@ -45,7 +45,7 @@ $ amplxe-gui
 The GUI will open in new window. Click on "New Project..." to create a new project. After clicking OK, a new window with project properties will appear. At "Application:", select the bath to your binary you want to profile (the binary should be compiled with -g flag). Some additional options such as command line arguments can be selected. At "Managed code profiling mode:" select "Native" (unless you want to profile managed mode .NET/Mono applications). After clicking OK, your project is created.
 
-To run a new analysis, click "New analysis...". You will see a list of possible analysis. Some of them will not be possible on the current CPU (eg. Intel Atom analysis is not possible on Sandy bridge CPU), the GUI will show an error box if you select the wrong analysis. For example, select "Advanced Hotspots". Clicking on Start will start profiling of the application.
+To run a new analysis, click "New analysis...". You will see a list of possible analyses. Some of them will not be possible on the current CPU (e.g. Intel Atom analysis is not possible on a Sandy Bridge CPU); the GUI will show an error box if you select the wrong analysis. For example, select "Advanced Hotspots". Clicking on Start will start profiling of the application.
 
 ## Remote Analysis
diff --git a/docs.it4i/software/debuggers/papi.md b/docs.it4i/software/debuggers/papi.md
index 15b03837e4bb18d2b8b6357d60bb29574b30fdae..a879f016497719ceb01c8b8decf4b2117add8a4b 100644
--- a/docs.it4i/software/debuggers/papi.md
+++ b/docs.it4i/software/debuggers/papi.md
@@ -10,13 +10,13 @@ PAPI can be used with parallel as well as serial programs.
 
 ## Usage
 
-To use PAPI, load [module](../../modules-matrix/) papi:
+To use PAPI, load [module](../../environment-and-modules/) PAPI:
 
 ```console
 $ ml papi
 ```
 
-This will load the default version. Execute module avail papi for a list of installed versions.
+This will load the default version. Execute `module avail papi` for a list of installed versions.
 
 ## Utilities
diff --git a/docs.it4i/software/debuggers/scalasca.md b/docs.it4i/software/debuggers/scalasca.md
index f8f1db9d0249bf7f25b448f3e017aadddff08181..8cf2fa2cf1a49db66b062136801887535312a86a 100644
--- a/docs.it4i/software/debuggers/scalasca.md
+++ b/docs.it4i/software/debuggers/scalasca.md
@@ -23,13 +23,13 @@ Profiling a parallel application with Scalasca consists of three steps:
 
 ### Instrumentation
 
-Instrumentation via " scalasca -instrument" is discouraged. Use [Score-P instrumentation](score-p/).
+Instrumentation via `scalasca -instrument` is discouraged. Use [Score-P instrumentation](score-p/).
 
 ### Runtime Measurement
 
 After the application is instrumented, runtime measurement can be performed with the `scalasca -analyze` command.
The syntax is: -`scalasca -analyze [scalasca options][launcher] [launcher options][program] [program options]` +`scalasca -analyze [scalasca options] [launcher] [launcher options] [program] [program options]` An example : @@ -61,7 +61,7 @@ If you do not wish to launch the GUI tool, use the "-s" option : scalasca -examine -s <experiment_directory> ``` -Alternatively you can open CUBE and load the data directly from here. Keep in mind that in that case the preprocessing is not done and not all metrics will be shown in the viewer. +Alternatively you can open CUBE and load the data directly from here. Keep in mind that in that case the pre-processing is not done and not all metrics will be shown in the viewer. Refer to [CUBE documentation](cube/) on usage of the GUI viewer. diff --git a/docs.it4i/software/debuggers/valgrind.md b/docs.it4i/software/debuggers/valgrind.md index b91eeb7a0642fb603a43029f577fb3310acee827..d94d205a7f26ce9bfdcbfa6c39aa24452bd1cad4 100644 --- a/docs.it4i/software/debuggers/valgrind.md +++ b/docs.it4i/software/debuggers/valgrind.md @@ -270,4 +270,4 @@ Prints this output : (note that there is output printed for every launched MPI p ==31319== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4) ``` -We can see that Valgrind has reported use of unitialised memory on the master process (which reads the array to be broadcast) and use of unaddresable memory on both processes. +We can see that Valgrind has reported use of uninitialized memory on the master process (which reads the array to be broadcast) and use of unaddressable memory on both processes. diff --git a/docs.it4i/software/debuggers/vampir.md b/docs.it4i/software/debuggers/vampir.md index 93d11f0ca571bd8dadd401b7f6173a3403683476..66a9439ae8e07bab5c0ac0225c34b1ae43071a67 100644 --- a/docs.it4i/software/debuggers/vampir.md +++ b/docs.it4i/software/debuggers/vampir.md @@ -1,6 +1,6 @@ # Vampir -Vampir is a commercial trace analysis and visualisation tool. It can work with traces in OTF and OTF2 formats. It does not have the functionality to collect traces, you need to use a trace collection tool (such as [Score-P](score-p/)) first to collect the traces. +Vampir is a commercial trace analysis and visualization tool. It can work with traces in OTF and OTF2 formats. It does not have the functionality to collect traces, you need to use a trace collection tool (such as [Score-P](score-p/)) first to collect the traces.  diff --git a/docs.it4i/software/intel/intel-suite/intel-compilers.md b/docs.it4i/software/intel/intel-suite/intel-compilers.md index 8e2ee714f6e5c61ec8b4e3b4522a3a06fdd11f46..92dd628492b5af8d5b86234c51eae17cbdb638d4 100644 --- a/docs.it4i/software/intel/intel-suite/intel-compilers.md +++ b/docs.it4i/software/intel/intel-suite/intel-compilers.md @@ -1,6 +1,6 @@ # Intel Compilers -The Intel compilers in multiple versions are available, via module intel. The compilers include the icc C and C++ compiler and the ifort fortran 77/90/95 compiler. +The Intel compilers in multiple versions are available, via module intel. The compilers include the icc C and C++ compiler and the ifort Fortran 77/90/95 compiler. 
```console $ ml intel @@ -17,7 +17,7 @@ $ icc -ipo -O3 -xCORE-AVX2 -qopt-report1 -qopt-report-phase=vec myprog.c mysub $ ifort -ipo -O3 -xCORE-AVX2 -qopt-report1 -qopt-report-phase=vec myprog.f mysubroutines.f -o myprog.x ``` -In this example, we compile the program enabling interprocedural optimizations between source files (-ipo), aggresive loop optimizations (-O3) and vectorization (-xCORE-AVX2) +In this example, we compile the program enabling interprocedural optimizations between source files (-ipo), aggressive loop optimizations (-O3) and vectorization (-xCORE-AVX2) The compiler recognizes the omp, simd, vector and ivdep pragmas for OpenMP parallelization and AVX2 vectorization. Enable the OpenMP parallelization by the **-openmp** compiler switch. diff --git a/docs.it4i/software/intel/intel-suite/intel-debugger.md b/docs.it4i/software/intel/intel-suite/intel-debugger.md index ac7cec6ad56acbc3705fcdc478531e2cade64c47..4bef1eeb6beb4a52abc9a6d0d41f2b3f8ff1ba94 100644 --- a/docs.it4i/software/intel/intel-suite/intel-debugger.md +++ b/docs.it4i/software/intel/intel-suite/intel-debugger.md @@ -68,6 +68,6 @@ $ mpirun -n 48 -idb ./mympiprog.x Run the idb debugger in GUI mode. The menu Parallel contains number of tools for debugging multiple threads. One of the most useful tools is the **Serialize Execution** tool, which serializes execution of concurrent threads for easy orientation and identification of concurrency related bugs. -## Further Information +## Further information -Exhaustive manual on idb features and usage is published at Intel website, <https://software.intel.com/sites/products/documentation/doclib/iss/2013/compiler/cpp-lin/> +Exhaustive manual on IDB features and usage is published at Intel website, <https://software.intel.com/sites/products/documentation/doclib/iss/2013/compiler/cpp-lin/> diff --git a/docs.it4i/software/intel/intel-suite/intel-trace-analyzer-and-collector.md b/docs.it4i/software/intel/intel-suite/intel-trace-analyzer-and-collector.md index b7bf6c92d3a03112392a86078037aeff28e8623f..b735d9e58b65cfb179a65c1119746f8a2d2cde44 100644 --- a/docs.it4i/software/intel/intel-suite/intel-trace-analyzer-and-collector.md +++ b/docs.it4i/software/intel/intel-suite/intel-trace-analyzer-and-collector.md @@ -1,6 +1,6 @@ # Intel Trace Analyzer and Collector -Intel Trace Analyzer and Collector (ITAC) is a tool to collect and graphicaly analyze behaviour of MPI applications. It helps you to analyze communication patterns of your application, identify hotspots, perform correctnes checking (identify deadlocks, data corruption etc), simulate how your application would run on a different interconnect. +Intel Trace Analyzer and Collector (ITAC) is a tool to collect and graphically analyze behavior of MPI applications. It helps you to analyze communication patterns of your application, identify hotspots, perform correctness checking (identify deadlocks, data corruption etc), simulate how your application would run on a different interconnect. ITAC is a offline analysis tool - first you run your application to collect a trace file, then you can open the trace in a GUI analyzer to view it. diff --git a/docs.it4i/software/isv_licenses.md b/docs.it4i/software/isv_licenses.md index f26319ec1c0bcbe64bc4ca0ae92975a60572cabd..63cd44719f4c4bed5bdac8f04c3f57915977315c 100644 --- a/docs.it4i/software/isv_licenses.md +++ b/docs.it4i/software/isv_licenses.md @@ -60,11 +60,13 @@ The general format of the name is `feature__APP__FEATURE`. 
Names of applications (APP): -* ansys -* comsol -* comsol-edu -* matlab -* matlab-edu +```bash + ansys + comsol + comsol-edu + matlab + matlab-edu +``` To get the FEATUREs of a license take a look into the corresponding state file ([see above](isv_licenses/#Licence)), or use: diff --git a/docs.it4i/software/mpi/mpi.md b/docs.it4i/software/mpi/mpi.md index b307a96223a47fd3b8ff86681e2e8b0f7a483d60..271280bed3bf2ed6414a2e9fbc57c7d2d05de87c 100644 --- a/docs.it4i/software/mpi/mpi.md +++ b/docs.it4i/software/mpi/mpi.md @@ -109,7 +109,7 @@ $ mpicc helloworld_mpi.c -o helloworld_mpi.x The MPI program executable must be compatible with the loaded MPI module. Always compile and execute using the very same MPI module. -It is strongly discouraged to mix mpi implementations. Linking an application with one MPI implementation and running mpirun/mpiexec form other implementation may result in unexpected errors. +It is strongly discouraged to mix MPI implementations. Linking an application with one MPI implementation and running mpirun/mpiexec form other implementation may result in unexpected errors. The MPI program executable must be available within the same path on all nodes. This is automatically fulfilled on the /home and /scratch filesystem. You need to preload the executable, if running on the local scratch /lscratch filesystem. @@ -125,7 +125,7 @@ Optimal way to run an MPI program depends on its memory requirements, memory acc **One MPI** process per node, using 24 threads, is most useful for memory demanding applications, that make good use of processor cache memory and are not memory bound. This is also a preferred way for communication intensive applications as one process per node enjoys full bandwidth access to the network interface. -**Two MPI** processes per node, using 12 threads each, bound to processor socket is most useful for memory bandwidth bound applications such as BLAS1 or FFT, with scalable memory demand. However, note that the two processes will share access to the network interface. The 12 threads and socket binding should ensure maximum memory access bandwidth and minimize communication, migration and numa effect overheads. +**Two MPI** processes per node, using 12 threads each, bound to processor socket is most useful for memory bandwidth bound applications such as BLAS1 or FFT, with scalable memory demand. However, note that the two processes will share access to the network interface. The 12 threads and socket binding should ensure maximum memory access bandwidth and minimize communication, migration and NUMA effect overheads. !!! note Important! Bind every OpenMP thread to a core! diff --git a/docs.it4i/software/numerical-languages/matlab_1314.md b/docs.it4i/software/numerical-languages/matlab_1314.md index 1c2d29d3a3053d9a7bec0c5fc777fb024f0be369..9760bf63c6fdc3df08e74fcd68dc7182b66effa5 100644 --- a/docs.it4i/software/numerical-languages/matlab_1314.md +++ b/docs.it4i/software/numerical-languages/matlab_1314.md @@ -81,7 +81,7 @@ $ xhost + $ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A NONE-0-0 -q qexp -l select=4:ncpus=16:mpiprocs=16 -l walltime=00:30:00 -l feature__matlab__MATLAB=1 ``` -This qsub command example shows how to run Matlab with 32 workers in following configuration: 2 nodes (use all 16 cores per node) and 16 workers = mpirocs per node (-l select=2:ncpus=16:mpiprocs=16). If user requires to run smaller number of workers per node then the "mpiprocs" parameter has to be changed. 
+This qsub command example shows how to run Matlab with 32 workers in following configuration: 2 nodes (use all 16 cores per node) and 16 workers = mpiprocs per node (-l select=2:ncpus=16:mpiprocs=16). If user requires to run smaller number of workers per node then the "mpiprocs" parameter has to be changed. The second part of the command shows how to request all necessary licenses. In this case 1 Matlab-EDU license and 32 Distributed Computing Engines licenses. diff --git a/docs.it4i/software/numerical-libraries/fftw.md b/docs.it4i/software/numerical-libraries/fftw.md index 7345a811672a725f3916d601d4164e377580b3ab..f9470514b305954baa95ec1be8c70d13f1c05b89 100644 --- a/docs.it4i/software/numerical-libraries/fftw.md +++ b/docs.it4i/software/numerical-libraries/fftw.md @@ -4,7 +4,7 @@ The discrete Fourier transform in one or more dimensions, MPI parallel FFTW is a C subroutine library for computing the discrete Fourier transform in one or more dimensions, of arbitrary input size, and of both real and complex data (as well as of even/odd data, e.g. the discrete cosine/sine transforms or DCT/DST). The FFTW library allows for MPI parallel, in-place discrete Fourier transform, with data distributed over number of nodes. -Two versions, **3.3.3** and **2.1.5** of FFTW are available on Anselm, each compiled for **Intel MPI** and **OpenMPI** using **intel** and **gnu** compilers. These are available via modules: +Two versions, **3.3.3** and **2.1.5** of FFTW are available on Anselm, each compiled for **Intel MPI** and **OpenMPI** using **Intel** and **gnu** compilers. These are available via modules: | Version | Parallelization | module | linker options | | -------------- | --------------- | ------------------- | ----------------------------------- | diff --git a/docs.it4i/software/numerical-libraries/hdf5.md b/docs.it4i/software/numerical-libraries/hdf5.md index 13f626264cab05dd93d091b0752d1a4a8df2dcf5..7b61eeb5f1e35f118654c5908523b4e0d88360ed 100644 --- a/docs.it4i/software/numerical-libraries/hdf5.md +++ b/docs.it4i/software/numerical-libraries/hdf5.md @@ -4,7 +4,7 @@ Hierarchical Data Format library. Serial and MPI parallel version. [HDF5 (Hierarchical Data Format)](http://www.hdfgroup.org/HDF5/) is a general purpose library and file format for storing scientific data. HDF5 can store two primary objects: datasets and groups. A dataset is essentially a multidimensional array of data elements, and a group is a structure for organizing objects in an HDF5 file. Using these two basic objects, one can create and store almost any kind of scientific data structure, such as images, arrays of vectors, and structured and unstructured grids. You can also mix and match them in HDF5 files according to your needs. -Versions **1.8.11** and **1.8.13** of HDF5 library are available on Anselm, compiled for **Intel MPI** and **OpenMPI** using **intel** and **gnu** compilers. These are available via modules: +Versions **1.8.11** and **1.8.13** of HDF5 library are available on Anselm, compiled for **Intel MPI** and **OpenMPI** using **Intel** and **gnu** compilers. 
diff --git a/docs.it4i/software/numerical-libraries/hdf5.md b/docs.it4i/software/numerical-libraries/hdf5.md
index 13f626264cab05dd93d091b0752d1a4a8df2dcf5..7b61eeb5f1e35f118654c5908523b4e0d88360ed 100644
--- a/docs.it4i/software/numerical-libraries/hdf5.md
+++ b/docs.it4i/software/numerical-libraries/hdf5.md
@@ -4,7 +4,7 @@ Hierarchical Data Format library. Serial and MPI parallel version.

[HDF5 (Hierarchical Data Format)](http://www.hdfgroup.org/HDF5/) is a general purpose library and file format for storing scientific data. HDF5 can store two primary objects: datasets and groups. A dataset is essentially a multidimensional array of data elements, and a group is a structure for organizing objects in an HDF5 file. Using these two basic objects, one can create and store almost any kind of scientific data structure, such as images, arrays of vectors, and structured and unstructured grids. You can also mix and match them in HDF5 files according to your needs.

-Versions **1.8.11** and **1.8.13** of HDF5 library are available on Anselm, compiled for **Intel MPI** and **OpenMPI** using **intel** and **gnu** compilers. These are available via modules:
+Versions **1.8.11** and **1.8.13** of HDF5 library are available on Anselm, compiled for **Intel MPI** and **OpenMPI** using **Intel** and **gnu** compilers. These are available via modules:

| Version               | Parallelization                   | module                     | C linker options      | C++ linker options      | Fortran linker options  |
| --------------------- | --------------------------------- | -------------------------- | --------------------- | ----------------------- | ----------------------- |

diff --git a/docs.it4i/software/numerical-libraries/petsc.md b/docs.it4i/software/numerical-libraries/petsc.md
index 214e4074ae075aec5ce70bfb3705bab3e7600b50..10e202e9309a215f86692fef3e4d7a0c2f291400 100644
--- a/docs.it4i/software/numerical-libraries/petsc.md
+++ b/docs.it4i/software/numerical-libraries/petsc.md
@@ -32,9 +32,7 @@ PETSc needs at least MPI, BLAS and LAPACK. These dependencies are currently sati

PETSc can be linked with a plethora of [external numerical libraries](http://www.mcs.anl.gov/petsc/miscellaneous/external.html), extending PETSc functionality, e.g. direct linear system solvers, preconditioners or partitioners. See below a list of libraries currently included in Anselm `petsc` modules. All these libraries can be used also alone, without PETSc. Their static or shared program libraries are available in
-`$PETSC_DIR/$PETSC_ARCH/lib` and header files in `$PETSC_DIR/$PETSC_ARCH/include`. `PETSC_DIR` and `PETSC_ARCH` are environment variables pointing to a specific PETSc instance based on the petsc module loaded.
-
-### Libraries Linked to PETSc on Anselm (As of 11 April 2015)
+`$PETSC_DIR/$PETSC_ARCH/lib` and header files in `$PETSC_DIR/$PETSC_ARCH/include`. `PETSC_DIR` and `PETSC_ARCH` are environment variables pointing to a specific PETSc instance based on the PETSc module loaded.

* dense linear algebra
  * [Elemental](http://libelemental.org/)

diff --git a/docs.it4i/software/numerical-libraries/trilinos.md b/docs.it4i/software/numerical-libraries/trilinos.md
index 36688e989a9b83b657707d988472109144e02226..c6c3fba1a8d1e4c765e7026ef6f77c4120e2da07 100644
--- a/docs.it4i/software/numerical-libraries/trilinos.md
+++ b/docs.it4i/software/numerical-libraries/trilinos.md
@@ -34,7 +34,7 @@ $ ml trilinos

For the compilation of CMake-aware project, Trilinos provides the FIND_PACKAGE( Trilinos ) capability, which makes it easy to build against Trilinos, including linking against the correct list of libraries. For details, see <http://trilinos.sandia.gov/Finding_Trilinos.txt>

-For compiling using simple makefiles, Trilinos provides Makefile.export system, which allows users to include important Trilinos variables directly into their makefiles. This can be done simply by inserting the following line into the makefile:
+For compiling using simple Makefiles, Trilinos provides the Makefile.export system, which allows users to include important Trilinos variables directly into their Makefiles. This can be done simply by inserting the following line into the Makefile:

```cpp
include Makefile.export.Trilinos
@@ -46,4 +46,4 @@ or

include Makefile.export.<package>
```

-if you are interested only in a specific Trilinos package. This will give you access to the variables such as Trilinos_CXX_COMPILER, Trilinos_INCLUDE_DIRS, Trilinos_LIBRARY_DIRS etc. For the detailed description and example makefile see <http://trilinos.sandia.gov/Export_Makefile.txt>.
+if you are interested only in a specific Trilinos package. This will give you access to variables such as Trilinos_CXX_COMPILER, Trilinos_INCLUDE_DIRS, Trilinos_LIBRARY_DIRS, etc. For a detailed description and an example Makefile see <http://trilinos.sandia.gov/Export_Makefile.txt>.
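Relating to the PETSc paths described above, a minimal compile-and-link sketch could look like the following — the source file name is a placeholder, and some PETSc builds split the library into several archives rather than a single `-lpetsc`:

```console
$ ml petsc
$ mpicc myapp.c -o myapp.x -I$PETSC_DIR/$PETSC_ARCH/include -L$PETSC_DIR/$PETSC_ARCH/lib -lpetsc
```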
diff --git a/docs.it4i/software/orca.md b/docs.it4i/software/orca.md
new file mode 100644
index 0000000000000000000000000000000000000000..8215a9ead80e0acdebf245ce93d9e74fe5bf07a5
--- /dev/null
+++ b/docs.it4i/software/orca.md
@@ -0,0 +1,98 @@
+# ORCA
+
+ORCA is a flexible, efficient and easy-to-use general purpose tool for quantum chemistry with specific emphasis on spectroscopic properties of open-shell molecules. It features a wide variety of standard quantum chemical methods ranging from semiempirical methods to DFT to single- and multi-reference correlated ab initio methods. It can also treat environmental and relativistic effects.
+
+## Making ORCA Available
+
+The following module command makes the latest version of ORCA available to your session:
+
+```bash
+$ module load ORCA/3_0_3-linux_x86-64
+```
+
+### Dependency
+
+```bash
+$ module list
+Currently Loaded Modulefiles:
+  1) /opt/modules/modulefiles/oscar-modules/1.0.3(default)
+  2) GCC/4.9.3-binutils-2.25
+  3) binutils/2.25-GCC-4.9.3-binutils-2.25
+  4) GNU/4.9.3-2.25
+  5) icc/2015.3.187-GNU-4.9.3-2.25
+  6) ifort/2015.3.187-GNU-4.9.3-2.25
+  7) iccifort/2015.3.187-GNU-4.9.3-2.25
+  8) numactl/2.0.10-iccifort-2015.3.187-GNU-4.9.3-2.25
+  9) hwloc/1.11.1-iccifort-2015.3.187-GNU-4.9.3-2.25
+ 10) OpenMPI/1.8.8-iccifort-2015.3.187-GNU-4.9.3-2.25
+ 11) ORCA/3_0_3-linux_x86-64
+```
+
+## Example Single Core Job
+
+Create a file called orca_serial.inp containing the following ORCA input:
+
+```cpp
+ # My first ORCA calculation :-)
+ #
+ # Taken from the Orca manual
+ # https://orcaforum.cec.mpg.de/OrcaManual.pdf
+ ! HF SVP
+ * xyz 0 1
+ C 0 0 0
+ O 0 0 1.13
+ *
+```
+
+Create a job script called submit_serial.sh that looks like this:
+
+```bash
+#!/bin/bash
+
+module load ORCA/3_0_3-linux_x86-64
+orca orca_serial.inp
+```
+
+Submit an interactive job to the queue and run the script:
+
+```bash
+$ qsub -q qexp -I -l select=1
+qsub: waiting for job 196821.isrv5 to start
+qsub: job 196821.isrv5 ready
+[username@r37u04n944 ~]$ ./submit_serial.sh
+
+                         *****************
+                         *  O  R  C  A   *
+                         *****************
+
+  --- An Ab Initio, DFT and Semiempirical electronic structure package ---
+
+         #######################################################
+         #                        -***-                        #
+         #   Department of molecular theory and spectroscopy   #
+         #              Directorship: Frank Neese              #
+         # Max Planck Institute for Chemical Energy Conversion #
+         #                 D-45470 Muelheim/Ruhr               #
+         #                       Germany                       #
+         #                                                     #
+         #                  All rights reserved                #
+         #                        -***-                        #
+         #######################################################
+
+...
+
+             ****ORCA TERMINATED NORMALLY****
+TOTAL RUN TIME: 0 days 0 hours 0 minutes 2 seconds 796 msec
+[username@r37u04n944 orca]$ logout
+
+qsub: job 196821.isrv5 completed
+```
+
+## Register as a User
+
+You are encouraged to register as a user of ORCA [here](https://orcaforum.cec.mpg.de/) in order to take advantage of updates, announcements and the users forum.
+
+## Documentation
+
+A comprehensive [PDF manual](https://orcaforum.cec.mpg.de/OrcaManual.pdf) is available online.
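For non-interactive use, the same run can also be wrapped in a regular PBS batch job. This is only a sketch — the queue, resource selection and output file below are illustrative placeholders:

```bash
#!/bin/bash
#PBS -q qexp
#PBS -l select=1:ncpus=1
#PBS -N orca_serial

# load ORCA and run the input deck from the submission directory
module load ORCA/3_0_3-linux_x86-64
cd $PBS_O_WORKDIR
orca orca_serial.inp > orca_serial.out
```

Submit it with `qsub submit_serial.sh` instead of starting an interactive session.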
diff --git a/docs.it4i/software/tools/ansys/ansys-cfx.md b/docs.it4i/software/tools/ansys/ansys-cfx.md
index 111c5e3663dd2102e1026f65fae2aeb1fbd8f599..5650e4cca323b5313f993629fe343e596e6663ae 100644
--- a/docs.it4i/software/tools/ansys/ansys-cfx.md
+++ b/docs.it4i/software/tools/ansys/ansys-cfx.md
@@ -49,7 +49,7 @@ echo Machines: $hl

Header of the PBS file (above) is common and description can be find on [this site](../../anselm/job-submission-and-execution/). SVS FEM recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources.

-Working directory has to be created before sending PBS job into the queue. Input file should be in working directory or full path to input file has to be specified. >Input file has to be defined by common CFX def file which is attached to the cfx solver via parameter -def
+The working directory has to be created before sending the PBS job into the queue. The input file should be in the working directory, or the full path to the input file has to be specified. The input file has to be a common CFX .def file, which is attached to the CFX solver via the parameter -def.

**License** should be selected by parameter -P (Big letter **P**). Licensed products are the following: aa_r (ANSYS **Academic** Research), ane3fl (ANSYS Multiphysics)-**Commercial**. [More about licensing here](licensing/)

diff --git a/docs.it4i/software/tools/ansys/licensing.md b/docs.it4i/software/tools/ansys/licensing.md
index eac78966d4b5183b2f0052d2ab6aea37f28eccc5..9aa26a1768888fbe7b750ed05b6b09221a62a804 100644
--- a/docs.it4i/software/tools/ansys/licensing.md
+++ b/docs.it4i/software/tools/ansys/licensing.md
@@ -1,6 +1,6 @@
# Licensing and Available Versions

-## ANSYS Licence Can Be Used By:
+## ANSYS License Can Be Used By:

* all persons in the carrying out of the CE IT4Innovations Project (In addition to the primary licensee, which is VSB - Technical University of Ostrava, users are CE IT4Innovations third parties - CE IT4Innovations project partners, particularly the University of Ostrava, the Brno University of Technology - Faculty of Informatics, the Silesian University in Opava, Institute of Geonics AS CR.)
* all persons who have a valid license

diff --git a/docs.it4i/software/tools/ansys/setting-license-preferences.md b/docs.it4i/software/tools/ansys/setting-license-preferences.md
index 769f7470269bf6603bcfa54f69efe66628c429db..e758895a46376a7e50012686218d2d8d11f4c985 100644
--- a/docs.it4i/software/tools/ansys/setting-license-preferences.md
+++ b/docs.it4i/software/tools/ansys/setting-license-preferences.md
@@ -1,6 +1,6 @@
# Setting License Preferences

-Some ANSYS tools allow you to explicitly specify usage of academic or commercial licenses in the command line (eg. ansys161 -p aa_r to select Academic Research license). However, we have observed that not all tools obey this option and choose commercial license.
+Some ANSYS tools allow you to explicitly specify usage of academic or commercial licenses in the command line (e.g. ansys161 -p aa_r to select the Academic Research license). However, we have observed that not all tools obey this option and choose a commercial license instead.

Thus you need to configure preferred license order with ANSLIC_ADMIN. Please follow these steps and move Academic Research license to the top or bottom of the list accordingly.
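For tools that do honour the command-line switch, an explicit request for the Academic Research license could look like the following sketch — the launcher name `ansys161` is taken from the example above, while the batch flag and input/output file names are illustrative placeholders:

```console
$ ansys161 -b -p aa_r -i input.dat -o output.out
```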
diff --git a/docs.it4i/software/tools/ansys/workbench.md b/docs.it4i/software/tools/ansys/workbench.md
index 0cd523838cc5a4367d3439354e20b1a3caa9fca7..fc33e5082490709e61f10b842d4975aa28407503 100644
--- a/docs.it4i/software/tools/ansys/workbench.md
+++ b/docs.it4i/software/tools/ansys/workbench.md
@@ -6,13 +6,13 @@ It is possible to run Workbench scripts in batch mode. You need to configure sol

-Enable Distribute Solution checkbox and enter number of cores (eg. 48 to run on two Salomon nodes). If you want the job to run on more then 1 node, you must also provide a so called MPI appfile. In the Additional Command Line Arguments input field, enter:
+Enable the Distribute Solution checkbox and enter the number of cores (e.g. 48 to run on two Salomon nodes). If you want the job to run on more than one node, you must also provide a so-called MPI appfile. In the Additional Command Line Arguments input field, enter:

```console
-mpifile /path/to/my/job/mpifile.txt
```

-Where /path/to/my/job is the directory where your project is saved. We will create the file mpifile.txt programatically later in the batch script. For more information, refer to \*ANSYS Mechanical APDL Parallel Processing\* \*Guide\*.
+Where /path/to/my/job is the directory where your project is saved. We will create the file mpifile.txt programmatically later in the batch script. For more information, refer to the \*ANSYS Mechanical APDL Parallel Processing Guide\*.

Now, save the project and close Workbench. We will use this script to launch the job:

diff --git a/docs.it4i/software/tools/virtualization.md b/docs.it4i/software/tools/virtualization.md
index 9491b324739408195f0c24f8dc5044abf9179da6..066d14b9c3ccecf5735f27074f0d3a2e5128568a 100644
--- a/docs.it4i/software/tools/virtualization.md
+++ b/docs.it4i/software/tools/virtualization.md
@@ -73,7 +73,7 @@ QEMU currently supports these image types or formats:

* vmdk - VMware 3 & 4, or 6 image format, for exchanging images with that product
* vdi - VirtualBox 1.1 compatible image format, for exchanging images with VirtualBox.

-You can convert your existing image using qemu-img convert command. Supported formats of this command are: blkdebug blkverify bochs cloop cow dmg file ftp ftps host_cdrom host_device host_floppy http https nbd parallels qcow qcow2 qed raw sheepdog tftp vdi vhdx vmdk vpc vvfat.
+You can convert your existing image using the `qemu-img convert` command. Supported formats of this command are: `blkdebug blkverify bochs cloop cow dmg file ftp ftps host_cdrom host_device host_floppy http https nbd parallels qcow qcow2 qed raw sheepdog tftp vdi vhdx vmdk vpc vvfat`.

We recommend using advanced QEMU native image format qcow2.

@@ -89,7 +89,7 @@ Remove all unnecessary software and files.

Remove all paging space, swap files, partitions, etc.

-Shrink your image. (It is recommended to zero all free space and reconvert image using qemu-img.)
+Shrink your image. (It is recommended to zero all free space and reconvert the image using `qemu-img`.)

### Modify Your Image for Running Jobs

@@ -228,7 +228,7 @@ $ qemu-system-x86_64 -hda linux.img -enable-kvm -cpu host -smp 16 -m 32768 -vga

$ qemu-system-x86_64 -hda win.img -enable-kvm -cpu host -smp 16 -m 32768 -vga std -localtime -usb -usbdevice tablet -vnc :0
```

-You can access virtual machine by VNC viewer (option -vnc) connecting to IP address of compute node. For VNC you must use VPN network.
+You can access the virtual machine by a VNC viewer (option `-vnc`) connecting to the IP address of the compute node. For VNC you must use the VPN network.

Install virtual machine from ISO file

@@ -299,7 +299,7 @@ Run SLIRP daemon over SSH tunnel on login node and connect it to virtual network

$ dpipe vde_plug /tmp/sw0 = ssh login1 $VDE2_DIR/bin/slirpvde -s - --dhcp &
```

-Run qemu using vde network back-end, connect to created virtual switch.
+Run QEMU using the VDE network back-end and connect it to the created virtual switch.

Basic setup (obsolete syntax)
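As a concrete illustration of the image conversion mentioned earlier in this file (file names are placeholders), a VirtualBox VDI image can be converted to the recommended qcow2 format like this:

```console
$ qemu-img convert -f vdi -O qcow2 mydisk.vdi mydisk.qcow2
```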
@@ -321,7 +321,7 @@ $ qemu-system-x86_64 ... -device virtio-net-pci,netdev=net0 -netdev vde,id=net0,

#### TAP Interconnect

-Both user and vde network back-end have low performance. For fast interconnect (10 Gbit/s and more) of compute node (host) and virtual machine (guest) we suggest using Linux kernel TAP device.
+Both the user and VDE network back-ends have low performance. For a fast interconnect (10 Gbit/s and more) between the compute node (host) and the virtual machine (guest) we suggest using the Linux kernel TAP device.

Cluster Anselm provides TAP device tap0 for your job. TAP interconnect does not provide any services (like NAT, DHCP, DNS, SMB, etc.) just raw networking, so you should provide your services if you need them.

@@ -331,7 +331,7 @@ To enable TAP interconect feature you need to specify virt_network=True PBS reso

$ qsub ... -l virt_network=True
```

-Run qemu with TAP network back-end:
+Run QEMU with the TAP network back-end:

```console
$ qemu-system-x86_64 ... -device virtio-net-pci,netdev=net1 -netdev tap,id=net1,ifname=tap0,script=no,downscript=no
```

@@ -341,9 +341,9 @@ Interface tap0 has IP address 192.168.1.1 and network mask 255.255.255.0 (/24).

Redirected ports:

-* DNS udp/53->udp/3053, tcp/53->tcp3053
-* DHCP udp/67->udp3067
-* SMB tcp/139->tcp3139, tcp/445->tcp3445).
+* DNS UDP/53->UDP/3053, TCP/53->TCP/3053
+* DHCP UDP/67->UDP/3067
+* SMB TCP/139->TCP/3139, TCP/445->TCP/3445

You can configure IP address of virtual machine statically or dynamically. For dynamic addressing provide your DHCP server on port 3067 of tap0 interface, you can also provide your DNS server on port 3053 of tap0 interface for example:

diff --git a/docs.it4i/software/viz/gpi2.md b/docs.it4i/software/viz/gpi2.md
index 1de40bd8a592cf0d450a8744f704a767004b2b6a..e3a158bae823a54359592ad16fc420eec2b49bfe 100644
--- a/docs.it4i/software/viz/gpi2.md
+++ b/docs.it4i/software/viz/gpi2.md
@@ -156,7 +156,7 @@ cn79 $ gaspi_run -m machinefile ./helloworld_gpi.x
Hello from rank 0 of 2
```

-At the same time, in another session, you may start the gaspi logger:
+At the same time, in another session, you may start the GASPI logger:

```console
$ ssh cn79