diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 5928ace6dd55c58f8ead4e72e839d6216f45c8f5..de9f89996a010d733321b4eac7cafbf42a3643c1 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,25 +6,26 @@ stages:
 docs:
   stage: test
   image: davidhrbac/docker-mdcheck:latest
-  allow_failure: true
   script:
-  - mdl -r ~MD013,~MD033,~MD014,~MD026,~MD037 *.md docs.it4i/
+  - mdl -r ~MD013,~MD033,~MD014,~MD026,~MD037,~MD036,~MD010,~MD029 *.md docs.it4i
 # BUGS
-two spaces:
-  stage: test
-  image: davidhrbac/docker-mdcheck:latest
-  allow_failure: true
-  script:
-  - echo "== Files having more than one space betwee two characters =="
-  - find docs.it4i/ -name '*.md' -exec grep "[[:alpha:]]  [[:alpha:]]" -l {} + || true
+# not functional, unnecessary, it also picks up console listings
+#two spaces:
+#  stage: test
+#  image: davidhrbac/docker-mdcheck:latest
+#  allow_failure: true
+#  before_script:
+#  - echo "== Files having more than one space between two characters =="
+#  - find docs.it4i/ -name '*.md' ! -path "docs.it4i/software*" -exec grep -nr "[[:alpha:]]  [[:alpha:]]" -l {} +
+#  script:
+#  - find docs.it4i/ -name '*.md' ! -path "docs.it4i/software*" -exec grep -nr "[[:alpha:]]  [[:alpha:]]" -l {} +
 
 capitalize:
   stage: test
   image: davidhrbac/docker-mkdocscheck:latest
-  allow_failure: true
+  # allow_failure: true
   script:
-  - scripts/titlemd_test.py mkdocs.yml
-  - find docs.it4i/ -name '*.md' -print0 | xargs -0 -n1 scripts/titlemd_test.py
+  - find mkdocs.yml docs.it4i/ \( -name '*.md' -o -name '*.yml' \) -print0 | xargs -0 -n1 scripts/titlemd_test.py
 
 spell check:
   stage: test
@@ -39,7 +40,7 @@ ext_links:
   image: davidhrbac/docker-mdcheck:latest
   allow_failure: true
   after_script:
-  # remove JSON results 
+  # remove JSON results
  - rm *.json
   script:
   #- find docs.it4i/ -name '*.md' -exec grep --color -l http {} + | xargs awesome_bot -t 10
@@ -59,42 +60,47 @@ mkdocs:
   stage: build
   image: davidhrbac/docker-mkdocscheck:latest
   script:
+  - mkdocs -V
   #- apt-get update
   #- apt-get -y install git
   # add version to footer
   - bash scripts/add_version.sh
-  # get modules list from clusters 
+  # get modules list from clusters
   - bash scripts/get_modules.sh
   # regenerate modules matrix
   - python scripts/modules-matrix.py > docs.it4i/modules-matrix.md
   - python scripts/modules-json.py > docs.it4i/modules-matrix.json
+  - curl -f0 https://scs-test.it4i.cz/devel/apidocs/master/scs_api.server_public.md -o docs.it4i/apiv1.md
   # build pages
   - mkdocs build
   # compress search_index.json
   #- bash scripts/clean_json.sh site/mkdocs/search_index.json
   # replace broken links in 404.html
   - sed -i 's,href="" title=",href="/" title=",g' site/404.html
+  - cp site/404.html site/403.html
+  - sed -i 's/404 - Not found/403 - Forbidden/g' site/403.html
   # compress sitemap
-  - gzip < site/sitemap.xml > site/sitemap.xml.gz 
+  - gzip < site/sitemap.xml > site/sitemap.xml.gz
   artifacts:
     paths:
     - site
     expire_in: 1 week
 
-shellcheck:
-  stage: test
-  image: davidhrbac/docker-shellcheck:latest
-  allow_failure: true
-  script:
-  - which shellcheck || apt-get update && apt-get install -y shellcheck
-  - find . -name *.sh -not -path "./docs.it4i/*" -not -path "./site/*" -exec shellcheck {} +
+## unnecessary in the documentation
+#shellcheck:
+#  stage: test
+#  image: davidhrbac/docker-shellcheck:latest
+#  allow_failure: true
+#  script:
+#  - which shellcheck || apt-get update && apt-get install -y shellcheck
+#  - find . -name *.sh -not -path "./docs.it4i/*" -not -path "./site/*" -exec shellcheck {} +
 
 deploy to stage:
-  environment: stage 
+  environment: stage
   stage: deploy
   image: davidhrbac/docker-mkdocscheck:latest
   before_script:
-  # install ssh-agent 
+  # install ssh-agent
   - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
   - 'which rsync || ( apt-get update -y && apt-get install rsync -y )'
   # run ssh-agent
@@ -117,7 +123,7 @@ deploy to production:
   stage: deploy
   image: davidhrbac/docker-mkdocscheck:latest
   before_script:
-  # install ssh-agent 
+  # install ssh-agent
   - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
   - 'which rsync || ( apt-get update -y && apt-get install rsync -y )'
   # run ssh-agent
@@ -127,7 +133,7 @@ deploy to production:
   # disable host key checking (NOTE: makes you susceptible to man-in-the-middle attacks)
   # WARNING: use only in docker container, if you use it with shell you will overwrite your user's ssh config
   - mkdir -p ~/.ssh
-  - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config 
+  - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
   - useradd -lM nginx
   script:
   - chown nginx:nginx site -R
diff --git a/.spelling b/.spelling
index 1cb0368c8bf540de371e775f49e4848e7a78d425..dbf08ddc0112fda8c57d67f854e437c571bad0ed 100644
--- a/.spelling
+++ b/.spelling
@@ -3,6 +3,14 @@
 # global dictionary is at the start, file overrides afterwards
 # one word per line, to define a file override use ' - filename'
 # where filename is relative to this configuration file
+CAE
+CUBE
+GPU
+GSL
+LMGC90
+LS-DYNA
+MAPDL
+GPI-2
 COM
 .ssh
 Anselm
@@ -260,3 +268,5 @@ r37u31n1008
 qsub
 it4ifree
 it4i.portal.clients
+x86
+x64
\ No newline at end of file
diff --git a/README.md b/README.md
index 78d4938aba704d2292ddb4e864a94f1435e90274..7ca5d57a12709555af7dac7b80f093118926f22d 100644
--- a/README.md
+++ b/README.md
@@ -29,8 +29,8 @@ Mellanox
 
 ### Formulas are made with:
 
-* https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/
-* https://www.mathjax.org/
+* [https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/](https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/)
+* [https://www.mathjax.org/](https://www.mathjax.org/)
 
 You can add formula to page like this:
 
diff --git a/docs.it4i/anselm/capacity-computing.md b/docs.it4i/anselm/capacity-computing.md
index b4a0c25b90aa93fccdf6a07d9c915d5da58411a1..2f7d0cb006540a50f990625026de582bd3fef4e2 100644
--- a/docs.it4i/anselm/capacity-computing.md
+++ b/docs.it4i/anselm/capacity-computing.md
@@ -1,4 +1,4 @@
-# Capacity computing
+# Capacity Computing
 
 ## Introduction
 
diff --git a/docs.it4i/anselm/compute-nodes.md b/docs.it4i/anselm/compute-nodes.md
index 4b92900592e095383ce08d2c73321f001b4c2983..d02fd3dba30505d50f299e7e87ffa292118abfb1 100644
--- a/docs.it4i/anselm/compute-nodes.md
+++ b/docs.it4i/anselm/compute-nodes.md
@@ -21,7 +21,7 @@ Anselm is cluster of x86-64 Intel based nodes built on Bull Extreme Computing bu
 * two Intel Sandy Bridge E5-2470, 8-core, 2.3GHz processors per node
 * 96 GB of physical memory per node
 * one 500GB SATA 2,5” 7,2 krpm HDD per node
-* GPU accelerator 1x NVIDIA Tesla Kepler K20 per node
+* GPU accelerator 1x NVIDIA Tesla Kepler K20m per node
 * bullx B515 blade servers
 * cn[181-203]
 
@@ -52,12 +52,12 @@ Anselm is cluster of x86-64 Intel based nodes built on Bull Extreme Computing bu
 
 ### Compute Nodes Summary
 
-| Node type                  | Count | Range       | Memory | Cores       | [Access](resources-allocation-policy/) |
-| -------------------------- | ----- | ----------- | ------ | ----------- | -------------------------------------- |
-| Nodes without accelerator  | 180   | cn[1-180]   | 64GB   | 16 @ 2.4GHz | qexp, qprod, qlong, qfree               |
-| Nodes with GPU accelerator | 23    | cn[181-203] | 96GB   | 16 @ 2.3GHz | qgpu, qexp                              |
-| Nodes with MIC accelerator | 4     | cn[204-207] | 96GB   | 16 @ 2.3GHz | qmic, qexp                              |
-| Fat compute nodes          | 2     | cn[208-209] | 512GB  | 16 @ 2.4GHz | qfat, qexp                              |
+| Node type                  | Count | Range       | Memory | Cores       | [Access](resources-allocation-policy/)   |
+| -------------------------- | ----- | ----------- | ------ | ----------- | -------------------------------------- |
+| Nodes without accelerator  | 180   | cn[1-180]   | 64GB   | 16 @ 2.4GHz | qexp, qprod, qlong, qfree, qatlas, qprace |
+| Nodes with GPU accelerator | 23    | cn[181-203] | 96GB   | 16 @ 2.3GHz | qnvidia, qexp, qatlas                     |
+| Nodes with MIC accelerator | 4     | cn[204-207] | 96GB   | 16 @ 2.3GHz | qmic, qexp                                |
+| Fat compute nodes          | 2     | cn[208-209] | 512GB  | 16 @ 2.4GHz | qfat, qexp                                |
 
 ## Processor Architecture
 
diff --git a/docs.it4i/anselm/environment-and-modules.md b/docs.it4i/anselm/environment-and-modules.md
index d460fa7023c41f16c9be748205061e78f26da3a9..e28a3405f831409b7bc84edfe410d4b226bb3a57 100644
--- a/docs.it4i/anselm/environment-and-modules.md
+++ b/docs.it4i/anselm/environment-and-modules.md
@@ -42,13 +42,13 @@ The modules may be loaded, unloaded and switched, according to momentary needs.
 To check available modules use
 
 ```console
-$ module avail **or** ml av
+$ ml av
 ```
 
 To load a module, for example the octave module use
 
 ```console
-$ module load octave **or** ml octave
+$ ml octave
 ```
 
 loading the octave module will set up paths and environment variables of your active shell such that you are ready to run the octave software
@@ -56,19 +56,13 @@ loading the octave module will set up paths and environment variables of your ac
 To check loaded modules use
 
 ```console
-$ module list **or** ml
+$ ml
 ```
 
 To unload a module, for example the octave module use
 
 ```console
-$ module unload octave **or** ml -octave
-```
-
-Learn more on modules by reading the module man page
-
-```console
-$ man module
+$ ml -octave
 ```
 
 Following modules set up the development environment
@@ -79,10 +73,6 @@ PrgEnv-intel sets up the INTEL development environment in conjunction with the I
 
 ## Application Modules Path Expansion
 
-All application modules on Salomon cluster (and further) will be build using tool called [EasyBuild](http://hpcugent.github.io/easybuild/ "EasyBuild"). In case that you want to use some applications that are build by EasyBuild already, you have to modify your MODULEPATH environment variable.
-
-```console
-export MODULEPATH=$MODULEPATH:/apps/easybuild/modules/all/
-```
+All application modules on Anselm cluster (and further) will be built using a tool called [EasyBuild](http://hpcugent.github.io/easybuild/ "EasyBuild").
 
 This command expands your searched paths to modules. You can also add this command to the .bashrc file to expand paths permanently. After this command, you can use same commands to list/add/remove modules as is described above.
diff --git a/docs.it4i/anselm/hardware-overview.md b/docs.it4i/anselm/hardware-overview.md
index f130bd152f8666dd30cf9d3a7021d04f4ffa99f3..1a1ecde339a06d1b6cbb67f3be4ea7002a687349 100644
--- a/docs.it4i/anselm/hardware-overview.md
+++ b/docs.it4i/anselm/hardware-overview.md
@@ -1,8 +1,8 @@
 # Hardware Overview
 
-The Anselm cluster consists of 209 computational nodes named cn[1-209] of which 180 are regular compute nodes, 23 GPU Kepler K20 accelerated nodes, 4 MIC Xeon Phi 5110P accelerated nodes and 2 fat nodes. Each node is a powerful x86-64 computer, equipped with 16 cores (two eight-core Intel Sandy Bridge processors), at least 64 GB RAM, and local hard drive. The user access to the Anselm cluster is provided by two login nodes login[1,2]. The nodes are interlinked by high speed InfiniBand and Ethernet networks. All nodes share 320 TB /home disk storage to store the user files. The 146 TB shared /scratch storage is available for the scratch data.
+The Anselm cluster consists of 209 computational nodes named cn[1-209] of which 180 are regular compute nodes, 23 GPU Kepler K20m accelerated nodes, 4 MIC Xeon Phi 5110P accelerated nodes and 2 fat nodes. Each node is a powerful x86-64 computer, equipped with 16 cores (two eight-core Intel Sandy Bridge processors), at least 64 GB RAM, and local hard drive. The user access to the Anselm cluster is provided by two login nodes login[1,2]. The nodes are interlinked by high speed InfiniBand and Ethernet networks. All nodes share 320 TB /home disk storage to store the user files. The 146 TB shared /scratch storage is available for the scratch data.
 
-The Fat nodes are equipped with large amount (512 GB) of memory. Virtualization infrastructure provides resources to run long term servers and services in virtual mode. Fat nodes and virtual servers may access 45 TB of dedicated block storage. Accelerated nodes, fat nodes, and virtualization infrastructure are available [upon request](https://support.it4i.cz/rt) made by a PI.
+The Fat nodes are equipped with a large amount (512 GB) of memory. Fat nodes may access 45 TB of dedicated block storage. Accelerated and fat nodes are available [upon request](https://support.it4i.cz/rt) made by a PI.
 
 Schematic representation of the Anselm cluster. Each box represents a node (computer) or storage capacity:
@@ -13,7 +13,7 @@ The cluster compute nodes cn[1-207] are organized within 13 chassis.
 
 There are four types of compute nodes:
 
 * 180 compute nodes without the accelerator
-* 23 compute nodes with GPU accelerator - equipped with NVIDIA Tesla Kepler K20
+* 23 compute nodes with GPU accelerator - equipped with NVIDIA Tesla Kepler K20m
 * 4 compute nodes with MIC accelerator - equipped with Intel Xeon Phi 5110P
 * 2 fat nodes - equipped with 512 GB RAM and two 100 GB SSD drives
 
@@ -34,7 +34,7 @@ The parameters are summarized in the following tables:
 | ------------------------------------------- | -------------------------------------------- |
 | Primary purpose                             | High Performance Computing                   |
 | Architecture of compute nodes               | x86-64                                       |
-| Operating system                            | Linux                                        |
+| Operating system                            | Linux (CentOS)                               |
 | [**Compute nodes**](compute-nodes/)         |                                              |
 | Totally                                     | 209                                          |
 | Processor cores                             | 16 (2 x 8 cores)                             |
@@ -53,7 +53,7 @@ The parameters are summarized in the following tables:
 | Node             | Processor                               | Memory | Accelerator          |
 | ---------------- | --------------------------------------- | ------ | -------------------- |
 | w/o accelerator  | 2 x Intel Sandy Bridge E5-2665, 2.4 GHz | 64 GB  | -                    |
-| GPU accelerated  | 2 x Intel Sandy Bridge E5-2470, 2.3 GHz | 96 GB  | NVIDIA Kepler K20    |
+| GPU accelerated  | 2 x Intel Sandy Bridge E5-2470, 2.3 GHz | 96 GB  | NVIDIA Kepler K20m   |
 | MIC accelerated  | 2 x Intel Sandy Bridge E5-2470, 2.3 GHz | 96 GB  | Intel Xeon Phi 5110P |
 | Fat compute node | 2 x Intel Sandy Bridge E5-2665, 2.4 GHz | 512 GB | -                    |
 
diff --git a/docs.it4i/anselm/job-priority.md b/docs.it4i/anselm/job-priority.md
index 30eba2bd004ff45ec5532d06ea556bdd08b7f56c..09acc4cedc9a6cad4ac9b51fdc46ddac2ef91ad2 100644
--- a/docs.it4i/anselm/job-priority.md
+++ b/docs.it4i/anselm/job-priority.md
@@ -1,4 +1,4 @@
-# Job scheduling
+# Job Scheduling
 
 ## Job Execution Priority
 
@@ -54,7 +54,7 @@ Job execution priority (job sort formula) is calculated as:
 
 ---8<--- "job_sort_formula.md"
 
-### Job backfilling
+### Job Backfilling
 
 Anselm cluster uses job backfilling.
 
diff --git a/docs.it4i/anselm/job-submission-and-execution.md b/docs.it4i/anselm/job-submission-and-execution.md
index 63490e6a8123229d4b413847f86130fdec396441..fc916199f6f98c90f19383c5f058aa53a9023272 100644
--- a/docs.it4i/anselm/job-submission-and-execution.md
+++ b/docs.it4i/anselm/job-submission-and-execution.md
@@ -1,4 +1,4 @@
-# Job submission and execution
+# Job Submission and Execution
 
 ## Job Submission
 
@@ -324,10 +324,10 @@ cp $PBS_O_WORKDIR/input .
 cp $PBS_O_WORKDIR/mympiprog.x .
 
 # load the mpi module
-module load openmpi
+ml OpenMPI
 
 # execute the calculation
-mpiexec -pernode ./mympiprog.x
+mpirun -pernode ./mympiprog.x
 
 # copy output file to home
 cp output $PBS_O_WORKDIR/.
@@ -362,10 +362,10 @@ SCRDIR=/scratch/$USER/myjob
 cd $SCRDIR || exit
 
 # load the mpi module
-module load openmpi
+ml OpenMPI
 
 # execute the calculation
-mpiexec ./mympiprog.x
+mpirun ./mympiprog.x
 
 #exit
 exit
diff --git a/docs.it4i/anselm/prace.md b/docs.it4i/anselm/prace.md
index 061cd0a0714075f3caca51152363e10ff795176f..9b24551b7b2fdea37177ccf11e42fe8b1cff3ef8 100644
--- a/docs.it4i/anselm/prace.md
+++ b/docs.it4i/anselm/prace.md
@@ -210,7 +210,7 @@ All system wide installed software on the cluster is made available to the users
 PRACE users can use the "prace" module to use the [PRACE Common Production Environment](http://www.prace-ri.eu/prace-common-production-environment/).
 
 ```console
-$ module load prace
+$ ml prace
 ```
 
 ### Resource Allocation and Job Execution
diff --git a/docs.it4i/anselm/remote-visualization.md b/docs.it4i/anselm/remote-visualization.md.disable
similarity index 99%
rename from docs.it4i/anselm/remote-visualization.md
rename to docs.it4i/anselm/remote-visualization.md.disable
index e5a439b4654da5342101d15287212501b87c0df9..93d5cd23b4fc8c2c6856a511dcb7a31cf4d4fb00 100644
--- a/docs.it4i/anselm/remote-visualization.md
+++ b/docs.it4i/anselm/remote-visualization.md.disable
@@ -1,4 +1,4 @@
-# Remote visualization service
+# Remote Visualization Service
 
 ## Introduction
 
diff --git a/docs.it4i/anselm/resource-allocation-and-job-execution.md b/docs.it4i/anselm/resource-allocation-and-job-execution.md
index 8df8072c9e5ddefbeba31c071309697ae1d6f92b..4585588dd18b1d308bd32c434dd2b09f50f9c154 100644
--- a/docs.it4i/anselm/resource-allocation-and-job-execution.md
+++ b/docs.it4i/anselm/resource-allocation-and-job-execution.md
@@ -1,6 +1,6 @@
 # Resource Allocation and Job Execution
 
-To run a [job](ob-submission-and-execution/), [computational resources](resources-allocation-policy/) for this particular job must be allocated. This is done via the PBS Pro job workload manager software, which efficiently distributes workloads across the supercomputer. Extensive information about PBS Pro can be found in the [official documentation here](../pbspro/), especially in the PBS Pro User's Guide.
+To run a [job](job-submission-and-execution/), [computational resources](resources-allocation-policy/) for this particular job must be allocated. This is done via the PBS Pro job workload manager software, which efficiently distributes workloads across the supercomputer. Extensive information about PBS Pro can be found in the [official documentation here](../pbspro/), especially in the PBS Pro User's Guide.
 
 ## Resources Allocation Policy
 
diff --git a/docs.it4i/anselm/resources-allocation-policy.md b/docs.it4i/anselm/resources-allocation-policy.md
index e4e6bfeaff0f314a7e0173ae2fa6c4bb5a5c345a..25c527bc3a378dc3a94dd04a97b44fc7e376e9bb 100644
--- a/docs.it4i/anselm/resources-allocation-policy.md
+++ b/docs.it4i/anselm/resources-allocation-policy.md
@@ -1,6 +1,6 @@
 # Resources Allocation Policy
 
-## Introduction
+## Job Queue Policies
 
 The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and resources available to the Project. The Fair-share at Anselm ensures that individual users may consume approximately equal amount of resources per week. Detailed information in the [Job scheduling](job-priority/) section. The resources are accessible via several queues for queueing the jobs. The queues provide prioritized and exclusive access to the computational resources. Following table provides the queue partitioning overview:
 
@@ -9,7 +9,7 @@ The resources are allocated to the job in a fair-share fashion, subject to const
 
 | queue               | active project | project resources | nodes                                                | min ncpus | priority | authorization | walltime |
 | ------------------- | -------------- | ----------------- | ---------------------------------------------------- | --------- | -------- | ------------- | -------- |
-| qexp                | no             | none required     | 2 reserved, 31 totalincluding MIC, GPU and FAT nodes | 1         | 150      | no            | 1 h      |
+| qexp                | no             | none required     | 2 reserved, 31 total including MIC, GPU              | 1         | 150      | no            | 1 h      |
 | qprod               | yes            | 0                 | 178 nodes w/o accelerator                            | 16        | 0        | no            | 24/48 h  |
 | qlong               | yes            | 0                 | 60 nodes w/o accelerator                             | 16        | 0        | no            | 72/144 h |
 | qnvidia, qmic       | yes            | 0                 | 23 nvidia nodes, 4 mic nodes                         | 16        | 200      | yes           | 24/48 h  |
@@ -21,13 +21,13 @@ The resources are allocated to the job in a fair-share fashion, subject to const
 
 **The qexp queue is equipped with the nodes not having the very same CPU clock speed.** Should you need the very same CPU speed, you have to select the proper nodes during the PSB job submission.
 
-* **qexp**, the Express queue: This queue is dedicated for testing and running very small jobs. It is not required to specify a project to enter the qexp. There are 2 nodes always reserved for this queue (w/o accelerator), maximum 8 nodes are available via the qexp for a particular user, from a pool of nodes containing Nvidia accelerated nodes (cn181-203), MIC accelerated nodes (cn204-207) and Fat nodes with 512GB RAM (cn208-209). This enables to test and tune also accelerated code or code with higher RAM requirements. The nodes may be allocated on per core basis. No special authorization is required to use it. The maximum runtime in qexp is 1 hour.
+* **qexp**, the Express queue: This queue is dedicated for testing and running very small jobs. It is not required to specify a project to enter the qexp. There are 2 nodes always reserved for this queue (w/o accelerator), maximum 8 nodes are available via the qexp for a particular user, from a pool of nodes containing Nvidia accelerated nodes (cn181-203), MIC accelerated nodes (cn204-207). This also enables testing and tuning of accelerated code. The nodes may be allocated on per core basis. No special authorization is required to use it. The maximum runtime in qexp is 1 hour.
 * **qprod**, the Production queue: This queue is intended for normal production runs. It is required that active project with nonzero remaining resources is specified to enter the qprod. All nodes may be accessed via the qprod queue, except the reserved ones. 178 nodes without accelerator are included. Full nodes, 16 cores per node are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qprod is 48 hours.
 * **qlong**, the Long queue: This queue is intended for long production runs. It is required that active project with nonzero remaining resources is specified to enter the qlong. Only 60 nodes without acceleration may be accessed via the qlong queue. Full nodes, 16 cores per node are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qlong is 144 hours (three times of the standard qprod time - 3 x 48 h).
-* **qnvidia**, qmic, qfat, the Dedicated queues: The queue qnvidia is dedicated to access the Nvidia accelerated nodes, the qmic to access MIC nodes and qfat the Fat nodes. It is required that active project with nonzero remaining resources is specified to enter these queues. 23 nvidia, 4 mic and 2 fat nodes are included. Full nodes, 16 cores per node are allocated. The queues run with very high priority, the jobs will be scheduled before the jobs coming from the qexp queue. An PI needs explicitly ask [support](https://support.it4i.cz/rt/) for authorization to enter the dedicated queues for all users associated to her/his Project.
+* **qnvidia**, **qmic**, **qfat**, the Dedicated queues: The queue qnvidia is dedicated to access the Nvidia accelerated nodes, the qmic to access MIC nodes and qfat the Fat nodes. It is required that active project with nonzero remaining resources is specified to enter these queues. 23 nvidia, 4 mic and 2 fat nodes are included. Full nodes, 16 cores per node are allocated. The queues run with very high priority, the jobs will be scheduled before the jobs coming from the qexp queue. A PI needs to explicitly ask [support](https://support.it4i.cz/rt/) for authorization to enter the dedicated queues for all users associated with her/his Project.
 * **qfree**, The Free resource queue: The queue qfree is intended for utilization of free resources, after a Project exhausted all its allocated computational resources (Does not apply to DD projects by default. DD projects have to request for persmission on qfree after exhaustion of computational resources.). It is required that active project is specified to enter the queue, however no remaining resources are required. Consumed resources will be accounted to the Project. Only 178 nodes without accelerator may be accessed from this queue. Full nodes, 16 cores per node are allocated. The queue runs with very low priority and no special authorization is required to use it. The maximum runtime in qfree is 12 hours.
 
-### Notes
+## Queue Notes
 
 The job wall clock time defaults to **half the maximum time**, see table above. Longer wall time limits can be [set manually, see examples](job-submission-and-execution/).
@@ -35,7 +35,7 @@ Jobs that exceed the reserved wall clock time (Req'd Time) get killed automatica
 
 Anselm users may check current queue configuration at <https://extranet.it4i.cz/anselm/queues>.
 
-### Queue Status
+## Queue Status
 
 !!! tip
     Check the status of jobs, queues and compute nodes at <https://extranet.it4i.cz/anselm/>
@@ -106,24 +106,6 @@ Options:
   --incl-finished       Include finished jobs
 ```
 
-## Resources Accounting Policy
+---8<--- "resource_accounting.md"
 
-### Core-Hours
-
-The resources that are currently subject to accounting are the core-hours. The core-hours are accounted on the wall clock basis. The accounting runs whenever the computational cores are allocated or blocked via the PBS Pro workload manager (the qsub command), regardless of whether the cores are actually used for any calculation. 1 core-hour is defined as 1 processor core allocated for 1 hour of wall clock time. Allocating a full node (16 cores) for 1 hour accounts to 16 core-hours. See example in the [Job submission and execution](job-submission-and-execution/) section.
-
-### Check Consumed Resources
-
-!!! note
-    The **it4ifree** command is a part of it4i.portal.clients package, located here: <https://pypi.python.org/pypi/it4i.portal.clients>
-
-User may check at any time, how many core-hours have been consumed by himself/herself and his/her projects. The command is available on clusters' login nodes.
-
-```console
-$ it4ifree
-Password:
-     PID    Total   Used   ...by me Free
-   -------- ------- ------ -------- -------
-   OPEN-0-0 1500000 400644   225265 1099356
-   DD-13-1    10000   2606     2606    7394
-```
+---8<--- "mathjax.md"
diff --git a/docs.it4i/anselm/shell-and-data-access.md b/docs.it4i/anselm/shell-and-data-access.md
index e850c88133c723937ecdd17ec6e6eb08d7e7541f..ce096b19338238f4f414abfabd6eac222c0098c2 100644
--- a/docs.it4i/anselm/shell-and-data-access.md
+++ b/docs.it4i/anselm/shell-and-data-access.md
@@ -15,21 +15,28 @@ The authentication is by the [private key](../general/accessing-the-clusters/she
 !!! note
     Please verify SSH fingerprints during the first logon. They are identical on all login nodes:
 
+    md5:
+
     29:b3:f4:64:b0:73:f5:6f:a7:85:0f:e0:0d:be:76:bf (DSA)
     d4:6f:5c:18:f4:3f:70:ef:bc:fc:cc:2b:fd:13:36:b7 (RSA)
 
+    sha256:
+
+    LX2034TYy6Lf0Q7Zf3zOIZuFlG09DaSGROGBz6LBUy4 (DSA)
+    +DcED3GDoA9piuyvQOho+ltNvwB9SJSYXbB639hbejY (RSA)
+
 Private key authentication:
 
 On **Linux** or **Mac**, use
 
 ```console
-local $ ssh -i /path/to/id_rsa username@anselm.it4i.cz
+$ ssh -i /path/to/id_rsa username@anselm.it4i.cz
 ```
 
 If you see warning message "UNPROTECTED PRIVATE KEY FILE!", use this command to set lower permissions to private key file.
 
 ```console
-local $ chmod 600 /path/to/id_rsa
+$ chmod 600 /path/to/id_rsa
 ```
 
 On **Windows**, use [PuTTY ssh client](../general/accessing-the-clusters/shell-access-and-data-transfer/putty.md).
@@ -82,23 +89,23 @@ To achieve 160MB/s transfer rates, the end user must be connected by 10G line al
 On linux or Mac, use scp or sftp client to transfer the data to Anselm:
 
 ```console
-local $ scp -i /path/to/id_rsa my-local-file username@anselm.it4i.cz:directory/file
+$ scp -i /path/to/id_rsa my-local-file username@anselm.it4i.cz:directory/file
 ```
 
 ```console
-local $ scp -i /path/to/id_rsa -r my-local-dir username@anselm.it4i.cz:directory
+$ scp -i /path/to/id_rsa -r my-local-dir username@anselm.it4i.cz:directory
 ```
 
 or
 
 ```console
-local $ sftp -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz
+$ sftp -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz
 ```
 
 Very convenient way to transfer files in and out of the Anselm computer is via the fuse filesystem [sshfs](http://linux.die.net/man/1/sshfs)
 
 ```console
-local $ sshfs -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz:. mountpoint
+$ sshfs -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz:. mountpoint
 ```
 
 Using sshfs, the users Anselm home directory will be mounted on your local computer, just like an external disk.
@@ -143,7 +150,7 @@ It works by tunneling the connection from Anselm back to users workstation and f
 Pick some unused port on Anselm login node (for example 6000) and establish the port forwarding:
 
 ```console
-local $ ssh -R 6000:remote.host.com:1234 anselm.it4i.cz
+$ ssh -R 6000:remote.host.com:1234 anselm.it4i.cz
 ```
 
 In this example, we establish port forwarding between port 6000 on Anselm and port 1234 on the remote.host.com. By accessing localhost:6000 on Anselm, an application will see response of remote.host.com:1234. The traffic will run via users local workstation.
@@ -183,7 +190,7 @@ Port forwarding is static, each single port is mapped to a particular port on re
 To establish local proxy server on your workstation, install and run SOCKS proxy server software. On Linux, sshd demon provides the functionality. To establish SOCKS proxy server listening on port 1080 run:
 
 ```console
-local $ ssh -D 1080 localhost
+$ ssh -D 1080 localhost
 ```
 
 On Windows, install and run the free, open source [Sock Puppet](http://sockspuppet.com/) server.
@@ -191,7 +198,7 @@ On Windows, install and run the free, open source [Sock Puppet](http://sockspupp
 
 Once the proxy server is running, establish ssh port forwarding from Anselm to the proxy server, port 1080, exactly as [described above](#port-forwarding-from-login-nodes).
 
 ```console
-local $ ssh -R 6000:localhost:1080 anselm.it4i.cz
+$ ssh -R 6000:localhost:1080 anselm.it4i.cz
 ```
 
 Now, configure the applications proxy settings to **localhost:6000**. Use port forwarding to access the [proxy server from compute nodes](#port-forwarding-from-compute-nodes) as well.
diff --git a/docs.it4i/anselm/software/chemistry/molpro.md b/docs.it4i/anselm/software/chemistry/molpro.md
deleted file mode 100644
index 9b08cb6ec8d2137e936f391eae4af97789d4f229..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/chemistry/molpro.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# Molpro
-
-Molpro is a complete system of ab initio programs for molecular electronic structure calculations.
-
-## About Molpro
-
-Molpro is a software package used for accurate ab-initio quantum chemistry calculations. More information can be found at the [official webpage](http://www.molpro.net/).
-
-## License
-
-Molpro software package is available only to users that have a valid license. Please contact support to enable access to Molpro if you have a valid license appropriate for running on our cluster (eg. academic research group licence, parallel execution).
-
-To run Molpro, you need to have a valid license token present in " $HOME/.molpro/token". You can download the token from [Molpro website](https://www.molpro.net/licensee/?portal=licensee).
-
-## Installed Version
-
-Currently on Anselm is installed version 2010.1, patch level 45, parallel version compiled with Intel compilers and Intel MPI.
-
-Compilation parameters are default:
-
-| Parameter                          | Value        |
-| ---------------------------------- | ------------ |
-| max number of atoms                | 200          |
-| max number of valence orbitals     | 300          |
-| max number of basis functions      | 4095         |
-| max number of states per symmmetry | 20           |
-| max number of state symmetries     | 16           |
-| max number of records              | 200          |
-| max number of primitives           | maxbfn x [2] |
-
-## Running
-
-Molpro is compiled for parallel execution using MPI and OpenMP. By default, Molpro reads the number of allocated nodes from PBS and launches a data server on one node. On the remaining allocated nodes, compute processes are launched, one process per node, each with 16 threads. You can modify this behavior by using -n, -t and helper-server options. Please refer to the [Molpro documentation](http://www.molpro.net/info/2010.1/doc/manual/node9.html) for more details.
-
-!!! note
-    The OpenMP parallelization in Molpro is limited and has been observed to produce limited scaling. We therefore recommend to use MPI parallelization only. This can be achieved by passing option mpiprocs=16:ompthreads=1 to PBS.
-
-You are advised to use the -d option to point to a directory in [SCRATCH file system](../../storage/storage/). Molpro can produce a large amount of temporary data during its run, and it is important that these are placed in the fast scratch file system.
-
-### Example jobscript
-
-```bash
-    #PBS -A IT4I-0-0
-    #PBS -q qprod
-    #PBS -l select=1:ncpus=16:mpiprocs=16:ompthreads=1
-
-    cd $PBS_O_WORKDIR
-
-    # load Molpro module
-    module add molpro
-
-    # create a directory in the SCRATCH filesystem
-    mkdir -p /scratch/$USER/$PBS_JOBID
-
-    # copy an example input
-    cp /apps/chem/molpro/2010.1/molprop_2010_1_Linux_x86_64_i8/examples/caffeine_opt_diis.com .
-
-    # run Molpro with default options
-    molpro -d /scratch/$USER/$PBS_JOBID caffeine_opt_diis.com
-
-    # delete scratch directory
-    rm -rf /scratch/$USER/$PBS_JOBID
-```
diff --git a/docs.it4i/anselm/software/chemistry/nwchem.md b/docs.it4i/anselm/software/chemistry/nwchem.md
deleted file mode 100644
index e4f84d49f9b8a38cba53f212d7db1bc6c8c8c7d2..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/chemistry/nwchem.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# NWChem
-
-## Introduction
-
-NWChem aims to provide its users with computational chemistry tools that are scalable both in their ability to treat large scientific computational chemistry problems efficiently, and in their use of available parallel computing resources from high-performance parallel supercomputers to conventional workstation clusters.
-
-[Homepage](http://www.nwchem-sw.org/index.php/Main_Page)
-
-## Installed Versions
-
-The following versions are currently installed:
-
-* 6.1.1, not recommended, problems have been observed with this version
-* 6.3-rev2-patch1, current release with QMD patch applied. Compiled with Intel compilers, MKL and Intel MPI
-* 6.3-rev2-patch1-openmpi, same as above, but compiled with OpenMPI and NWChem provided BLAS instead of MKL. This version is expected to be slower
-* 6.3-rev2-patch1-venus, this version contains only libraries for VENUS interface linking. Does not provide standalone NWChem executable
-
-For a current list of installed versions, execute:
-
-```console
-$ ml av nwchem
-```
-
-## Running
-
-NWChem is compiled for parallel MPI execution. Normal procedure for MPI jobs applies. Sample jobscript:
-
-```bash
-    #PBS -A IT4I-0-0
-    #PBS -q qprod
-    #PBS -l select=1:ncpus=16
-
-    module add nwchem/6.3-rev2-patch1
-    mpirun -np 16 nwchem h2o.nw
-```
-
-## Options
-
-Please refer to [the documentation](http://www.nwchem-sw.org/index.php/Release62:Top-level) and in the input file set the following directives :
-
-* MEMORY : controls the amount of memory NWChem will use
-* SCRATCH_DIR : set this to a directory in [SCRATCH file system](../../storage/storage/#scratch) (or run the calculation completely in a scratch directory). For certain calculations, it might be advisable to reduce I/O by forcing "direct" mode, e.g.. "scf direct"
diff --git a/docs.it4i/anselm/software/compilers.md b/docs.it4i/anselm/software/compilers.md
deleted file mode 100644
index 71e60499b1bb335ddb7a6919e22457aa70b68fa5..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/compilers.md
+++ /dev/null
@@ -1,156 +0,0 @@
-# Compilers
-
-## Available Compilers, Including GNU, INTEL, and UPC Compilers
-
-Currently there are several compilers for different programming languages available on the Anselm cluster:
-
-* C/C++
-* Fortran 77/90/95
-* Unified Parallel C
-* Java
-* NVIDIA CUDA
-
-The C/C++ and Fortran compilers are divided into two main groups GNU and Intel.
-
-## Intel Compilers
-
-For information about the usage of Intel Compilers and other Intel products, please read the [Intel Parallel studio](intel-suite/) page.
-
-## GNU C/C++ and Fortran Compilers
-
-For compatibility reasons there are still available the original (old 4.4.6-4) versions of GNU compilers as part of the OS. These are accessible in the search path by default.
-
-It is strongly recommended to use the up to date version (4.8.1) which comes with the module gcc:
-
-```console
-$ ml gcc
-$ gcc -v
-$ g++ -v
-$ gfortran -v
-```
-
-With the module loaded two environment variables are predefined. One for maximum optimizations on the Anselm cluster architecture, and the other for debugging purposes:
-
-```console
-$ echo $OPTFLAGS
--O3 -march=corei7-avx
-$ echo $DEBUGFLAGS
--O0 -g
-```
-
-For more information about the possibilities of the compilers, please see the man pages.
-
-## Unified Parallel C
-
- UPC is supported by two compiler/runtime implementations:
-
-* GNU - SMP/multi-threading support only
-* Berkley - multi-node support as well as SMP/multi-threading support
-
-### GNU UPC Compiler
-
-To use the GNU UPC compiler and run the compiled binaries use the module gupc
-
-```console
-$ module add gupc
-$ gupc -v
-$ g++ -v
-```
-
-Simple program to test the compiler
-
-```console
-$ cat count.upc
-
-    /* hello.upc - a simple UPC example */
-    #include <upc.h>
-    #include <stdio.h>
-
-    int main() {
-      if (MYTHREAD == 0) {
-        printf("Welcome to GNU UPC!!!n");
-      }
-      upc_barrier;
-      printf(" - Hello from thread %in", MYTHREAD);
-      return 0;
-    }
-```
-
-To compile the example use
-
-```console
-$ gupc -o count.upc.x count.upc
-```
-
-To run the example with 5 threads issue
-
-```console
-$ ./count.upc.x -fupc-threads-5
-```
-
-For more information see the man pages.
-
-### Berkley UPC Compiler
-
-To use the Berkley UPC compiler and runtime environment to run the binaries use the module bupc
-
-```console
-$ module add bupc
-$ upcc -version
-```
-
-As default UPC network the "smp" is used. This is very quick and easy way for testing/debugging, but limited to one node only.
-
-For production runs, it is recommended to use the native Infiband implementation of UPC network "ibv". For testing/debugging using multiple nodes, the "mpi" UPC network is recommended.
-
-!!! warning
-    Selection of the network is done at the compile time and not at runtime (as expected)!
-
-Example UPC code:
-
-```console
-$ cat hello.upc
-
-    /* hello.upc - a simple UPC example */
-    #include <upc.h>
-    #include <stdio.h>
-
-    int main() {
-      if (MYTHREAD == 0) {
-        printf("Welcome to Berkeley UPC!!!n");
-      }
-      upc_barrier;
-      printf(" - Hello from thread %in", MYTHREAD);
-      return 0;
-    }
-```
-
-To compile the example with the "ibv" UPC network use
-
-```console
-$ upcc -network=ibv -o hello.upc.x hello.upc
-```
-
-To run the example with 5 threads issue
-
-```console
-$ upcrun -n 5 ./hello.upc.x
-```
-
-To run the example on two compute nodes using all 32 cores, with 32 threads, issue
-
-```console
-$ qsub -I -q qprod -A PROJECT_ID -l select=2:ncpus=16
-$ module add bupc
-$ upcrun -n 32 ./hello.upc.x
-```
-
-For more information see the man pages.
-
-## Java
-
-For information how to use Java (runtime and/or compiler), please read the [Java page](java/).
-
-## NVIDIA CUDA
-
-For information on how to work with NVIDIA CUDA, please read the [NVIDIA CUDA page](nvidia-cuda/).
diff --git a/docs.it4i/anselm/software/comsol-multiphysics.md b/docs.it4i/anselm/software/comsol-multiphysics.md
deleted file mode 100644
index 74672428542f3643d754768b7d3c44ed22f22cb6..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/comsol-multiphysics.md
+++ /dev/null
@@ -1,120 +0,0 @@
-# COMSOL Multiphysics
-
-## Introduction
-
-[COMSOL](http://www.comsol.com) is a powerful environment for modelling and solving various engineering and scientific problems based on partial differential equations. COMSOL is designed to solve coupled or multiphysics phenomena. For many
-standard engineering problems COMSOL provides add-on products such as electrical, mechanical, fluid flow, and chemical
-applications.
-
-* [Structural Mechanics Module](http://www.comsol.com/structural-mechanics-module),
-* [Heat Transfer Module](http://www.comsol.com/heat-transfer-module),
-* [CFD Module](http://www.comsol.com/cfd-module),
-* [Acoustics Module](http://www.comsol.com/acoustics-module),
-* and [many others](http://www.comsol.com/products)
-
-COMSOL also allows an interface support for equation-based modelling of partial differential equations.
-
-## Execution
-
-On the Anselm cluster COMSOL is available in the latest stable version. There are two variants of the release:
-
-* **Non commercial** or so called **EDU variant**, which can be used for research and educational purposes.
-* **Commercial** or so called **COM variant**, which can used also for commercial activities. **COM variant** has only subset of features compared to the **EDU variant** available. More about licensing will be posted here soon.
-
-To load the of COMSOL load the module
-
-```console
-$ ml comsol
-```
-
-By default the **EDU variant** will be loaded. If user needs other version or variant, load the particular version. To obtain the list of available versions use
-
-```console
-$ ml av comsol
-```
-
-If user needs to prepare COMSOL jobs in the interactive mode it is recommend to use COMSOL on the compute nodes via PBS Pro scheduler. In order run the COMSOL Desktop GUI on Windows is recommended to use the Virtual Network Computing (VNC).
-
-```console
-$ xhost +
-$ qsub -I -X -A PROJECT_ID -q qprod -l select=1:ncpus=16
-$ ml comsol
-$ comsol
-```
-
-To run COMSOL in batch mode, without the COMSOL Desktop GUI environment, user can utilized the default (comsol.pbs) job script and execute it via the qsub command.
-
-```bash
-#!/bin/bash
-#PBS -l select=3:ncpus=16
-#PBS -q qprod
-#PBS -N JOB_NAME
-#PBS -A PROJECT_ID
-
-cd /scratch/$USER/ || exit
-
-echo Time is `date`
-echo Directory is `pwd`
-echo '**PBS_NODEFILE***START*******'
-cat $PBS_NODEFILE
-echo '**PBS_NODEFILE***END*********'
-
-text_nodes < cat $PBS_NODEFILE
-
-module load comsol
-# module load comsol/43b-COM
-
-ntask=$(wc -l $PBS_NODEFILE)
-
-comsol -nn ${ntask} batch -configuration /tmp –mpiarg –rmk –mpiarg pbs -tmpdir /scratch/$USER/ -inputfile name_input_f.mph -outputfile name_output_f.mph -batchlog name_log_f.log
-```
-
-Working directory has to be created before sending the (comsol.pbs) job script into the queue. Input file (name_input_f.mph) has to be in working directory or full path to input file has to be specified. The appropriate path to the temp directory of the job has to be set by command option (-tmpdir).
-
-## LiveLink for MATLAB
-
-COMSOL is the software package for the numerical solution of the partial differential equations. LiveLink for MATLAB allows connection to the COMSOL API (Application Programming Interface) with the benefits of the programming language and computing environment of the MATLAB.
-
-LiveLink for MATLAB is available in both **EDU** and **COM** **variant** of the COMSOL release. On Anselm 1 commercial (**COM**) license and the 5 educational (**EDU**) licenses of LiveLink for MATLAB (please see the [ISV Licenses](isv_licenses/)) are available.
-Following example shows how to start COMSOL model from MATLAB via LiveLink in the interactive mode.
-
-```console
-$ xhost +
-$ qsub -I -X -A PROJECT_ID -q qexp -l select=1:ncpus=16
-$ ml matlab
-$ ml comsol
-$ comsol server matlab
-```
-
-At the first time to launch the LiveLink for MATLAB (client-MATLAB/server-COMSOL connection) the login and password is requested and this information is not requested again.
-
-To run LiveLink for MATLAB in batch mode with (comsol_matlab.pbs) job script you can utilize/modify the following script and execute it via the qsub command.
-
-```bash
-#!/bin/bash
-#PBS -l select=3:ncpus=16
-#PBS -q qprod
-#PBS -N JOB_NAME
-#PBS -A PROJECT_ID
-
-cd /scratch/$USER || exit
-
-echo Time is `date`
-echo Directory is `pwd`
-echo '**PBS_NODEFILE***START*******'
-cat $PBS_NODEFILE
-echo '**PBS_NODEFILE***END*********'
-
-text_nodes < cat $PBS_NODEFILE
-
-module load matlab
-module load comsol/43b-EDU
-
-ntask=$(wc -l $PBS_NODEFILE)
-
-comsol -nn ${ntask} server -configuration /tmp -mpiarg -rmk -mpiarg pbs -tmpdir /scratch/$USER &
-cd /apps/engineering/comsol/comsol43b/mli
-matlab -nodesktop -nosplash -r "mphstart; addpath /scratch/$USER; test_job"
-```
-
-This example shows, how to run LiveLink for MATLAB with following configuration: 3 nodes and 16 cores per node. Working directory has to be created before submitting (comsol_matlab.pbs) job script into the queue. Input file (test_job.m) has to be in working directory or full path to input file has to be specified. The MATLAB command option (-r ”mphstart”) created a connection with a COMSOL server using the default port number.
diff --git a/docs.it4i/anselm/software/debuggers/allinea-ddt.md b/docs.it4i/anselm/software/debuggers/allinea-ddt.md
deleted file mode 100644
index f85848417002cc5c9f15d54ea437410ca4585f11..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/debuggers/allinea-ddt.md
+++ /dev/null
@@ -1,94 +0,0 @@
-# Allinea Forge (DDT,MAP)
-
-Allinea Forge consist of two tools - debugger DDT and profiler MAP.
-
-Allinea DDT, is a commercial debugger primarily for debugging parallel MPI or OpenMP programs. It also has a support for GPU (CUDA) and Intel Xeon Phi accelerators. DDT provides all the standard debugging features (stack trace, breakpoints, watches, view variables, threads etc.) for every thread running as part of your program, or for every process - even if these processes are distributed across a cluster using an MPI implementation.
-
-Allinea MAP is a profiler for C/C++/Fortran HPC codes. It is designed for profiling parallel code, which uses pthreads, OpenMP or MPI.
-
-## License and Limitations for Anselm Users
-
-On Anselm users can debug OpenMP or MPI code that runs up to 64 parallel processes. In case of debugging GPU or Xeon Phi accelerated codes the limit is 8 accelerators. These limitation means that:
-
-* 1 user can debug up 64 processes, or
-* 32 users can debug 2 processes, etc.
-
-In case of debugging on accelerators:
-
-* 1 user can debug on up to 8 accelerators, or
-* 8 users can debug on single accelerator.
-
-## Compiling Code to Run With DDT
-
-### Modules
-
-Load all necessary modules to compile the code. For example:
-
-```console
-$ ml intel
-$ ml impi ... or ... module load openmpi/X.X.X-icc
-```
-
-Load the Allinea DDT module:
-
-```console
-$ ml Forge
-```
-
-Compile the code:
-
-```console
-$ mpicc -g -O0 -o test_debug test.c
-
-$ mpif90 -g -O0 -o test_debug test.f
-```
-
-### Compiler Flags
-
-Before debugging, you need to compile your code with theses flags:
-
-!!! note
-    \* **g** : Generates extra debugging information usable by GDB. -g3 includes even more debugging information. This option is available for GNU and INTEL C/C++ and Fortran compilers.
-    \* **O0** : Suppress all optimizations.
-
-## Starting a Job With DDT
-
-Be sure to log in with an X window forwarding enabled. This could mean using the -X in the ssh:
-
-```console
-$ ssh -X username@anselm.it4i.cz
-```
-
-Other options is to access login node using VNC. Please see the detailed information on how to [use graphic user interface on Anselm](/general/accessing-the-clusters/graphical-user-interface/x-window-system/)
-
-From the login node an interactive session **with X windows forwarding** (-X option) can be started by following command:
-
-```console
-$ qsub -I -X -A NONE-0-0 -q qexp -lselect=1:ncpus=16:mpiprocs=16,walltime=01:00:00
-```
-
-Then launch the debugger with the ddt command followed by the name of the executable to debug:
-
-```console
-$ ddt test_debug
-```
-
-A submission window that appears have a prefilled path to the executable to debug. You can select the number of MPI processors and/or OpenMP threads on which to run and press run. Command line arguments to a program can be entered to the "Arguments " box.
-
-![](../../../img/ddt1.png)
-
-To start the debugging directly without the submission window, user can specify the debugging and execution parameters from the command line. For example the number of MPI processes is set by option "-np 4". Skipping the dialog is done by "-start" option. To see the list of the "ddt" command line parameters, run "ddt --help".
-
-```console
-ddt -start -np 4 ./hello_debug_impi
-```
-
-## Documentation
-
-Users can find original User Guide after loading the DDT module:
-
-```console
-$DDTPATH/doc/userguide.pdf
-```
-
-[1] Discipline, Magic, Inspiration and Science: Best Practice Debugging with Allinea DDT, Workshop conducted at LLNL by Allinea on May 10, 2013, [link](https://computing.llnl.gov/tutorials/allineaDDT/index.html)
diff --git a/docs.it4i/anselm/software/debuggers/allinea-performance-reports.md b/docs.it4i/anselm/software/debuggers/allinea-performance-reports.md
deleted file mode 100644
index a5399a61e7ae133d4c037391a1123b0170a132ec..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/debuggers/allinea-performance-reports.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Allinea Performance Reports
-
-## Introduction
-
-Allinea Performance Reports characterize the performance of HPC application runs. After executing your application through the tool, a synthetic HTML report is generated automatically, containing information about several metrics along with clear behavior statements and hints to help you improve the efficiency of your runs.
-
-The Allinea Performance Reports is most useful in profiling MPI programs.
-
-Our license is limited to 64 MPI processes.
-
-## Modules
-
-Allinea Performance Reports version 6.0 is available
-
-```console
-$ ml PerformanceReports/6.0
-```
-
-The module sets up environment variables, required for using the Allinea Performance Reports. This particular command loads the default module, which is performance reports version 4.2.
-
-## Usage
-
-!!! note
-    Use the the perf-report wrapper on your (MPI) program.
-
-Instead of [running your MPI program the usual way](../mpi/), use the the perf report wrapper:
-
-```console
-$ perf-report mpirun ./mympiprog.x
-```
-
-The mpi program will run as usual. The perf-report creates two additional files, in \*.txt and \*.html format, containing the performance report. Note that [demanding MPI codes should be run within the queue system](../../job-submission-and-execution/).
-
-## Example
-
-In this example, we will be profiling the mympiprog.x MPI program, using Allinea performance reports. Assume that the code is compiled with Intel compilers and linked against Intel MPI library:
-
-First, we allocate some nodes via the express queue:
-
-```console
-$ qsub -q qexp -l select=2:ncpus=16:mpiprocs=16:ompthreads=1 -I
-    qsub: waiting for job 262197.dm2 to start
-    qsub: job 262197.dm2 ready
-```
-
-Then we load the modules and run the program the usual way:
-
-```console
-$ ml intel impi allinea-perf-report/4.2
-$ mpirun ./mympiprog.x
-```
-
-Now lets profile the code:
-
-```console
-$ perf-report mpirun ./mympiprog.x
-```
-
-Performance report files [mympiprog_32p\*.txt](../../../src/mympiprog_32p_2014-10-15_16-56.txt) and [mympiprog_32p\*.html](../../../src/mympiprog_32p_2014-10-15_16-56.html) were created. We can see that the code is very efficient on MPI and is CPU bounded.
diff --git a/docs.it4i/anselm/software/debuggers/debuggers.md b/docs.it4i/anselm/software/debuggers/debuggers.md
deleted file mode 100644
index 3d38fd6a59565a1814df261d6cc2383f9bef7c59..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/debuggers/debuggers.md
+++ /dev/null
@@ -1,60 +0,0 @@
-# Debuggers and profilers summary
-
-## Introduction
-
-We provide state of the art programms and tools to develop, profile and debug HPC codes at IT4Innovations. On these pages, we provide an overview of the profiling and debugging tools available on Anslem at IT4I.
-
-## Intel Debugger
-
-The intel debugger version 13.0 is available, via module intel. The debugger works for applications compiled with C and C++ compiler and the ifort fortran 77/90/95 compiler. The debugger provides java GUI environment. Use X display for running the GUI.
-
-```console
-$ ml intel
-$ idb
-```
-
-Read more at the [Intel Debugger](intel-suite/intel-debugger/) page.
-
-## Allinea Forge (DDT/MAP)
-
-Allinea DDT, is a commercial debugger primarily for debugging parallel MPI or OpenMP programs. It also has a support for GPU (CUDA) and Intel Xeon Phi accelerators. DDT provides all the standard debugging features (stack trace, breakpoints, watches, view variables, threads etc.) for every thread running as part of your program, or for every process even if these processes are distributed across a cluster using an MPI implementation.
-
-```console
-$ ml Forge
-$ forge
-```
-
-Read more at the [Allinea DDT](debuggers/allinea-ddt/) page.
-
-## Allinea Performance Reports
-
-Allinea Performance Reports characterize the performance of HPC application runs. After executing your application through the tool, a synthetic HTML report is generated automatically, containing information about several metrics along with clear behavior statements and hints to help you improve the efficiency of your runs. Our license is limited to 64 MPI processes.
-
-```console
-$ ml PerformanceReports/6.0
-$ perf-report mpirun -n 64 ./my_application argument01 argument02
-```
-
-Read more at the [Allinea Performance Reports](debuggers/allinea-performance-reports/) page.
-
-## RougeWave Totalview
-
-TotalView is a source- and machine-level debugger for multi-process, multi-threaded programs. Its wide range of tools provides ways to analyze, organize, and test programs, making it easy to isolate and identify problems in individual threads and processes in programs of great complexity.
-
-```console
-$ ml totalview
-$ totalview
-```
-
-Read more at the [Totalview](debuggers/total-view/) page.
-
-## Vampir Trace Analyzer
-
-Vampir is a GUI trace analyzer for traces in OTF format.
-
-```console
-$ ml Vampir/8.5.0
-$ vampir
-```
-
-Read more at the [Vampir](vampir/) page.
diff --git a/docs.it4i/anselm/software/debuggers/intel-vtune-amplifier.md b/docs.it4i/anselm/software/debuggers/intel-vtune-amplifier.md
deleted file mode 100644
index 1d90aacfee0141246d4fbe41912ca8e3040b30db..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/debuggers/intel-vtune-amplifier.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Intel VTune Amplifier
-
-## Introduction
-
-Intel VTune Amplifier, part of Intel Parallel studio, is a GUI profiling tool designed for Intel processors. It offers a graphical performance analysis of single core and multithreaded applications. A highlight of the features:
-
-* Hotspot analysis
-* Locks and waits analysis
-* Low level specific counters, such as branch analysis and memory
-  bandwidth
-* Power usage analysis - frequency and sleep states.
-
-![](../../../img/vtune-amplifier.png)
-
-## Usage
-
-To launch the GUI, first load the module:
-
-```console
-$ module add VTune/2016_update1
-```
-
-and launch the GUI :
-
-```console
-$ amplxe-gui
-```
-
-!!! note
-    To profile an application with VTune Amplifier, special kernel modules need to be loaded. The modules are not loaded on Anselm login nodes, thus direct profiling on login nodes is not possible. Use VTune on compute nodes and refer to the documentation on using GUI applications.
-
-The GUI will open in new window. Click on "_New Project..._" to create a new project. After clicking _OK_, a new window with project properties will appear. At "_Application:_", select the bath to your binary you want to profile (the binary should be compiled with -g flag). Some additional options such as command line arguments can be selected. At "_Managed code profiling mode:_" select "_Native_" (unless you want to profile managed mode .NET/Mono applications). After clicking _OK_, your project is created.
-
-To run a new analysis, click "_New analysis..._". You will see a list of possible analysis. Some of them will not be possible on the current CPU (e.g. Intel Atom analysis is not possible on Sandy Bridge CPU), the GUI will show an error box if you select the wrong analysis. For example, select "_Advanced Hotspots_". Clicking on _Start _will start profiling of the application.
-
-## Remote Analysis
-
-VTune Amplifier also allows a form of remote analysis. In this mode, data for analysis is collected from the command line without GUI, and the results are then loaded to GUI on another machine. This allows profiling without interactive graphical jobs. To perform a remote analysis, launch a GUI somewhere, open the new analysis window and then click the button "_Command line_" in bottom right corner. It will show the command line needed to perform the selected analysis.
- -The command line will look like this: - -```console -$ /apps/all/VTune/2016_update1/vtune_amplifier_xe_2016.1.1.434111/bin64/amplxe-cl -collect advanced-hotspots -knob collection-detail=stack-and-callcount -mrte-mode=native -target-duration-type=veryshort -app-working-dir /home/sta545/test -- /home/sta545/test_pgsesv -``` - -Copy the line to clipboard and then you can paste it in your jobscript or in command line. After the collection is run, open the GUI once again, click the menu button in the upper right corner, and select "_Open > Result..._". The GUI will load the results from the run. - -## Xeon Phi - -!!! note - This section is outdated. It will be updated with new information soon. - -It is possible to analyze both native and offload Xeon Phi applications. For offload mode, just specify the path to the binary. For native mode, you need to specify in project properties: - -Application: ssh - -Application parameters: mic0 source ~/.profile && /path/to/your/bin - -Note that we include source ~/.profile in the command to setup environment paths [as described here](../intel-xeon-phi/). - -!!! note - If the analysis is interrupted or aborted, further analysis on the card might be impossible and you will get errors like "ERROR connecting to MIC card". In this case please contact our support to reboot the MIC card. - -You may also use remote analysis to collect data from the MIC and then analyze it in the GUI later : - -```console -$ amplxe-cl -collect knc-hotspots -no-auto-finalize -- ssh mic0 - "export LD_LIBRARY_PATH=/apps/intel/composer_xe_2015.2.164/compiler/lib/mic/:/apps/intel/composer_xe_2015.2.164/mkl/lib/mic/; export KMP_AFFINITY=compact; /tmp/app.mic" -``` - -## References - -1. <https://www.rcac.purdue.edu/tutorials/phi/PerformanceTuningXeonPhi-Tullos.pdf> Performance Tuning for Intel® Xeon Phi™ Coprocessors diff --git a/docs.it4i/anselm/software/debuggers/total-view.md b/docs.it4i/anselm/software/debuggers/total-view.md deleted file mode 100644 index de618ace58562f36720e41a5dbb603c9b2478c06..0000000000000000000000000000000000000000 --- a/docs.it4i/anselm/software/debuggers/total-view.md +++ /dev/null @@ -1,158 +0,0 @@ -# Total View - -TotalView is a GUI-based source code multi-process, multi-thread debugger. - -## License and Limitations for Anselm Users - -On Anselm users can debug OpenMP or MPI code that runs up to 64 parallel processes. These limitation means that: - -```console - 1 user can debug up 64 processes, or - 32 users can debug 2 processes, etc. -``` - -Debugging of GPU accelerated codes is also supported. - -You can check the status of the licenses here: - -```console -$ cat /apps/user/licenses/totalview_features_state.txt - - # totalview - # ------------------------------------------------- - # FEATURE TOTAL USED AVAIL - # ------------------------------------------------- - TotalView_Team 64 0 64 - Replay 64 0 64 - CUDA 64 0 64 -``` - -## Compiling Code to Run With TotalView - -### Modules - -Load all necessary modules to compile the code. For example: - -```console -$ ml intel **or** ml foss -``` - -Load the TotalView module: - -```console -$ ml totalview/8.12 -``` - -Compile the code: - -```console -$ mpicc -g -O0 -o test_debug test.c -$ mpif90 -g -O0 -o test_debug test.f -``` - -### Compiler Flags - -Before debugging, you need to compile your code with theses flags: - -!!! note - \* **-g** : Generates extra debugging information usable by GDB. **-g3** includes even more debugging information. 
-    This option is available for GNU and INTEL C/C++ and Fortran compilers.
-    \* **-O0** : Suppress all optimizations.
-
-## Starting a Job With TotalView
-
-Be sure to log in with X window forwarding enabled. This could mean using the -X option with ssh:
-
-```console
-local $ ssh -X username@anselm.it4i.cz
-```
-
-Another option is to access the login node using VNC. Please see the detailed information on how to use the graphical user interface on Anselm.
-
-From the login node, an interactive session with X windows forwarding (the -X option) can be started by the following command:
-
-```console
-$ qsub -I -X -A NONE-0-0 -q qexp -lselect=1:ncpus=16:mpiprocs=16,walltime=01:00:00
-```
-
-Then launch the debugger with the totalview command followed by the name of the executable to debug.
-
-### Debugging a Serial Code
-
-To debug a serial code, use:
-
-```console
-$ totalview test_debug
-```
-
-### Debugging a Parallel Code - Option 1
-
-To debug a parallel code compiled with **OpenMPI** you need to set up your TotalView environment:
-
-!!! hint
-    To be able to run the parallel debugging procedure from the command line, without stopping the debugger in the mpiexec source code, you have to add the following function to your `~/.tvdrc` file:
-
-```console
- proc mpi_auto_run_starter {loaded_id} {
- set starter_programs {mpirun mpiexec orterun}
- set executable_name [TV::symbol get $loaded_id full_pathname]
- set file_component [file tail $executable_name]
-
- if {[lsearch -exact $starter_programs $file_component] != -1} {
- puts "*************************************"
- puts "Automatically starting $file_component"
- puts "*************************************"
- dgo
- }
- }
-
- # Append this function to TotalView's image load callbacks so that
- # TotalView runs this program automatically.
-
- dlappend TV::image_load_callbacks mpi_auto_run_starter
-```
-
-The source code of this function can also be found in
-
-```console
-$ /apps/mpi/openmpi/intel/1.6.5/etc/openmpi-totalview.tcl
-```
-
-!!! note
-    You can also add just the following line to your ~/.tvdrc file instead of the entire function:
-    **source /apps/mpi/openmpi/intel/1.6.5/etc/openmpi-totalview.tcl**
-
-You need to do this step only once.
-
-Now you can run the parallel debugger using:
-
-```console
-$ mpirun -tv -n 5 ./test_debug
-```
-
-When the following dialog appears, click "Yes":
-
-
-
-At this point, the main TotalView GUI window will appear and you can insert the breakpoints and start debugging:
-
-
-
-### Debugging a Parallel Code - Option 2
-
-Another option to start a new parallel debugging session from the command line is to let TotalView execute mpirun by itself. In this case, the user has to specify the MPI implementation used to compile the source code.
-
-The following example shows how to start a debugging session with Intel MPI:
-
-```console
-$ ml intel
-$ ml totalview
-$ totalview -mpi "Intel MPI-Hydra" -np 8 ./hello_debug_impi
-```
-
-After running the previous command, you will see the same window as shown in the screenshot above.
-
-More information regarding the TotalView command line parameters can be found in the TotalView Reference Guide, Chapter 7: TotalView Command Syntax.
-
-## Documentation
-
-[1] The [TotalView documentation](http://www.roguewave.com/support/product-documentation/totalview-family.aspx#totalview) web page is a good resource for learning more about some of the advanced TotalView features.
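-
-The compile examples above assume a small test source file (test.c / test.f). For reference, a minimal MPI program in C that fits this workflow might look as follows - this is an illustrative sketch, not part of the original page, and any small MPI code will do:
-
-```cpp
-/* test.c - minimal MPI program to exercise the compile-and-debug workflow.
-   Build it with the flags discussed above: mpicc -g -O0 -o test_debug test.c */
-#include <mpi.h>
-#include <stdio.h>
-
-int main(int argc, char **argv) {
-    int rank, size;
-    MPI_Init(&argc, &argv);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-    printf("Hello from rank %d of %d\n", rank, size);
-    MPI_Finalize();
-    return 0;
-}
-```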
diff --git a/docs.it4i/anselm/software/debuggers/vampir.md b/docs.it4i/anselm/software/debuggers/vampir.md
deleted file mode 100644
index 1dfa23e7b8eed6c9deaf04439df6b01ed6358480..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/debuggers/vampir.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Vampir
-
-Vampir is a commercial trace analysis and visualization tool. It can work with traces in OTF and OTF2 formats. It does not have the functionality to collect traces; you need to use a trace collection tool (such as [Score-P](score-p/)) first to collect the traces.
-
-
-
-## Installed Versions
-
-Version 8.5.0 is currently installed as the module Vampir/8.5.0:
-
-```console
-$ ml Vampir/8.5.0
-$ vampir &
-```
-
-## User Manual
-
-You can find the detailed user manual in PDF format at $EBROOTVAMPIR/doc/vampir-manual.pdf
-
-## References
-
-[1]. <https://www.vampir.eu>
diff --git a/docs.it4i/anselm/software/intel-suite/intel-compilers.md b/docs.it4i/anselm/software/intel-suite/intel-compilers.md
deleted file mode 100644
index d446655d915833a139353d5c76015f70db9a9645..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/intel-suite/intel-compilers.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Intel Compilers
-
-The Intel compilers version 13.1.1 are available via the module intel. The compilers include the icc C and C++ compiler and the ifort fortran 77/90/95 compiler.
-
-```console
-$ ml intel
-$ icc -v
-$ ifort -v
-```
-
-The Intel compilers provide vectorization of the code via the AVX instructions and support threading parallelization via OpenMP.
-
-For maximum performance on the Anselm cluster, compile your programs using the AVX instructions, with reporting where the vectorization was used. We recommend the following compilation options for high performance:
-
-```console
-$ icc -ipo -O3 -vec -xAVX -vec-report1 myprog.c mysubroutines.c -o myprog.x
-$ ifort -ipo -O3 -vec -xAVX -vec-report1 myprog.f mysubroutines.f -o myprog.x
-```
-
-In this example, we compile the program enabling interprocedural optimizations between source files (-ipo), aggressive loop optimizations (-O3) and vectorization (-vec -xAVX).
-
-The compiler recognizes the omp, simd, vector and ivdep pragmas for OpenMP parallelization and AVX vectorization. Enable the OpenMP parallelization with the **-openmp** compiler switch.
-
-```console
-$ icc -ipo -O3 -vec -xAVX -vec-report1 -openmp myprog.c mysubroutines.c -o myprog.x
-$ ifort -ipo -O3 -vec -xAVX -vec-report1 -openmp myprog.f mysubroutines.f -o myprog.x
-```
-
-Read more at <http://software.intel.com/sites/products/documentation/doclib/stdxe/2013/composerxe/compiler/cpp-lin/index.htm>
-
-## Sandy Bridge/Haswell Binary Compatibility
-
-Anselm nodes are currently equipped with Sandy Bridge CPUs, while Salomon will use the Haswell architecture. The new processors are backward compatible with the Sandy Bridge nodes, so all programs that ran on the Sandy Bridge processors should also run on the new Haswell nodes. To get optimal performance out of the Haswell processors, a program should make use of the special AVX2 instructions for this processor. One can do this by recompiling codes with the compiler flags designated to invoke these instructions. For the Intel compiler suite, there are two ways of doing this:
-
-* Using compiler flag (both for Fortran and C): -xCORE-AVX2. This will create a binary with AVX2 instructions, specifically for the Haswell processors. Note that the executable will not run on Sandy Bridge nodes.
-* Using compiler flags (both for Fortran and C): -xAVX -axCORE-AVX2. This will generate multiple, feature specific auto-dispatch code paths for Intel® processors, if there is a performance benefit. So this binary will run both on Sandy Bridge and Haswell processors. During runtime it will be decided which path to follow, depending on which processor you are running on. In general, this will result in larger binaries.
diff --git a/docs.it4i/anselm/software/intel-suite/intel-debugger.md b/docs.it4i/anselm/software/intel-suite/intel-debugger.md
deleted file mode 100644
index d3a5807fca1a0051c4424a5613f3faa57c26895a..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/intel-suite/intel-debugger.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Intel Debugger
-
-## Debugging Serial Applications
-
-The Intel debugger version 13.0 is available via the module intel. The debugger works for applications compiled with the Intel C and C++ compiler and the ifort fortran 77/90/95 compiler. The debugger provides a Java GUI environment. Use an X display for running the GUI.
-
-```console
-$ ml intel
-$ idb
-```
-
-The debugger may run in text mode. To debug in text mode, use
-
-```console
-$ idbc
-```
-
-To debug on the compute nodes, the module intel must be loaded. The GUI on compute nodes may be accessed the same way as described in the GUI section.
-
-Example:
-
-```console
-$ qsub -q qexp -l select=1:ncpus=16 -X -I
- qsub: waiting for job 19654.srv11 to start
- qsub: job 19654.srv11 ready
-
-$ ml intel
-$ ml java
-$ icc -O0 -g myprog.c -o myprog.x
-$ idb ./myprog.x
-```
-
-In this example, we allocate 1 full compute node, compile the program myprog.c with the debugging options -O0 -g and run the idb debugger interactively on the myprog.x executable. The GUI access is via X11 port forwarding provided by the PBS workload manager.
-
-## Debugging Parallel Applications
-
-The Intel debugger is capable of debugging multithreaded and MPI parallel programs as well.
-
-### Small Number of MPI Ranks
-
-For debugging a small number of MPI ranks, you may execute and debug each rank in a separate xterm terminal (do not forget the X display). Using Intel MPI, this may be done in the following way:
-
-```console
-$ qsub -q qexp -l select=2:ncpus=16 -X -I
- qsub: waiting for job 19654.srv11 to start
- qsub: job 19655.srv11 ready
-
-$ ml intel
-$ mpirun -ppn 1 -hostfile $PBS_NODEFILE --enable-x xterm -e idbc ./mympiprog.x
-```
-
-In this example, we allocate 2 full compute nodes, run xterm on each node and start the idb debugger in command line mode, debugging two ranks of the mympiprog.x application. An xterm will pop up for each rank, with the idb prompt ready. The example is not limited to the use of Intel MPI.
-
-### Large Number of MPI Ranks
-
-Run the idb debugger using the MPI debug option. This will cause the debugger to bind to all ranks and provide aggregated outputs across the ranks, pausing execution automatically just after startup. You may then set break points and step the execution manually. Using Intel MPI:
-
-```console
- $ qsub -q qexp -l select=2:ncpus=16 -X -I
- qsub: waiting for job 19654.srv11 to start
- qsub: job 19655.srv11 ready
-
-$ ml intel
-$ mpirun -n 32 -idb ./mympiprog.x
-```
-
-### Debugging Multithreaded Application
-
-Run the idb debugger in GUI mode. The menu Parallel contains a number of tools for debugging multiple threads. One of the most useful tools is the **Serialize Execution** tool, which serializes execution of concurrent threads for easy orientation and identification of concurrency related bugs.
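-
-To have something to try the Serialize Execution tool on, any small multithreaded program will do. A minimal OpenMP sketch (illustrative only - the file name omp_test.c is not part of the original page), compiled with the -g -O0 and -openmp flags discussed elsewhere in this documentation:
-
-```cpp
-/* omp_test.c - small multithreaded program for experimenting with idb.
-   Compile: icc -g -O0 -openmp omp_test.c -o omp_test.x */
-#include <omp.h>
-#include <stdio.h>
-
-int main(void) {
-    int sum = 0;
-    #pragma omp parallel
-    {
-        /* deliberately unsynchronized update - a typical concurrency
-           bug that serialized execution helps to localize */
-        sum += omp_get_thread_num();
-        printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
-    }
-    printf("sum = %d\n", sum);
-    return 0;
-}
-```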
-
-## Further Information
-
-An exhaustive manual on idb features and usage is published at the [Intel website](http://software.intel.com/sites/products/documentation/doclib/stdxe/2013/composerxe/debugger/user_guide/index.htm).
diff --git a/docs.it4i/anselm/software/intel-suite/intel-integrated-performance-primitives.md b/docs.it4i/anselm/software/intel-suite/intel-integrated-performance-primitives.md
deleted file mode 100644
index 8e0451c69a082275e114c92acd223e3514317389..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/intel-suite/intel-integrated-performance-primitives.md
+++ /dev/null
@@ -1,81 +0,0 @@
-# Intel IPP
-
-## Intel Integrated Performance Primitives
-
-Intel Integrated Performance Primitives, version 7.1.1, compiled for AVX vector instructions, is available via the module ipp. The IPP is a very rich library of highly optimized algorithmic building blocks for media and data applications. This includes signal, image and frame processing algorithms, such as FFT, FIR, Convolution, Optical Flow, Hough transform, Sum, MinMax, as well as cryptographic functions, linear algebra functions and many more.
-
-!!! note
-    Check out IPP before implementing your own math functions for data processing; what you need is likely already there.
-
-```console
-$ ml ipp
-```
-
-The module sets up environment variables required for linking and running ipp enabled applications.
-
-## IPP Example
-
-```cpp
- #include "ipp.h"
- #include <stdio.h>
- int main(int argc, char* argv[])
- {
-     const IppLibraryVersion *lib;
-     Ipp64u fm;
-     IppStatus status;
-
-     status = ippInit(); //IPP initialization with the best optimization layer
-     if( status != ippStsNoErr ) {
-         printf("IppInit() Error:\n");
-         printf("%s\n", ippGetStatusString(status) );
-         return -1;
-     }
-
-     //Get version info
-     lib = ippiGetLibVersion();
-     printf("%s %s\n", lib->Name, lib->Version);
-
-     //Get CPU features enabled with selected library level
-     fm = ippGetEnabledCpuFeatures();
-     printf("SSE :%c\n",(fm>>1)&1?'Y':'N');
-     printf("SSE2 :%c\n",(fm>>2)&1?'Y':'N');
-     printf("SSE3 :%c\n",(fm>>3)&1?'Y':'N');
-     printf("SSSE3 :%c\n",(fm>>4)&1?'Y':'N');
-     printf("SSE41 :%c\n",(fm>>6)&1?'Y':'N');
-     printf("SSE42 :%c\n",(fm>>7)&1?'Y':'N');
-     printf("AVX :%c\n",(fm>>8)&1 ?'Y':'N');
-     printf("AVX2 :%c\n", (fm>>15)&1 ?'Y':'N' );
-     printf("----------\n");
-     printf("OS Enabled AVX :%c\n", (fm>>9)&1 ?'Y':'N');
-     printf("AES :%c\n", (fm>>10)&1?'Y':'N');
-     printf("CLMUL :%c\n", (fm>>11)&1?'Y':'N');
-     printf("RDRAND :%c\n", (fm>>13)&1?'Y':'N');
-     printf("F16C :%c\n", (fm>>14)&1?'Y':'N');
-
-     return 0;
- }
-```
-
-Compile the above example using any compiler and the ipp module.
-
-```console
-$ ml intel
-$ ml ipp
-
-$ icc testipp.c -o testipp.x -lippi -lipps -lippcore
-```
-
-You will need the ipp module loaded to run the ipp enabled executable. This may be avoided by compiling the library search paths into the executable:
-
-```console
-$ ml intel
-$ ml ipp
-
-$ icc testipp.c -o testipp.x -Wl,-rpath=$LIBRARY_PATH -lippi -lipps -lippcore
-```
-
-## Code Samples and Documentation
-
-Intel provides a number of [Code Samples for IPP](https://software.intel.com/en-us/articles/code-samples-for-intel-integrated-performance-primitives-library), illustrating the use of IPP.
-
-Read the full documentation on IPP [on the Intel website](http://software.intel.com/sites/products/search/search.php?q=&x=15&y=6&product=ipp&version=7.1&docos=lin), in particular the [IPP Reference manual](http://software.intel.com/sites/products/documentation/doclib/ipp_sa/71/ipp_manual/index.htm).
diff --git a/docs.it4i/anselm/software/intel-suite/intel-mkl.md b/docs.it4i/anselm/software/intel-suite/intel-mkl.md
deleted file mode 100644
index 6594f8193b800fa1fb269b8611456c6311adafcf..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/intel-suite/intel-mkl.md
+++ /dev/null
@@ -1,114 +0,0 @@
-# Intel MKL
-
-## Intel Math Kernel Library
-
-Intel Math Kernel Library (Intel MKL) is a library of math kernel subroutines, extensively threaded and optimized for maximum performance. Intel MKL provides these basic math kernels:
-
-* BLAS (level 1, 2, and 3) and LAPACK linear algebra routines, offering vector, vector-matrix, and matrix-matrix operations.
-* The PARDISO direct sparse solver, an iterative sparse solver, and supporting sparse BLAS (level 1, 2, and 3) routines for solving sparse systems of equations.
-* ScaLAPACK distributed processing linear algebra routines for Linux and Windows operating systems, as well as the Basic Linear Algebra Communications Subprograms (BLACS) and the Parallel Basic Linear Algebra Subprograms (PBLAS).
-* Fast Fourier transform (FFT) functions in one, two, or three dimensions with support for mixed radices (not limited to sizes that are powers of 2), as well as distributed versions of these functions.
-* Vector Math Library (VML) routines for optimized mathematical operations on vectors.
-* Vector Statistical Library (VSL) routines, which offer high-performance vectorized random number generators (RNG) for several probability distributions, convolution and correlation routines, and summary statistics functions.
-* Data Fitting Library, which provides capabilities for spline-based approximation of functions, derivatives and integrals of functions, and search.
-* Extended Eigensolver, a shared memory version of an eigensolver based on the Feast Eigenvalue Solver.
-
-For details see the [Intel MKL Reference Manual](http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mklman/index.htm).
-
-Intel MKL is available on Anselm
-
-```console
-$ ml imkl
-```
-
-The module sets up environment variables required for linking and running mkl enabled applications. The most important variables are $MKLROOT, $MKL_INC_DIR, $MKL_LIB_DIR and $MKL_EXAMPLES.
-
-!!! note
-    The MKL library may be linked using any compiler. With the Intel compiler, use the -mkl option to link the default threaded MKL.
-
-### Interfaces
-
-The MKL library provides a number of interfaces. The fundamental ones are LP64 and ILP64. The Intel MKL ILP64 libraries use the 64-bit integer type (necessary for indexing large arrays with more than 2^31-1 elements), whereas the LP64 libraries index arrays with the 32-bit integer type.
-
-| Interface | Integer type |
-| --------- | -------------------------------------------- |
-| LP64 | 32-bit, int, integer(kind=4), MPI_INT |
-| ILP64 | 64-bit, long int, integer(kind=8), MPI_INT64 |
-
-### Linking
-
-Linking MKL libraries may be complex. The Intel [mkl link line advisor](http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor) helps. See also the [examples](intel-mkl/#examples) below.
-
-You will need the mkl module loaded to run the mkl enabled executable. This may be avoided by compiling the library search paths into the executable.
Include rpath on the compile line:
-
-```console
-$ icc .... -Wl,-rpath=$LIBRARY_PATH ...
-```
-
-### Threading
-
-!!! note
-    An advantage of using the MKL library is that it brings threaded parallelization to applications that are otherwise not parallel.
-
-For this to work, the application must link the threaded MKL library (default). The number and behaviour of MKL threads may be controlled via the OpenMP environment variables, such as OMP_NUM_THREADS and KMP_AFFINITY. MKL_NUM_THREADS takes precedence over OMP_NUM_THREADS.
-
-```console
-$ export OMP_NUM_THREADS=16
-$ export KMP_AFFINITY=granularity=fine,compact,1,0
-```
-
-The application will run with 16 threads with affinity optimized for fine grain parallelization.
-
-## Examples
-
-A number of examples demonstrating the use of the MKL library and its linking are available on Anselm, in the $MKL_EXAMPLES directory. In the examples below, we demonstrate linking MKL to an Intel and a GNU compiled program for multi-threaded matrix multiplication.
-
-### Working With Examples
-
-```console
-$ ml intel
-$ cp -a $MKL_EXAMPLES/cblas /tmp/
-$ cd /tmp/cblas
-$ make sointel64 function=cblas_dgemm
-```
-
-In this example, we compile, link and run the cblas_dgemm example, demonstrating the use of the MKL example suite installed on Anselm.
-
-### Example: MKL and Intel Compiler
-
-```console
-$ ml intel
-$ cp -a $MKL_EXAMPLES/cblas /tmp/
-$ cd /tmp/cblas
-$ icc -w source/cblas_dgemmx.c source/common_func.c -mkl -o cblas_dgemmx.x
-$ ./cblas_dgemmx.x data/cblas_dgemmx.d
-```
-
-In this example, we compile, link and run the cblas_dgemm example, demonstrating the use of MKL with the icc -mkl option. Using the -mkl option is equivalent to:
-
-```console
-$ icc -w source/cblas_dgemmx.c source/common_func.c -o cblas_dgemmx.x -I$MKL_INC_DIR -L$MKL_LIB_DIR -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5
-```
-
-In this example, we compile and link the cblas_dgemm example, using the LP64 interface to threaded MKL and the Intel OMP threads implementation.
-
-### Example: MKL and GNU Compiler
-
-```console
-$ ml gcc
-$ ml imkl
-$ cp -a $MKL_EXAMPLES/cblas /tmp/
-$ cd /tmp/cblas
-$ gcc -w source/cblas_dgemmx.c source/common_func.c -o cblas_dgemmx.x -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lm
-$ ./cblas_dgemmx.x data/cblas_dgemmx.d
-```
-
-In this example, we compile, link and run the cblas_dgemm example, using the LP64 interface to threaded MKL and the GNU OMP threads implementation.
-
-## MKL and MIC Accelerators
-
-The MKL is capable of automatically offloading computations to the MIC accelerator. See the [Intel Xeon Phi](../intel-xeon-phi/) section for details.
-
-## Further Reading
-
-Read more on the [Intel website](http://software.intel.com/en-us/intel-mkl), in particular the [MKL users guide](https://software.intel.com/en-us/intel-mkl/documentation/linux).
diff --git a/docs.it4i/anselm/software/intel-suite/intel-tbb.md b/docs.it4i/anselm/software/intel-suite/intel-tbb.md
deleted file mode 100644
index 497b26f5e46a62604b7eb542bd0579b2c7fbd358..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/intel-suite/intel-tbb.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Intel TBB
-
-## Intel Threading Building Blocks
-
-Intel Threading Building Blocks (Intel TBB) is a library that supports scalable parallel programming using standard ISO C++ code. It does not require special languages or compilers. To use the library, you specify tasks, not threads, and let the library map tasks onto threads in an efficient manner.
The tasks are executed by a runtime scheduler and may -be offloaded to [MIC accelerator](../intel-xeon-phi/). - -Intel TBB version 4.1 is available on Anselm - -```console -$ ml tbb -``` - -The module sets up environment variables, required for linking and running tbb enabled applications. - -!!! note - Link the tbb library, using -ltbb - -## Examples - -Number of examples, demonstrating use of TBB and its built-in scheduler is available on Anselm, in the $TBB_EXAMPLES directory. - -```console -$ ml intel -$ ml tbb -$ cp -a $TBB_EXAMPLES/common $TBB_EXAMPLES/parallel_reduce /tmp/ -$ cd /tmp/parallel_reduce/primes -$ icc -O2 -DNDEBUG -o primes.x main.cpp primes.cpp -ltbb -$ ./primes.x -``` - -In this example, we compile, link and run the primes example, demonstrating use of parallel task-based reduce in computation of prime numbers. - -You will need the tbb module loaded to run the tbb enabled executable. This may be avoided, by compiling library search paths into the executable. - -```console -$ icc -O2 -o primes.x main.cpp primes.cpp -Wl,-rpath=$LIBRARY_PATH -ltbb -``` - -## Further Reading - -Read more on Intel website, <http://software.intel.com/sites/products/documentation/doclib/tbb_sa/help/index.htm> diff --git a/docs.it4i/anselm/software/intel-suite/introduction.md b/docs.it4i/anselm/software/intel-suite/introduction.md deleted file mode 100644 index 879389f3f119e873d375b585da4e56f0dcfa5a79..0000000000000000000000000000000000000000 --- a/docs.it4i/anselm/software/intel-suite/introduction.md +++ /dev/null @@ -1,62 +0,0 @@ -# Intel Parallel Studio - -The Anselm cluster provides following elements of the Intel Parallel Studio XE - -* Intel Compilers -* Intel Debugger -* Intel MKL Library -* Intel Integrated Performance Primitives Library -* Intel Threading Building Blocks Library - -## Intel Compilers - -The Intel compilers version 13.1.3 are available, via module intel. The compilers include the icc C and C++ compiler and the ifort fortran 77/90/95 compiler. - -```console -$ ml intel -$ icc -v -$ ifort -v -``` - -Read more at the [Intel Compilers](intel-compilers/) page. - -## Intel Debugger - -The intel debugger version 13.0 is available, via module intel. The debugger works for applications compiled with C and C++ compiler and the ifort fortran 77/90/95 compiler. The debugger provides java GUI environment. Use X display for running the GUI. - -```console -$ ml intel -$ idb -``` - -Read more at the [Intel Debugger](intel-debugger/) page. - -## Intel Math Kernel Library - -Intel Math Kernel Library (Intel MKL) is a library of math kernel subroutines, extensively threaded and optimized for maximum performance. Intel MKL unites and provides these basic components: BLAS, LAPACK, ScaLapack, PARDISO, FFT, VML, VSL, Data fitting, Feast Eigensolver and many more. - -```console -$ ml imkl -``` - -Read more at the [Intel MKL](intel-mkl/) page. - -## Intel Integrated Performance Primitives - -Intel Integrated Performance Primitives, version 7.1.1, compiled for AVX is available, via module ipp. The IPP is a library of highly optimized algorithmic building blocks for media and data applications. This includes signal, image and frame processing algorithms, such as FFT, FIR, Convolution, Optical Flow, Hough transform, Sum, MinMax and many more. - -```console -$ ml ipp -``` - -Read more at the [Intel IPP](intel-integrated-performance-primitives/) page. 
-
-## Intel Threading Building Blocks
-
-Intel Threading Building Blocks (Intel TBB) is a library that supports scalable parallel programming using standard ISO C++ code. It does not require special languages or compilers. It is designed to promote scalable data parallel programming. Additionally, it fully supports nested parallelism, so you can build larger parallel components from smaller parallel components. To use the library, you specify tasks, not threads, and let the library map tasks onto threads in an efficient manner.
-
-```console
-$ ml tbb
-```
-
-Read more at the [Intel TBB](intel-tbb/) page.
diff --git a/docs.it4i/anselm/software/java.md b/docs.it4i/anselm/software/java.md
deleted file mode 100644
index a9de126760592f8fdb983242eb397ebf00c80c42..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/java.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Java
-
-## Java on ANSELM
-
-Java is available on the Anselm cluster. Activate Java by loading the Java module:
-
-```console
-$ ml Java
-```
-
-Note that the Java module must be loaded on the compute nodes as well, in order to run Java on the compute nodes.
-
-Check the Java version and path:
-
-```console
-$ java -version
-$ which java
-```
-
-With the module loaded, not only the runtime environment (JRE) but also the development environment (JDK) with the compiler is available.
-
-```console
-$ javac -version
-$ which javac
-```
-
-Java applications may use MPI for inter-process communication, in conjunction with OpenMPI. Read more on <http://www.open-mpi.org/faq/?category=java>. This functionality is currently not supported on the Anselm cluster. In case you require the Java interface to MPI, please contact [Anselm support](https://support.it4i.cz/rt/).
diff --git a/docs.it4i/anselm/software/mpi/mpi.md b/docs.it4i/anselm/software/mpi/mpi.md
deleted file mode 100644
index 4313bf513d5262a4b3eba0f1ef10380142f3a2ef..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/mpi/mpi.md
+++ /dev/null
@@ -1,146 +0,0 @@
-# MPI
-
-## Setting Up MPI Environment
-
-The Anselm cluster provides several implementations of the MPI library:
-
-| MPI Library | Thread support |
-| ---------------------------------------------------- | --------------------------------------------------------------- |
-| The highly optimized and stable **bullxmpi 1.2.4.1** | Partial thread support up to MPI_THREAD_SERIALIZED |
-| The **Intel MPI 4.1** | Full thread support up to MPI_THREAD_MULTIPLE |
-| The [OpenMPI 1.6.5](http://www.open-mpi.org/) | Full thread support up to MPI_THREAD_MULTIPLE, BLCR c/r support |
-| The OpenMPI 1.8.1 | Full thread support up to MPI_THREAD_MULTIPLE, MPI-3.0 support |
-| The **mpich2 1.9** | Full thread support up to MPI_THREAD_MULTIPLE, BLCR c/r support |
-
-MPI libraries are activated via the environment modules.
-
-```console
-$ ml av mpi/
-
------------------------- /opt/modules/modulefiles/mpi -------------------------
- bullxmpi/bullxmpi-1.2.4.1 mvapich2/1.9-icc
- impi/4.0.3.008 openmpi/1.6.5-gcc(default)
- impi/4.1.0.024 openmpi/1.6.5-gcc46
- impi/4.1.0.030 openmpi/1.6.5-icc
- impi/4.1.1.036(default) openmpi/1.8.1-gcc
- openmpi/1.8.1-gcc46
- mvapich2/1.9-gcc(default) openmpi/1.8.1-gcc49
- mvapich2/1.9-gcc46 openmpi/1.8.1-icc
-```
-
-There are default compilers associated with any particular MPI implementation. The defaults may be changed; the MPI libraries may be used in conjunction with any compiler.
The defaults are selected via the modules in following way - -| Module | MPI | Compiler suite | -| ------------ | ---------------- | ------------------------------------------------------------------------------ | -| PrgEnv-gnu | bullxmpi-1.2.4.1 | bullx GNU 4.4.6 | -| PrgEnv-intel | Intel MPI 4.1.1 | Intel 13.1.1 | -| bullxmpi | bullxmpi-1.2.4.1 | none, select via module | -| impi | Intel MPI 4.1.1 | none, select via module | -| openmpi | OpenMPI 1.6.5 | GNU compilers 4.8.1, GNU compilers 4.4.6, Intel Compilers | -| openmpi | OpenMPI 1.8.1 | GNU compilers 4.8.1, GNU compilers 4.4.6, GNU compilers 4.9.0, Intel Compilers | -| mvapich2 | MPICH2 1.9 | GNU compilers 4.8.1, GNU compilers 4.4.6, Intel Compilers | - -Examples: - -```console -$ ml OpenMPI **or** ml openmpi **for older versions** -``` - -In this example, we activate the latest openmpi with latest GNU compilers - -To use openmpi with the intel compiler suite, use - -```console -$ ml intel -$ ml openmpi/1.6.5-icc -``` - -In this example, the openmpi 1.6.5 using intel compilers is activated - -## Compiling MPI Programs - -!!! note - After setting up your MPI environment, compile your program using one of the mpi wrappers - -```console -$ mpicc -v -$ mpif77 -v -$ mpif90 -v -``` - -Example program: - -```cpp - // helloworld_mpi.c - #include <stdio.h> - - #include<mpi.h> - - int main(int argc, char **argv) { - - int len; - int rank, size; - char node[MPI_MAX_PROCESSOR_NAME]; - - // Initiate MPI - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD,&rank); - MPI_Comm_size(MPI_COMM_WORLD,&size); - - // Get hostame and print - MPI_Get_processor_name(node,&len); - printf("Hello world! from rank %d of %d on host %sn",rank,size,node); - - // Finalize and exit - MPI_Finalize(); - - return 0; - } -``` - -Compile the above example with - -```console -$ mpicc helloworld_mpi.c -o helloworld_mpi.x -``` - -## Running MPI Programs - -!!! note - The MPI program executable must be compatible with the loaded MPI module. - Always compile and execute using the very same MPI module. - -It is strongly discouraged to mix mpi implementations. Linking an application with one MPI implementation and running mpirun/mpiexec form other implementation may result in unexpected errors. - -The MPI program executable must be available within the same path on all nodes. This is automatically fulfilled on the /home and /scratch file system. You need to preload the executable, if running on the local scratch /lscratch file system. - -### Ways to Run MPI Programs - -Optimal way to run an MPI program depends on its memory requirements, memory access pattern and communication pattern. - -!!! note - Consider these ways to run an MPI program: - - 1. One MPI process per node, 16 threads per process - 2. Two MPI processes per node, 8 threads per process - 3. 16 MPI processes per node, 1 thread per process. - -**One MPI** process per node, using 16 threads, is most useful for memory demanding applications, that make good use of processor cache memory and are not memory bound. This is also a preferred way for communication intensive applications as one process per node enjoys full bandwidth access to the network interface. - -**Two MPI** processes per node, using 8 threads each, bound to processor socket is most useful for memory bandwidth bound applications such as BLAS1 or FFT, with scalable memory demand. However, note that the two processes will share access to the network interface. 
The 8 threads and socket binding should ensure maximum memory access bandwidth and minimize communication, migration and NUMA effect overheads. - -!!! note - Important! Bind every OpenMP thread to a core! - -In the previous two cases with one or two MPI processes per node, the operating system might still migrate OpenMP threads between cores. You want to avoid this by setting the KMP_AFFINITY or GOMP_CPU_AFFINITY environment variables. - -**16 MPI** processes per node, using 1 thread each bound to processor core is most suitable for highly scalable applications with low communication demand. - -### Running OpenMPI - -The **bullxmpi-1.2.4.1** and [**OpenMPI 1.6.5**](http://www.open-mpi.org/) are both based on OpenMPI. Read more on [how to run OpenMPI](Running_OpenMPI/) based MPI. - -### Running MPICH2 - -The **Intel MPI** and **mpich2 1.9** are MPICH2 based implementations. Read more on [how to run MPICH2](running-mpich2/) based MPI. - -The Intel MPI may run on the Intel Xeon Phi accelerators as well. Read more on [how to run Intel MPI on accelerators](../intel-xeon-phi/). diff --git a/docs.it4i/anselm/software/numerical-languages/matlab.md b/docs.it4i/anselm/software/numerical-languages/matlab.md deleted file mode 100644 index ac1b0cc5e6b5728f0079b57b771ec17a219f4d8d..0000000000000000000000000000000000000000 --- a/docs.it4i/anselm/software/numerical-languages/matlab.md +++ /dev/null @@ -1,280 +0,0 @@ -# Matlab - -## Introduction - -Matlab is available in versions R2015a and R2015b. There are always two variants of the release: - -* Non commercial or so called EDU variant, which can be used for common research and educational purposes. -* Commercial or so called COM variant, which can used also for commercial activities. The licenses for commercial variant are much more expensive, so usually the commercial variant has only subset of features compared to the EDU available. - -To load the latest version of Matlab load the module - -```console -$ ml MATLAB -``` - -By default the EDU variant is marked as default. If you need other version or variant, load the particular version. To obtain the list of available versions use - -```console -$ ml av MATLAB -``` - -If you need to use the Matlab GUI to prepare your Matlab programs, you can use Matlab directly on the login nodes. But for all computations use Matlab on the compute nodes via PBS Pro scheduler. - -If you require the Matlab GUI, please follow the general information about [running graphical applications](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/). - -Matlab GUI is quite slow using the X forwarding built in the PBS (qsub -X), so using X11 display redirection either via SSH or directly by xauth (please see the "GUI Applications on Compute Nodes over VNC" part [here](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-system/)) is recommended. - -To run Matlab with GUI, use - -```console -$ matlab -``` - -To run Matlab in text mode, without the Matlab Desktop GUI environment, use - -```console -$ matlab -nodesktop -nosplash -``` - -plots, images, etc... will be still available. - -## Running Parallel Matlab Using Distributed Computing Toolbox / Engine - -!!! note - Distributed toolbox is available only for the EDU variant - -The MPIEXEC mode available in previous versions is no longer available in MATLAB 2015. Also, the programming interface has changed. Refer to [Release Notes](http://www.mathworks.com/help/distcomp/release-notes.html#buanp9e-1). 
- -Delete previously used file mpiLibConf.m, we have observed crashes when using Intel MPI. - -To use Distributed Computing, you first need to setup a parallel profile. We have provided the profile for you, you can either import it in MATLAB command line: - -```console - >> parallel.importProfile('/apps/all/MATLAB/2015a-EDU/SalomonPBSPro.settings') - - ans = - - SalomonPBSPro -``` - -Or in the GUI, go to tab HOME -> Parallel -> Manage Cluster Profiles..., click Import and navigate to: - -/apps/all/MATLAB/2015a-EDU/SalomonPBSPro.settings - -With the new mode, MATLAB itself launches the workers via PBS, so you can either use interactive mode or a batch mode on one node, but the actual parallel processing will be done in a separate job started by MATLAB itself. Alternatively, you can use "local" mode to run parallel code on just a single node. - -!!! note - The profile is confusingly named Salomon, but you can use it also on Anselm. - -### Parallel Matlab Interactive Session - -Following example shows how to start interactive session with support for Matlab GUI. For more information about GUI based applications on Anselm see [this page](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-system/). - -```console -$ xhost + -$ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A NONE-0-0 -q qexp -l select=1 -l walltime=00:30:00 -l feature__matlab__MATLAB=1 -``` - -This qsub command example shows how to run Matlab on a single node. - -The second part of the command shows how to request all necessary licenses. In this case 1 Matlab-EDU license and 48 Distributed Computing Engines licenses. - -Once the access to compute nodes is granted by PBS, user can load following modules and start Matlab: - -```console -r1i0n17$ ml MATLAB/2015b-EDU -r1i0n17$ matlab & -``` - -### Parallel Matlab Batch Job in Local Mode - -To run matlab in batch mode, write an matlab script, then write a bash jobscript and execute via the qsub command. By default, matlab will execute one matlab worker instance per allocated core. - -```bash - #!/bin/bash - #PBS -A PROJECT ID - #PBS -q qprod - #PBS -l select=1:ncpus=16:mpiprocs=16:ompthreads=1 - - # change to shared scratch directory - SCR=/scratch/work/user/$USER/$PBS_JOBID - mkdir -p $SCR ; cd $SCR || exit - - # copy input file to scratch - cp $PBS_O_WORKDIR/matlabcode.m . - - # load modules - module load MATLAB/2015a-EDU - - # execute the calculation - matlab -nodisplay -r matlabcode > output.out - - # copy output file to home - cp output.out $PBS_O_WORKDIR/. -``` - -This script may be submitted directly to the PBS workload manager via the qsub command. The inputs and matlab script are in matlabcode.m file, outputs in output.out file. Note the missing .m extension in the matlab -r matlabcodefile call, **the .m must not be included**. Note that the **shared /scratch must be used**. Further, it is **important to include quit** statement at the end of the matlabcode.m script. - -Submit the jobscript using qsub - -```console -$ qsub ./jobscript -``` - -### Parallel Matlab Local Mode Program Example - -The last part of the configuration is done directly in the user Matlab script before Distributed Computing Toolbox is started. - -```console - cluster = parcluster('local') -``` - -This script creates scheduler object "cluster" of type "local" that starts workers locally. - -!!! note - Every Matlab script that needs to initialize/use matlabpool has to contain these three lines prior to calling parpool(sched, ...) function. 
- -The last step is to start matlabpool with "cluster" object and correct number of workers. We have 24 cores per node, so we start 24 workers. - -```console - parpool(cluster,16); - - - ... parallel code ... - - - parpool close -``` - -The complete example showing how to use Distributed Computing Toolbox in local mode is shown here. - -```console - cluster = parcluster('local'); - cluster - - parpool(cluster,24); - - n=2000; - - W = rand(n,n); - W = distributed(W); - x = (1:n)'; - x = distributed(x); - spmd - [~, name] = system('hostname') - - T = W*x; % Calculation performed on labs, in parallel. - % T and W are both codistributed arrays here. - end - T; - whos % T and W are both distributed arrays here. - - parpool close - quit -``` - -You can copy and paste the example in a .m file and execute. Note that the parpool size should correspond to **total number of cores** available on allocated nodes. - -### Parallel Matlab Batch Job Using PBS Mode (Workers Spawned in a Separate Job) - -This mode uses PBS scheduler to launch the parallel pool. It uses the SalomonPBSPro profile that needs to be imported to Cluster Manager, as mentioned before. This methodod uses MATLAB's PBS Scheduler interface - it spawns the workers in a separate job submitted by MATLAB using qsub. - -This is an example of m-script using PBS mode: - -```console - cluster = parcluster('SalomonPBSPro'); - set(cluster, 'SubmitArguments', '-A OPEN-0-0'); - set(cluster, 'ResourceTemplate', '-q qprod -l select=10:ncpus=16'); - set(cluster, 'NumWorkers', 160); - - pool = parpool(cluster, 160); - - n=2000; - - W = rand(n,n); - W = distributed(W); - x = (1:n)'; - x = distributed(x); - spmd - [~, name] = system('hostname') - - T = W*x; % Calculation performed on labs, in parallel. - % T and W are both codistributed arrays here. - end - whos % T and W are both distributed arrays here. - - % shut down parallel pool - delete(pool) -``` - -Note that we first construct a cluster object using the imported profile, then set some important options, namely: SubmitArguments, where you need to specify accounting id, and ResourceTemplate, where you need to specify number of nodes to run the job. - -You can start this script using batch mode the same way as in Local mode example. - -### Parallel Matlab Batch With Direct Launch (Workers Spawned Within the Existing Job) - -This method is a "hack" invented by us to emulate the mpiexec functionality found in previous MATLAB versions. We leverage the MATLAB Generic Scheduler interface, but instead of submitting the workers to PBS, we launch the workers directly within the running job, thus we avoid the issues with master script and workers running in separate jobs (issues with license not available, waiting for the worker's job to spawn etc.) - -!!! warning - This method is experimental. - -For this method, you need to use SalomonDirect profile, import it using [the same way as SalomonPBSPro](matlab/#running-parallel-matlab-using-distributed-computing-toolbox---engine) - -This is an example of m-script using direct mode: - -```console - parallel.importProfile('/apps/all/MATLAB/2015a-EDU/SalomonDirect.settings') - cluster = parcluster('SalomonDirect'); - set(cluster, 'NumWorkers', 48); - - pool = parpool(cluster, 48); - - n=2000; - - W = rand(n,n); - W = distributed(W); - x = (1:n)'; - x = distributed(x); - spmd - [~, name] = system('hostname') - - T = W*x; % Calculation performed on labs, in parallel. - % T and W are both codistributed arrays here. 
- end
- whos % T and W are both distributed arrays here.
-
- % shut down parallel pool
- delete(pool)
-```
-
-### Non-Interactive Session and Licenses
-
-If you want to run batch jobs with Matlab, be sure to request appropriate license features with the PBS Pro scheduler, at least `-l feature__matlab__MATLAB=1` for the EDU variant of Matlab. For more information about how to check the license feature states and how to request them with PBS Pro, please [look here](../isv_licenses/).
-
-In the case of a non-interactive session, please read the [following information](../isv_licenses/) on how to modify the qsub command to test for available licenses prior to getting the resource allocation.
-
-### Matlab Distributed Computing Engines Start Up Time
-
-Starting Matlab workers is an expensive process that requires a certain amount of time. For your information, please see the following table:
-
-| compute nodes | number of workers | start-up time[s] |
-| ------------- | ----------------- | ---------------- |
-| 16 | 384 | 831 |
-| 8 | 192 | 807 |
-| 4 | 96 | 483 |
-| 2 | 48 | 16 |
-
-## MATLAB on UV2000
-
-The UV2000 machine, available in the queue "qfat", can be used for MATLAB computations. This is an SMP NUMA machine with a large amount of RAM, which can be beneficial for certain types of MATLAB jobs. CPU cores are allocated in chunks of 8 for this machine.
-
-You can use MATLAB on UV2000 in two parallel modes:
-
-### Threaded Mode
-
-Since this is an SMP machine, you can completely avoid using the Parallel Toolbox and use only MATLAB's threading. MATLAB will automatically detect the number of cores you have allocated and will set maxNumCompThreads accordingly, and certain operations, such as fft, eig, svd, etc., will automatically run in threads. The advantage of this mode is that you don't need to modify your existing sequential codes.
-
-### Local Cluster Mode
-
-You can also use the Parallel Toolbox on UV2000. Use [local cluster mode](matlab/#parallel-matlab-batch-job-in-local-mode); the "SalomonPBSPro" profile will not work.
diff --git a/docs.it4i/anselm/software/numerical-languages/r.md b/docs.it4i/anselm/software/numerical-languages/r.md
deleted file mode 100644
index 8916ccb7cc21a1e9bf7de6bda24d1a38bdf82263..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/numerical-languages/r.md
+++ /dev/null
@@ -1,399 +0,0 @@
-# R
-
-## Introduction
-
-R is a language and environment for statistical computing and graphics. R provides a wide variety of statistical (linear and nonlinear modelling, classical statistical tests, time-series analysis, classification, clustering, ...) and graphical techniques, and is highly extensible.
-
-One of R's strengths is the ease with which well-designed publication-quality plots can be produced, including mathematical symbols and formulae where needed. Great care has been taken over the defaults for the minor design choices in graphics, but the user retains full control.
-
-Another convenience is the ease with which C code or third-party libraries may be integrated within R.
-
-Extensive support for parallel computing is available within R.
- -Read more on <http://www.r-project.org/>, <http://cran.r-project.org/doc/manuals/r-release/R-lang.html> - -## Modules - -The R version 3.0.1 is available on Anselm, along with GUI interface Rstudio - -| Application | Version | module | -| ----------- | ------------ | ------- | -| **R** | R 3.0.1 | R | -| **Rstudio** | Rstudio 0.97 | Rstudio | - -```console -$ ml R -``` - -## Execution - -The R on Anselm is linked to highly optimized MKL mathematical library. This provides threaded parallelization to many R kernels, notably the linear algebra subroutines. The R runs these heavy calculation kernels without any penalty. By default, the R would parallelize to 16 threads. You may control the threads by setting the OMP_NUM_THREADS environment variable. - -### Interactive Execution - -To run R interactively, using Rstudio GUI, log in with ssh -X parameter for X11 forwarding. Run rstudio: - -```console -$ ml Rstudio -$ rstudio -``` - -### Batch Execution - -To run R in batch mode, write an R script, then write a bash jobscript and execute via the qsub command. By default, R will use 16 threads when running MKL kernels. - -Example jobscript: - -```bash - #!/bin/bash - - # change to local scratch directory - cd /lscratch/$PBS_JOBID || exit - - # copy input file to scratch - cp $PBS_O_WORKDIR/rscript.R . - - # load R module - module load R - - # execute the calculation - R CMD BATCH rscript.R routput.out - - # copy output file to home - cp routput.out $PBS_O_WORKDIR/. - - #exit - exit -``` - -This script may be submitted directly to the PBS workload manager via the qsub command. The inputs are in rscript.R file, outputs in routput.out file. See the single node jobscript example in the [Job execution section](../../job-submission-and-execution/). - -## Parallel R - -Parallel execution of R may be achieved in many ways. One approach is the implied parallelization due to linked libraries or specially enabled functions, as [described above](r/#interactive-execution). In the following sections, we focus on explicit parallelization, where parallel constructs are directly stated within the R script. - -## Package Parallel - -The package parallel provides support for parallel computation, including by forking (taken from package multicore), by sockets (taken from package snow) and random-number generation. - -The package is activated this way: - -```console -$ R - > library(parallel) -``` - -More information and examples may be obtained directly by reading the documentation available in R - -```console - > ?parallel - > library(help = "parallel") - > vignette("parallel") -``` - -Download the package [parallell](package-parallel-vignette.pdf) vignette. - -The forking is the most simple to use. Forking family of functions provide parallelized, drop in replacement for the serial apply() family of functions. - -!!! note - Forking via package parallel provides functionality similar to OpenMP construct - - omp parallel for - - Only cores of single node can be utilized this way! 
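-
-Before sizing a forked computation, it is worth checking how many cores R actually sees on the node. A quick sanity check (illustrative; the reported number depends on your allocation - an Anselm compute node has 16 cores):
-
-```r
-library(parallel)
-detectCores()
-```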
-
-Forking example:
-
-```r
- library(parallel)
-
- #integrand function
- f <- function(i,h) {
- x <- h*(i-0.5)
- return (4/(1 + x*x))
- }
-
- #initialize
- size <- detectCores()
-
- while (TRUE)
- {
- #read number of intervals
- cat("Enter the number of intervals: (0 quits) ")
- fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp)
-
- if(n<=0) break
-
- #run the calculation
- n <- max(n,size)
- h <- 1.0/n
-
- i <- seq(1,n);
- pi3 <- h*sum(simplify2array(mclapply(i,f,h,mc.cores=size)));
-
- #print results
- cat(sprintf("Value of PI %16.14f, diff= %16.14f\n",pi3,pi3-pi))
- }
-```
-
-The above example is the classic parallel example for calculating the number π. Note the **detectCores()** and **mclapply()** functions.
-
-Execute the example as:
-
-```console
-$ R --slave --no-save --no-restore -f pi3p.R
-```
-
-Every evaluation of the integrand function runs in parallel on a different process.
-
-## Package Rmpi
-
-!!! note
-    package Rmpi provides an interface (wrapper) to MPI APIs.
-
-It also provides an interactive R slave environment. On Anselm, Rmpi provides an interface to the [OpenMPI](../mpi-1/Running_OpenMPI/).
-
-Read more on Rmpi at <http://cran.r-project.org/web/packages/Rmpi/>; the reference manual is available at <http://cran.r-project.org/web/packages/Rmpi/Rmpi.pdf>
-
-When using the package Rmpi, both the openmpi and R modules must be loaded:
-
-```console
-$ ml OpenMPI
-$ ml R
-```
-
-Rmpi may be used in three basic ways. The static approach is identical to executing any other MPI program. In addition, there is the Rslaves dynamic MPI approach and the mpi.apply approach. In the following section, we will use the number π integration example to illustrate all these concepts.
-
-### Static Rmpi
-
-Static Rmpi programs are executed via mpiexec, like any other MPI program. The number of processes is static, given at launch time.
-
-Static Rmpi example:
-
-```r
- library(Rmpi)
-
- #integrand function
- f <- function(i,h) {
- x <- h*(i-0.5)
- return (4/(1 + x*x))
- }
-
- #initialize
- invisible(mpi.comm.dup(0,1))
- rank <- mpi.comm.rank()
- size <- mpi.comm.size()
- n<-0
-
- while (TRUE)
- {
- #read number of intervals
- if (rank==0) {
- cat("Enter the number of intervals: (0 quits) ")
- fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp)
- }
-
- #broadcast the intervals
- n <- mpi.bcast(as.integer(n),type=1)
-
- if(n<=0) break
-
- #run the calculation
- n <- max(n,size)
- h <- 1.0/n
-
- i <- seq(rank+1,n,size);
- mypi <- h*sum(sapply(i,f,h));
-
- pi3 <- mpi.reduce(mypi)
-
- #print results
- if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14f\n",pi3,pi3-pi))
- }
-
- mpi.quit()
-```
-
-The above is the static MPI example for calculating the number π. Note the **library(Rmpi)** and **mpi.comm.dup()** function calls.
-
-Execute the example as:
-
-```console
-$ mpiexec R --slave --no-save --no-restore -f pi3.R
-```
-
-### Dynamic Rmpi
-
-Dynamic Rmpi programs are executed by calling R directly. The openmpi module must still be loaded. The R slave processes will be spawned by a function call within the Rmpi program.
- -Dynamic Rmpi example: - -```r - #integrand function - f <- function(i,h) { - x <- h*(i-0.5) - return (4/(1 + x*x)) - } - - #the worker function - workerpi <- function() - { - #initialize - rank <- mpi.comm.rank() - size <- mpi.comm.size() - n<-0 - - while (TRUE) - { - #read number of intervals - if (rank==0) { - cat("Enter the number of intervals: (0 quits) ") - fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) - } - - #broadcat the intervals - n <- mpi.bcast(as.integer(n),type=1) - - if(n<=0) break - - #run the calculation - n <- max(n,size) - h <- 1.0/n - - i <- seq(rank+1,n,size); - mypi <- h*sum(sapply(i,f,h)); - - pi3 <- mpi.reduce(mypi) - - #print results - if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) - } - } - - #main - library(Rmpi) - - cat("Enter the number of slaves: ") - fp<-file("stdin"); ns<-scan(fp,nmax=1); close(fp) - - mpi.spawn.Rslaves(nslaves=ns) - mpi.bcast.Robj2slave(f) - mpi.bcast.Robj2slave(workerpi) - - mpi.bcast.cmd(workerpi()) - workerpi() - - mpi.quit() -``` - -The above example is the dynamic MPI example for calculating the number Ď€. Both master and slave processes carry out the calculation. Note the mpi.spawn.Rslaves(), mpi.bcast.Robj2slave()** and the mpi.bcast.cmd()** function calls. - -Execute the example as: - -```console -$ R --slave --no-save --no-restore -f pi3Rslaves.R -``` - -### mpi.apply Rmpi - -mpi.apply is a specific way of executing Dynamic Rmpi programs. - -!!! note - mpi.apply() family of functions provide MPI parallelized, drop in replacement for the serial apply() family of functions. - -Execution is identical to other dynamic Rmpi programs. - -mpi.apply Rmpi example: - -```r - #integrand function - f <- function(i,h) { - x <- h*(i-0.5) - return (4/(1 + x*x)) - } - - #the worker function - workerpi <- function(rank,size,n) - { - #run the calculation - n <- max(n,size) - h <- 1.0/n - - i <- seq(rank,n,size); - mypi <- h*sum(sapply(i,f,h)); - - return(mypi) - } - - #main - library(Rmpi) - - cat("Enter the number of slaves: ") - fp<-file("stdin"); ns<-scan(fp,nmax=1); close(fp) - - mpi.spawn.Rslaves(nslaves=ns) - mpi.bcast.Robj2slave(f) - mpi.bcast.Robj2slave(workerpi) - - while (TRUE) - { - #read number of intervals - cat("Enter the number of intervals: (0 quits) ") - fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) - if(n<=0) break - - #run workerpi - i=seq(1,2*ns) - pi3=sum(mpi.parSapply(i,workerpi,2*ns,n)) - - #print results - cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) - } - - mpi.quit() -``` - -The above is the mpi.apply MPI example for calculating the number Ď€. Only the slave processes carry out the calculation. Note the **mpi.parSapply()**, function call. The package parallel [example](r/#package-parallel)[above](r/#package-parallel) may be trivially adapted (for much better performance) to this structure using the mclapply() in place of mpi.parSapply(). - -Execute the example as: - -```console -$ R --slave --no-save --no-restore -f pi3parSapply.R -``` - -## Combining Parallel and Rmpi - -Currently, the two packages can not be combined for hybrid calculations. - -## Parallel Execution - -The R parallel jobs are executed via the PBS queue system exactly as any other parallel jobs. 
-
-Example jobscript for [static Rmpi](r/#static-rmpi) parallel R execution, running 1 process per core:
-
-```bash
-    #!/bin/bash
-    #PBS -q qprod
-    #PBS -N Rjob
-    #PBS -l select=100:ncpus=16:mpiprocs=16:ompthreads=1
-
-    # change to scratch directory
-    SCRDIR=/scratch/$USER/myjob
-    cd $SCRDIR || exit
-
-    # copy input file to scratch
-    cp $PBS_O_WORKDIR/rscript.R .
-
-    # load R and openmpi module
-    module load R
-    module load openmpi
-
-    # execute the calculation
-    mpiexec -bycore -bind-to-core R --slave --no-save --no-restore -f rscript.R
-
-    # copy output file to home
-    cp routput.out $PBS_O_WORKDIR/.
-
-    #exit
-    exit
-```
-
-For more information about jobscripts and MPI execution, refer to the [Job submission](../../job-submission-and-execution/) and general [MPI](../mpi/mpi/) sections.
diff --git a/docs.it4i/anselm/software/nvidia-cuda.md b/docs.it4i/anselm/software/nvidia-cuda.md
index 6b06d9384302e0e023f807dcb2eb983a11b3b73a..91251e132e59d3c86d1b60169f7da82cfae2fcee 100644
--- a/docs.it4i/anselm/software/nvidia-cuda.md
+++ b/docs.it4i/anselm/software/nvidia-cuda.md
@@ -91,92 +91,92 @@ Expected output of the deviceQuery example executed on a node with Tesla K20m is

In this section we provide a basic CUDA based vector addition code example. You can directly copy and paste the code to test it.

-```console
+```cpp
$ vim test.cu

-    #define N (2048*2048)
-    #define THREADS_PER_BLOCK 512
-
-    #include <stdio.h>
-    #include <stdlib.h>
-
-    // GPU kernel function to add two vectors
-    __global__ void add_gpu( int *a, int *b, int *c, int n){
-        int index = threadIdx.x + blockIdx.x * blockDim.x;
-        if (index < n)
-            c[index] = a[index] + b[index];
+#define N (2048*2048)
+#define THREADS_PER_BLOCK 512
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// GPU kernel function to add two vectors
+__global__ void add_gpu( int *a, int *b, int *c, int n){
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n)
+    c[index] = a[index] + b[index];
+}
+
+// CPU function to add two vectors
+void add_cpu (int *a, int *b, int *c, int n) {
+  for (int i=0; i < n; i++)
+    c[i] = a[i] + b[i];
+}
+
+// CPU function to generate a vector of random integers
+void random_ints (int *a, int n) {
+  for (int i = 0; i < n; i++)
+    a[i] = rand() % 10000; // random number between 0 and 9999
+}
+
+// CPU function to compare two vectors
+int compare_ints( int *a, int *b, int n ){
+  int pass = 0;
+  for (int i = 0; i < N; i++){
+    if (a[i] != b[i]) {
+      printf("Value mismatch at location %d, values %d and %d\n",i, a[i], b[i]);
+      pass = 1;
+    }
+  }
+  if (pass == 0) printf ("Test passed\n"); else printf ("Test Failed\n");
+  return pass;
+}
-    // CPU function to add two vectors
-    void add_cpu (int *a, int *b, int *c, int n) {
-        for (int i=0; i < n; i++)
-            c[i] = a[i] + b[i];
-    }
+int main( void ) {
-    // CPU function to generate a vector of random integers
-    void random_ints (int *a, int n) {
-        for (int i = 0; i < n; i++)
-            a[i] = rand() % 10000; // random number between 0 and 9999
-    }
+  int *a, *b, *c;            // host copies of a, b, c
+  int *dev_a, *dev_b, *dev_c; // device copies of a, b, c
+  int size = N * sizeof( int ); // we need space for N integers
-    // CPU function to compare two vectors
-    int compare_ints( int *a, int *b, int n ){
-        int pass = 0;
-        for (int i = 0; i < N; i++){
-            if (a[i] != b[i]) {
-                printf("Value mismatch at location %d, values %d and %dn",i, a[i], b[i]);
-                pass = 1;
-            }
-        }
-        if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn");
-        return pass;
-    }
-
-    int main( void ) {
-
-        int *a, *b, *c;            // host copies of a, b, c
-        int *dev_a, *dev_b, *dev_c; // device copies of a, b, c
-        int size = N * sizeof( int ); // we need space for N integers
+  // Allocate GPU/device copies of dev_a, dev_b, dev_c
+  cudaMalloc( (void**)&dev_a, size );
+  cudaMalloc( (void**)&dev_b, size );
+  cudaMalloc( (void**)&dev_c, size );
-        // Allocate GPU/device copies of dev_a, dev_b, dev_c
-        cudaMalloc( (void**)&dev_a, size );
-        cudaMalloc( (void**)&dev_b, size );
-        cudaMalloc( (void**)&dev_c, size );
+  // Allocate CPU/host copies of a, b, c
+  a = (int*)malloc( size );
+  b = (int*)malloc( size );
+  c = (int*)malloc( size );
-        // Allocate CPU/host copies of a, b, c
-        a = (int*)malloc( size );
-        b = (int*)malloc( size );
-        c = (int*)malloc( size );
+  // Fill input vectors with random integer numbers
+  random_ints( a, N );
+  random_ints( b, N );
-        // Fill input vectors with random integer numbers
-        random_ints( a, N );
-        random_ints( b, N );
+  // copy inputs to device
+  cudaMemcpy( dev_a, a, size, cudaMemcpyHostToDevice );
+  cudaMemcpy( dev_b, b, size, cudaMemcpyHostToDevice );
-        // copy inputs to device
-        cudaMemcpy( dev_a, a, size, cudaMemcpyHostToDevice );
-        cudaMemcpy( dev_b, b, size, cudaMemcpyHostToDevice );
+  // launch add_gpu() kernel with blocks and threads
+  add_gpu<<< N/THREADS_PER_BLOCK, THREADS_PER_BLOCK >>>( dev_a, dev_b, dev_c, N );
-        // launch add_gpu() kernel with blocks and threads
-        add_gpu<<< N/THREADS_PER_BLOCK, THREADS_PER_BLOCK >>( dev_a, dev_b, dev_c, N );
+  // copy device result back to host copy of c
+  cudaMemcpy( c, dev_c, size, cudaMemcpyDeviceToHost );
-        // copy device result back to host copy of c
-        cudaMemcpy( c, dev_c, size, cudaMemcpyDeviceToHost );
+  // Check the results with CPU implementation
+  int *c_h; c_h = (int*)malloc( size );
+  add_cpu (a, b, c_h, N);
+  compare_ints(c, c_h, N);
-        // Check the results with CPU implementation
-        int *c_h; c_h = (int*)malloc( size );
-        add_cpu (a, b, c_h, N);
-        compare_ints(c, c_h, N);
+  // Clean CPU memory allocations
+  free( a ); free( b ); free( c ); free (c_h);
-        // Clean CPU memory allocations
-        free( a ); free( b ); free( c ); free (c_h);
+  // Clean GPU memory allocations
+  cudaFree( dev_a );
+  cudaFree( dev_b );
+  cudaFree( dev_c );
-        // Clean GPU memory allocations
-        cudaFree( dev_a );
-        cudaFree( dev_b );
-        cudaFree( dev_c );
-
-        return 0;
-    }
+  return 0;
+}
```

This code can be compiled using the following command

@@ -204,81 +204,81 @@ The NVIDIA CUDA Basic Linear Algebra Subroutines (cuBLAS) library is a GPU-accel

SAXPY function multiplies the vector x by the scalar alpha and adds it to the vector y, overwriting the latter vector with the result. The description of the cuBLAS function can be found in [NVIDIA CUDA documentation](http://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-axpy "Nvidia CUDA documentation "). Code can be pasted in the file and compiled without any modification.
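For orientation, a compile-and-link sketch for a cuBLAS program is shown below; the module name and file names are illustrative assumptions, not the documented procedure:

```console
$ ml cuda
$ nvcc test_cublas.cu -o test_cublas -lcublas
```

The example source follows: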
```cpp
-    /* Includes, system */
-    #include <stdio.h>
-    #include <stdlib.h>
-
-    /* Includes, cuda */
-    #include <cuda_runtime.h>
-    #include <cublas_v2.h>
-
-    /* Vector size */
-    #define N (32)
-
-    /* Host implementation of a simple version of saxpi */
-    void saxpy(int n, float alpha, const float *x, float *y)
+/* Includes, system */
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Includes, cuda */
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+
+/* Vector size */
+#define N (32)
+
+/* Host implementation of a simple version of saxpy */
+void saxpy(int n, float alpha, const float *x, float *y)
+{
+  for (int i = 0; i < n; ++i)
+    y[i] = alpha*x[i] + y[i];
+}
+
+/* Main */
+int main(int argc, char **argv)
+{
+  float *h_X, *h_Y, *h_Y_ref;
+  float *d_X = 0;
+  float *d_Y = 0;
+
+  const float alpha = 1.0f;
+  int i;
+
+  cublasHandle_t handle;
+
+  /* Initialize CUBLAS */
+  printf("simpleCUBLAS test running...\n");
+  cublasCreate(&handle);
+
+  /* Allocate host memory for the matrices */
+  h_X = (float *)malloc(N * sizeof(h_X[0]));
+  h_Y = (float *)malloc(N * sizeof(h_Y[0]));
+  h_Y_ref = (float *)malloc(N * sizeof(h_Y_ref[0]));
+
+  /* Fill the matrices with test data */
+  for (i = 0; i < N; i++)
{
-        for (int i = 0; i < n; ++i)
-            y[i] = alpha*x[i] + y[i];
+    h_X[i] = rand() / (float)RAND_MAX;
+    h_Y[i] = rand() / (float)RAND_MAX;
+    h_Y_ref[i] = h_Y[i];
}
-    /* Main */
-    int main(int argc, char **argv)
-    {
-        float *h_X, *h_Y, *h_Y_ref;
-        float *d_X = 0;
-        float *d_Y = 0;
-
-        const float alpha = 1.0f;
-        int i;
-
-        cublasHandle_t handle;
+  /* Allocate device memory for the matrices */
+  cudaMalloc((void **)&d_X, N * sizeof(d_X[0]));
+  cudaMalloc((void **)&d_Y, N * sizeof(d_Y[0]));
-        /* Initialize CUBLAS */
-        printf("simpleCUBLAS test running..n");
-        cublasCreate(&handle);
+  /* Initialize the device matrices with the host matrices */
+  cublasSetVector(N, sizeof(h_X[0]), h_X, 1, d_X, 1);
+  cublasSetVector(N, sizeof(h_Y[0]), h_Y, 1, d_Y, 1);
-        /* Allocate host memory for the matrices */
-        h_X = (float *)malloc(N * sizeof(h_X[0]));
-        h_Y = (float *)malloc(N * sizeof(h_Y[0]));
-        h_Y_ref = (float *)malloc(N * sizeof(h_Y_ref[0]));
+  /* Performs operation using plain C code */
+  saxpy(N, alpha, h_X, h_Y_ref);
-        /* Fill the matrices with test data */
-        for (i = 0; i < N; i++)
-        {
-            h_X[i] = rand() / (float)RAND_MAX;
-            h_Y[i] = rand() / (float)RAND_MAX;
-            h_Y_ref[i] = h_Y[i];
-        }
+  /* Performs operation using cublas */
+  cublasSaxpy(handle, N, &alpha, d_X, 1, d_Y, 1);
-        /* Allocate device memory for the matrices */
-        cudaMalloc((void **)&d_X, N * sizeof(d_X[0]));
-        cudaMalloc((void **)&d_Y, N * sizeof(d_Y[0]));
+  /* Read the result back */
+  cublasGetVector(N, sizeof(h_Y[0]), d_Y, 1, h_Y, 1);
-        /* Initialize the device matrices with the host matrices */
-        cublasSetVector(N, sizeof(h_X[0]), h_X, 1, d_X, 1);
-        cublasSetVector(N, sizeof(h_Y[0]), h_Y, 1, d_Y, 1);
+  /* Check result against reference */
+  for (i = 0; i < N; ++i)
+    printf("CPU res = %f \t GPU res = %f \t diff = %f\n", h_Y_ref[i], h_Y[i], h_Y_ref[i] - h_Y[i]);
-        /* Performs operation using plain C code */
-        saxpy(N, alpha, h_X, h_Y_ref);
+  /* Memory clean up */
+  free(h_X); free(h_Y); free(h_Y_ref);
+  cudaFree(d_X); cudaFree(d_Y);
-        /* Performs operation using cublas */
-        cublasSaxpy(handle, N, &alpha, d_X, 1, d_Y, 1);
-
-        /* Read the result back */
-        cublasGetVector(N, sizeof(h_Y[0]), d_Y, 1, h_Y, 1);
-
-        /* Check result against reference */
-        for (i = 0; i < N; ++i)
-            printf("CPU res = %f t GPU res = %f t diff = %f n", h_Y_ref[i], h_Y[i], h_Y_ref[i] - h_Y[i]);
-
-        /* Memory clean up */
-        free(h_X); free(h_Y); free(h_Y_ref);
-        cudaFree(d_X); cudaFree(d_Y);
-
-        /* Shutdown */
-        cublasDestroy(handle);
-    }
+  /* Shutdown */
+  cublasDestroy(handle);
+}
```

!!! note
diff --git a/docs.it4i/anselm/software/operating-system.md b/docs.it4i/anselm/software/operating-system.md
deleted file mode 100644
index e43800e0d038882270620ccb8e95d50df94a5b71..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/operating-system.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Operating System
-
-The operating system on Anselm is Linux - [**Red Hat Enterprise Linux release 6.x**](https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux).
diff --git a/docs.it4i/anselm/software/paraview.md b/docs.it4i/anselm/software/paraview.md
deleted file mode 100644
index 830ce72a26c0e6a22683534e12444392040c1a58..0000000000000000000000000000000000000000
--- a/docs.it4i/anselm/software/paraview.md
+++ /dev/null
@@ -1,85 +0,0 @@
-# ParaView
-
-Open-Source, Multi-Platform Data Analysis and Visualization Application
-
-## Introduction
-
-**ParaView** is an open-source, multi-platform data analysis and visualization application. ParaView users can quickly build visualizations to analyze their data using qualitative and quantitative techniques. The data exploration can be done interactively in 3D or programmatically using ParaView's batch processing capabilities.
-
-ParaView was developed to analyze extremely large datasets using distributed memory computing resources. It can be run on supercomputers to analyze datasets of exascale size as well as on laptops for smaller data.
-
-Homepage: <http://www.paraview.org/>
-
-## Installed Version
-
-Currently, version 5.1.2 compiled with intel/2017a against the intel MPI library and OSMesa 12.0.2 is installed on Anselm.
-
-## Usage
-
-On Anselm, ParaView is to be used in client-server mode. A parallel ParaView server is launched on compute nodes by the user, and the client is launched on your desktop PC to control and view the visualization. Download the ParaView client application for your OS here: <http://paraview.org/paraview/resources/software.php>.
-
-!!! warning
-    Your version must match the version number installed on Anselm.
-
-### Launching Server
-
-To launch the server, you must first allocate compute nodes, for example
-
-```console
-$ qsub -I -q qprod -A OPEN-0-0 -l select=2
-```
-
-to launch an interactive session on 2 nodes. Refer to [Resource Allocation and Job Execution](../job-submission-and-execution/) for details.
-
-After the interactive session is opened, load the ParaView module:
-
-```console
-$ ml ParaView/5.1.2-intel-2017a-mpi
-```
-
-Now launch the parallel server, with number of nodes times 16 processes:
-
-```console
-$ mpirun -np 32 pvserver --use-offscreen-rendering
-    Waiting for client...
-    Connection URL: cs://cn77:11111
-    Accepting connection(s): cn77:11111
-```
-
-Note that the server is listening on compute node cn77 in this case; we shall use this information later.
-
-### Client Connection
-
-Because a direct connection is not allowed to compute nodes on Anselm, you must establish a SSH tunnel to connect to the server. Choose a port number on your PC to be forwarded to ParaView server, for example 12345. If your PC is running Linux, use this command to establish a SSH tunnel:

-```console
-$ ssh -TN -L 12345:cn77:11111 username@anselm.it4i.cz
-```
-
-replace username with your login and cn77 with the name of compute node your ParaView server is running on (see previous step).
-
-If you use PuTTY on Windows, load Anselm connection configuration, then go to *Connection* -> *SSH* -> *Tunnels* to set up the port forwarding.
-
-Fill the Source port and Destination fields. **Do not forget to click the Add button.**
-
-
-
-Now launch the ParaView client installed on your desktop PC. Select *File* -> *Connect*... and fill in the following:
-
-
-
-The configuration is now saved for later use. Now click Connect to connect to the ParaView server. In your terminal where you have the interactive session with the ParaView server launched, you should see:
-
-```console
-Client connected.
-```
-
-You can now use Parallel ParaView.
-
-### Close Server
-
-Remember to close the interactive session after you finish working with the ParaView server, as it will remain launched even after your client is disconnected and will continue to consume resources.
-
-## GPU Support
-
-Currently, GPU acceleration is not supported in the server and ParaView will not take advantage of accelerated nodes on Anselm. Support for GPU acceleration might be added in the future.
diff --git a/docs.it4i/anselm/storage.md b/docs.it4i/anselm/storage.md
index 17f0d680da0e5145089ee6748c18530601300f4b..3a3d85f50b2a4d7dd78e3bc96f7a58b480b53be7 100644
--- a/docs.it4i/anselm/storage.md
+++ b/docs.it4i/anselm/storage.md
@@ -10,9 +10,7 @@ Please don't use shared filesystems as a backup for large amount of data or long

Anselm computer provides two main shared filesystems, the [HOME filesystem](#home) and the [SCRATCH filesystem](#scratch). Both HOME and SCRATCH filesystems are realized as a parallel Lustre filesystem. Both shared file systems are accessible via the Infiniband network. Extended ACLs are provided on both Lustre filesystems for the purpose of sharing data with other users using fine-grained control.

-### Understanding the Lustre Filesystems
-
-(source <http://www.nas.nasa.gov>)
+### [Understanding the Lustre Filesystems](http://www.nas.nasa.gov)

A user file on the Lustre filesystem can be divided into multiple chunks (stripes) and stored across a subset of the object storage targets (OSTs) (disks). The stripes are distributed among the OSTs in a round-robin fashion to ensure load balancing.

@@ -72,7 +70,7 @@ Another good practice is to make the stripe count be an integral factor of the n

Large stripe size allows each client to have exclusive access to its own part of a file. However, it can be counterproductive in some cases if it does not match your I/O pattern. The choice of stripe size has no effect on a single-stripe file.

-Read more on <http://doc.lustre.org/lustre_manual.xhtml#managingstripingfreespace>
+Read more [here](http://doc.lustre.org/lustre_manual.xhtml#managingstripingfreespace)

### Lustre on Anselm

@@ -98,7 +96,7 @@ The architecture of Lustre on Anselm is composed of two metadata servers (MDS) a

* 2 groups of 5 disks in RAID5
* 2 hot-spare disks

-### HOME
+### HOME File System

The HOME filesystem is mounted in directory /home. Users home directories /home/username reside on this filesystem. Accessible capacity is 320TB, shared among all users. Individual users are restricted by filesystem usage quotas, set to 250GB per user. If 250GB should prove as insufficient for particular user, please contact [support](https://support.it4i.cz/rt), the quota may be lifted upon request.

@@ -127,7 +125,7 @@ Default stripe size is 1MB, stripe count is 1. There are 22 OSTs dedicated for t

| Default stripe count | 1 |
| Number of OSTs | 22 |

-### SCRATCH
+### SCRATCH File System

The SCRATCH filesystem is mounted in directory /scratch. Users may freely create subdirectories and files on the filesystem. Accessible capacity is 146TB, shared among all users. Individual users are restricted by filesystem usage quotas, set to 100TB per user. The purpose of this quota is to prevent runaway programs from filling the entire filesystem and deny service to other users. If 100TB should prove as insufficient for particular user, please contact [support](https://support.it4i.cz/rt), the quota may be lifted upon request.

@@ -183,7 +181,7 @@ Entries: 0
Entries limit: 0
```

-In this example, we view current size limits and space occupied on the /home and /scratch filesystem, for a particular user executing the command.
+In this example, we view the current size limits and the space occupied on the /home and /scratch filesystems for the particular user executing the command. Note that limits are also imposed on the number of objects (files, directories, links, etc.) that you are allowed to create.

To have a better understanding of where the space is exactly used, you can use the following command to find out.

@@ -251,7 +249,7 @@ other::---

Default ACL mechanism can be used to replace setuid/setgid permissions on directories. Setting a default ACL on a directory (-d flag to setfacl) will cause the ACL permissions to be inherited by any newly created file or subdirectory within the directory. Refer to this page for more information on Linux ACL:

-[http://www.vanemery.com/Linux/ACL/POSIX_ACL_on_Linux.html](http://www.vanemery.com/Linux/ACL/POSIX_ACL_on_Linux.html)
+[Red Hat guide](https://access.redhat.com/documentation/en-US/Red_Hat_Storage/2.0/html/Administration_Guide/ch09s05.html)

## Local Filesystems

diff --git a/docs.it4i/apiv1.md b/docs.it4i/apiv1.md
new file mode 100644
index 0000000000000000000000000000000000000000..8f3cc66e87063e9bec279ed8a6b1436ee586ce5e
--- /dev/null
+++ b/docs.it4i/apiv1.md
@@ -0,0 +1,3 @@
+# API Placeholder
+
+This page is being created automatically from the API source code.
diff --git a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/graphical-user-interface.md b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/graphical-user-interface.md
index f1c3573a84bd0e13a403e0b4b0566120585c1d22..b12d1bb970e32369a50bc334b8140ada0c06156a 100644
--- a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/graphical-user-interface.md
+++ b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/graphical-user-interface.md
@@ -10,4 +10,4 @@ Read more about configuring [**X Window System**](x-window-system/).

The **Virtual Network Computing** (**VNC**) is a graphical [desktop sharing](http://en.wikipedia.org/wiki/Desktop_sharing "Desktop sharing") system that uses the [Remote Frame Buffer protocol (RFB)](http://en.wikipedia.org/wiki/RFB_protocol "RFB protocol") to remotely control another [computer](http://en.wikipedia.org/wiki/Computer "Computer").

-Read more about configuring **[VNC](vnc/)**.
+Read more about configuring **[VNC](vnc/)**.
\ No newline at end of file
diff --git a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/vnc.md b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/vnc.md
index b2fa2f58dd0a04e2e5ace8e3035dc5b95fc4a1b5..12f0743a3dd9905ce02b581c04647566ce516120 100644
--- a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/vnc.md
+++ b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/vnc.md
@@ -2,10 +2,12 @@

The **Virtual Network Computing** (**VNC**) is a graphical [desktop sharing](http://en.wikipedia.org/wiki/Desktop_sharing "Desktop sharing") system that uses the [Remote Frame Buffer protocol (RFB)](http://en.wikipedia.org/wiki/RFB_protocol "RFB protocol") to remotely control another [computer](http://en.wikipedia.org/wiki/Computer "Computer"). It transmits the [keyboard](http://en.wikipedia.org/wiki/Computer_keyboard "Computer keyboard") and [mouse](http://en.wikipedia.org/wiki/Computer_mouse "Computer mouse") events from one computer to another, relaying the graphical [screen](http://en.wikipedia.org/wiki/Computer_screen "Computer screen") updates back in the other direction, over a [network](http://en.wikipedia.org/wiki/Computer_network "Computer network").

-Vnc-based connections are faster (require less network bandwidth) then [X11](x-window-system) applications forwarded directly through ssh.
+VNC-based connections are usually faster (requiring less network bandwidth) than [X11](x-window-system) applications forwarded directly through ssh.

The recommended clients are [TightVNC](http://www.tightvnc.com) or [TigerVNC](http://sourceforge.net/apps/mediawiki/tigervnc/index.php?title=Main_Page) (free, open source, available for almost any platform).

+In this chapter, we show how to create an underlying SSH tunnel from your client machine to one of our login nodes, how to start your own VNC server on the login node, and finally how to connect to your VNC server through the encrypted SSH tunnel.
+
## Create VNC Password

!!! note
@@ -22,9 +24,10 @@ Verify:

!!! note
    To access VNC a local vncserver must be started first and also a tunnel using SSH port forwarding must be established.

-[See below](#linuxmac-os-example-of-creating-a-tunnel) for the details on SSH tunnels. In this example we use display number 61.
+[See below](#linuxmac-os-example-of-creating-a-tunnel) for the details on SSH tunnels.

-You can find display numbers which are already occupied on the login2. Here you can see that displays " /usr/bin/Xvnc :79" and " /usr/bin/Xvnc :60" are occupied.
+You should start by **choosing your display number**.
+To choose a free one, check which display numbers are already occupied; list them with the following command:

```console
[username@login2 ~]$ ps aux | grep Xvnc
username 10296 0.0 0.0 131772 21076 pts/29 SN 13:01 0:01 /usr/bin/Xvnc .....
```

-Choose free port e.g. 61 and start your VNC server:
+As you can see above, displays ":79" and ":60" are already occupied.
+Generally, you may choose any display number freely, *except these occupied numbers*.
+Also remember that the display number must be less than or equal to 99.
+Based on this, **we have chosen display number 61**, and you will see this number in the examples below.
+
+!!! note
+    Your situation may differ, so your choice of display number may differ as well. **Please choose and use your own display number accordingly!**
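+
+!!! tip
+    If in doubt, you can also omit the display number and let vncserver pick the first free display itself; this is a convenience sketch (geometry and depth as in the examples below), and you can read the chosen display number back with `vncserver -list`:
+
+```console
+[username@login2 ~]$ vncserver -geometry 1600x900 -depth 16
+```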
+
+Start your VNC server on the chosen display (:61):

```console
[username@login2 ~]$ vncserver :61 -geometry 1600x900 -depth 16
New 'login2:61 (username)' desktop is login2:61

Starting applications specified in /home/username/.vnc/xstartup
Log file is /home/username/.vnc/login2:61.log
```

-Check if VNC server is started (in this example display number is 61):
+Check whether the VNC server is running on the chosen display (:61):

```console
[username@login2 .vnc]$ vncserver -list

TigerVNC server sessions:

X DISPLAY # PROCESS ID
:61 18437
```

-Another command:
+Another way to check it:

```console
[username@login2 .vnc]$ ps aux | grep Xvnc

username 10296 0.0 0.0 131772 21076 pts/29 SN 13:01 0:01 /usr/bin/Xvnc :61 -desktop login2:61 (username) -auth /home/vop999/.Xauthority -geometry 1600x900 -depth 16 -rfbwait 30000 -rfbauth /home/username/.vnc/passwd -rfbport 5961 -fp catalogue:/etc/X11/fontpath.d -pn
```

+
!!! note
-    The vncserver runs on port 5900 + display number. You get your port number simply as 5900 + display number (in this example 61), so the result is 5961.
+    The VNC server runs on port 59xx, where xx is the display number. You get your port number simply as 5900 + display number; in our example, 5900 + 61 = 5961. **Please calculate your own port number and use it instead of 5961 in the examples below!**

-To access the VNC server you have to create a tunnel between the login node using TCP **port 5961** and your machine using a free TCP port (for simplicity the very same, in this case). See examples for [Linux/Mac OS](#linuxmac-os-example-of-creating-a-tunnel) and [Windows](#windows-example-of-creating-a-tunnel).
+To access the VNC server, you have to create a tunnel between the login node using TCP port 5961 and your machine using a free TCP port (for simplicity, the very same) in the next step. See examples for [Linux/Mac OS](#linuxmac-os-example-of-creating-a-tunnel) and [Windows](#windows-example-of-creating-a-tunnel).

!!! note
    The tunnel must point to the same login node where you launched the VNC server, eg. login2. If you use just cluster-name.it4i.cz, the tunnel might point to a different node due to DNS round robin.

@@ -188,6 +200,9 @@ Or this way:

[username@login2 .vnc]$ pkill vnc
```

+!!! note
+    Do not forget to also terminate the SSH tunnel, if one was used. See the end of [this section](#linuxmac-os-example-of-creating-a-tunnel) for details.
+
## GUI Applications on Compute Nodes Over VNC

The very same methods as described above, may be used to run the GUI applications on compute nodes. However, for maximum performance, proceed following these steps:

diff --git a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md
index 961123f511f779edc6e508aec5e6461f5506f06d..6263d2c68cea9f4551365a96386e3fcdf79a4f9c 100644
--- a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md
+++ b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md
@@ -15,7 +15,11 @@ In order to display graphical user interface GUI of various software tools, you

## X Display Forwarding on Windows

-On Windows use the PuTTY client to enable X11 forwarding. In PuTTY menu, go to Connection-SSH-X11, mark the Enable X11 forwarding checkbox before logging in. Then log in as usual.
+On Windows use the PuTTY client to enable X11 forwarding.
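+
+If you prefer the command line, PuTTY's companion tool plink can request X11 forwarding as well; a minimal sketch (the hostname is illustrative, and a local X server must already be running):
+
+```console
+local $ plink -X username@anselm.it4i.cz
+```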
+In PuTTY menu, go to Connection-SSH-X11, mark the Enable X11 forwarding checkbox before logging in.
+
+
+
+Then log in as usual.

To verify the forwarding, type

@@ -130,27 +134,3 @@ In this way, we run remote gnome session on the cluster, displaying it in the lo

Use System-Log Out to close the gnome-session

-### if No Able to Forward X11 Using PuTTY to CygwinX
-
-```console
-[usename@login1.anselm ~]$ gnome-session &
-[1] 23691
-[usename@login1.anselm ~]$ PuTTY X11 proxy: unable to connect to forwarded X server: Network error: Connection refused
-PuTTY X11 proxy: unable to connect to forwarded X server: Network error: Connection refused
-
- (gnome-session:23691): WARNING **: Cannot open display:**
-```
-
-1. Locate and modify Cygwin shortcut that uses [startxwin](http://x.cygwin.com/docs/man1/startxwin.1.html)
-   locate
-   C:cygwin64binXWin.exe
-   change it
-   to
-   C:_cygwin64binXWin.exe -listen tcp_
-
-
-
-1. Check Putty settings:
-   Enable X11 forwarding
-
-
diff --git a/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md b/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md
index 5a952ea24c738ad59acf7b94bed9fe23602b83e9..6f45c5a38f65df2f190eb5834ae8903164ae7057 100644
--- a/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md
+++ b/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md
@@ -1,4 +1,4 @@
-# OpenSSH Keys (UN\*X)
+# OpenSSH Keys (UNIX)

## Key Management

diff --git a/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md b/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md
index 01123953847eefae3965c86e4896e6573f5514a5..5b34c34052c4ad6e6847de26d9806aa9c4d9c969 100644
--- a/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md
+++ b/docs.it4i/general/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md
@@ -1,4 +1,4 @@
-# VPN - Connection fail in Win 8.1
+# VPN - Connection Fail in Win 8.1

## Failed to Initialize Connection Subsystem Win 8.1 - 02-10-15 MS Patch

diff --git a/docs.it4i/general/accessing-the-clusters/vpn-access.md b/docs.it4i/general/accessing-the-clusters/vpn-access.md
index 8f24a21f54aa37624035cf8aa42806af9d09c4a8..dc89ef681dcecbb32b97b610a6cc0211b8851cb6 100644
--- a/docs.it4i/general/accessing-the-clusters/vpn-access.md
+++ b/docs.it4i/general/accessing-the-clusters/vpn-access.md
@@ -15,11 +15,11 @@ It is impossible to connect to VPN from other operating systems.

## VPN Client Installation

-You can install VPN client from web interface after successful login with LDAP credentials on address <https://vpn.it4i.cz/user>
+You can install the VPN client from the web interface after successful login with [IT4I credentials](../obtaining-login-credentials/obtaining-login-credentials/#login-credentials) at <https://vpn.it4i.cz/user>.



-According to the Java settings after login, the client either automatically installs, or downloads installation file for your operating system. It is necessary to allow start of installation tool for automatic installation.
+According to the Java settings after login, the client either installs automatically, or downloads an installation file for your operating system. You must allow the installation tool to start for the automatic installation to proceed. If the automatic installation does not start, proceed with the manual installation described in the next steps.



@@ -37,7 +37,7 @@ After you click on the link, download of installation file will start.



-After successful download of installation file, you have to execute this tool with administrator's rights and install VPN client manually.
+After the installation file has been downloaded, run it with administrator or root rights and install the VPN client manually.

## Working With VPN Client

diff --git a/docs.it4i/general/obtaining-login-credentials/certificates-faq.md b/docs.it4i/general/obtaining-login-credentials/certificates-faq.md
index 671db427ea0f0b3794ec44cb099c31ccde418fbd..eb1c58907a95484170b49d0c3c743fbe0d22acfc 100644
--- a/docs.it4i/general/obtaining-login-credentials/certificates-faq.md
+++ b/docs.it4i/general/obtaining-login-credentials/certificates-faq.md
@@ -157,3 +157,7 @@ A proxy certificate is a short-lived certificate which may be employed by UNICOR

## Q: Someone May Have Copied or Had Access to the Private Key of My Certificate Either in a Separate File or in the Browser. What Should I Do?

Please ask the CA that issued your certificate to revoke this certificate and to supply you with a new one. In addition, please report this to IT4Innovations by contacting [the support team](https://support.it4i.cz/rt).
+
+## Q: My Certificate Expired. What Should I Do?
+
+To continue communicating with us, request a new certificate from your Certificate Authority (CA). There is no need to explicitly send us any information about the new certificate if it has the same Distinguished Name (DN) as the old one.
diff --git a/docs.it4i/general/obtaining-login-credentials/obtaining-login-credentials.md b/docs.it4i/general/obtaining-login-credentials/obtaining-login-credentials.md
index b5202bb65bbd85cce248d61da6d9a4c10d0a9a29..b8168ecff4353d6527b13892245d58b37e7fa645 100644
--- a/docs.it4i/general/obtaining-login-credentials/obtaining-login-credentials.md
+++ b/docs.it4i/general/obtaining-login-credentials/obtaining-login-credentials.md
@@ -13,7 +13,47 @@ Head of Supercomputing Services acts as a PI of a project DD-13-5. Joining this

## Authorization of PI by Allocation Committee

-The PI is authorized to use the clusters by the allocation decision issued by the Allocation Committee.The PI will be informed by IT4I about the Allocation Committee decision.
+The PI is authorized to use the clusters by the allocation decision issued by the Allocation Committee. The PI will be informed by IT4I about the Allocation Committee decision.
+
+## Login Credentials
+
+Once authorized by the PI, every person (PI or Collaborator) wishing to access the clusters should contact [IT4I support](https://support.it4i.cz/rt/) (e-mail: [support\[at\]it4i.cz](mailto:support@it4i.cz)), providing the following information:
+
+1. Project ID
+1. Full name and affiliation
+1. Statement that you have read and accepted the [Acceptable use policy document](http://www.it4i.cz/acceptable-use-policy.pdf) (AUP).
+1. Attach the AUP file.
+1. Your preferred username, max 12 characters long. The preferred username must associate your surname and name or be otherwise derived from it. Only alphanumeric sequences and dash signs are allowed.
+1. In case you choose [Alternative way to personal certificate](#alternative-way-to-personal-certificate), a **scan of photo ID** (personal ID or passport or driver license) is required.
+
+!!! warning
+    Should the above information be provided by e-mail, the e-mail **must be** digitally signed. Read more on [digital signatures](#certificates-for-digital-signatures) below.
+
+Example (except the subject line, which must be in English, you may use Czech or Slovak language for communication with us):
+
+```console
+    Subject: Access to IT4Innovations
+
+    Dear support,
+
+    Please open the user account for me and attach the account to OPEN-0-0
+    Name and affiliation: John Smith, john.smith@myemail.com, Department of Chemistry, MIT, US
+    I have read and accept the Acceptable use policy document (attached)
+
+    Preferred username: johnsm
+
+    Thank you,
+    John Smith
+    (Digitally signed)
+```
+
+You will receive your personal login credentials by protected e-mail. The login credentials include:
+
+1. username
+1. ssh private key and private key passphrase
+1. system password
+
+The clusters are accessed by the [private key](../accessing-the-clusters/shell-access-and-data-transfer/ssh-keys/) and username. The username and password are used for login to the [information systems](http://support.it4i.cz/).

## Authorization by Web

@@ -56,46 +96,6 @@ Example (except the subject line which must be in English, you may use Czech or

    (Digitally signed)
```

-## Login Credentials
-
-Once authorized by PI, every person (PI or Collaborator) wishing to access the clusters, should contact the [IT4I support](https://support.it4i.cz/rt/) (E-mail: [support\[at\]it4i.cz](mailto:support@it4i.cz)) providing following information:
-
-1. Project ID
-1. Full name and affiliation
-1. Statement that you have read and accepted the [Acceptable use policy document](http://www.it4i.cz/acceptable-use-policy.pdf) (AUP).
-1. Attach the AUP file.
-1. Your preferred username, max 12 characters long. The preferred username must associate your surname and name or be otherwise derived from it. Only alphanumeric sequences, dash and underscore signs are allowed.
-1. In case you choose [Alternative way to personal certificate](#alternative-way-to-personal-certificate), a **scan of photo ID** (personal ID or passport or driver license) is required
-
-!!! warning
-    Should the above information be provided by e-mail, the e-mail **must be** digitally signed. Read more on [digital signatures](#certificates-for-digital-signatures) below.
-
-Example (except the subject line which must be in English, you may use Czech or Slovak language for communication with us):
-
-```console
-    Subject: Access to IT4Innovations
-
-    Dear support,
-
-    Please open the user account for me and attach the account to OPEN-0-0
-    Name and affiliation: John Smith, john.smith@myemail.com, Department of Chemistry, MIT, US
-    I have read and accept the Acceptable use policy document (attached)
-
-    Preferred username: johnsm
-
-    Thank you,
-    John Smith
-    (Digitally signed)
-```
-
-You will receive your personal login credentials by protected e-mail. The login credentials include:
-
-1. username
-1. ssh private key and private key passphrase
-1. system password
-
-The clusters are accessed by the [private key](../accessing-the-clusters/shell-access-and-data-transfer/ssh-keys/) and username. Username and password is used for login to the [information systems](http://support.it4i.cz/).

## Change Passphrase

On Linux, use

```console
local $ ssh-keygen -f id_rsa -p
```

On Windows, use [PuTTY Key Generator](../accessing-the-clusters/shell-access-and-data-transfer/putty/#putty-key-generator).
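
To verify that the new passphrase works, you can try loading the key into an SSH agent; a minimal sketch, assuming ssh-agent is available and the key file is named id_rsa:

```console
local $ eval $(ssh-agent -s)
local $ ssh-add id_rsa
```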
-## Change Password
-
-Change password in [your user profile](https://extranet.it4i.cz/user/).
-
## Certificates for Digital Signatures

We accept personal certificates issued by any widely respected certification authority (CA). This includes certificates by CAs organized in [International Grid Trust Federation](http://www.igtf.net/), its European branch [EUGridPMA](https://www.eugridpma.org/) and its member organizations, e.g. the [CESNET certification authority](https://tcs.cesnet.cz). The Czech _"Qualified certificate" (Kvalifikovaný certifikát)_ provided by [PostSignum](http://www.postsignum.cz/) or [I.CA](http://www.ica.cz/Kvalifikovany-certifikat.aspx), that is used in electronic contact with Czech authorities is accepted as well.

diff --git a/docs.it4i/general/resource_allocation_and_job_execution.md b/docs.it4i/general/resource_allocation_and_job_execution.md
new file mode 100644
index 0000000000000000000000000000000000000000..d21f7b9c9f419dea77fba42543b6550f6e696f8a
--- /dev/null
+++ b/docs.it4i/general/resource_allocation_and_job_execution.md
@@ -0,0 +1,39 @@
+# Resource Allocation and Job Execution
+
+To run a [job](/#terminology-frequently-used-on-these-pages), [computational resources](/salomon/resources-allocation-policy/#resource-accounting-policy) for this particular job must be allocated. This is done via the PBS Pro job workload manager software, which distributes workloads across the supercomputer. Extensive information about PBS Pro can be found in the [PBS Pro User's Guide](/pbspro).
+
+## Resources Allocation Policy
+
+The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and the resources available to the Project. [The Fair-share](/salomon/job-priority/#fair-share-priority) ensures that individual users may consume an approximately equal amount of resources per week. The resources are accessible via queues for queueing the jobs. The queues provide prioritized and exclusive access to the computational resources. The following queues are the most important:
+
+* **qexp**, the Express queue
+* **qprod**, the Production queue
+* **qlong**, the Long queue
+* **qmpp**, the Massively parallel queue
+* **qnvidia**, **qmic**, **qfat**, the Dedicated queues
+* **qfree**, the Free resource utilization queue
+
+!!! note
+    Check the queue status at <https://extranet.it4i.cz/>
+
+Read more on the [Resource Allocation Policy](/salomon/resources-allocation-policy) page.
+
+## Job Submission and Execution
+
+!!! note
+    Use the **qsub** command to submit your jobs.
+
+The qsub command submits the job into the queue and creates a request to the PBS Job manager for allocation of the specified resources. The **smallest allocation unit is an entire node, 16 cores**, with the exception of the qexp queue. The resources will be allocated when available, subject to allocation policies and constraints. **After the resources are allocated, the jobscript or interactive shell is executed on the first of the allocated nodes.**
+
+Read more on the [Job submission and execution](/salomon/job-submission-and-execution) page.
+
+## Capacity Computing
+
+!!! note
+    Use Job arrays when running a huge number of jobs.
+
+Use GNU Parallel and/or Job arrays when running (many) single-core jobs.
+
+In many cases, it is useful to submit a huge number (100+) of computational jobs into the PBS queue system. A huge number of (small) jobs is one of the most effective ways to execute embarrassingly parallel calculations, achieving the best runtime, throughput, and computer utilization; a minimal job-array submission is sketched below.
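+
+A minimal sketch of a job-array submission under PBS Pro (the job name, index range, and script name are illustrative):
+
+```console
+$ qsub -N JOBNAME -J 1-100 jobscript
+```
+
+Within each sub-job, the PBS_ARRAY_INDEX environment variable then selects the piece of work to process.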
+In this chapter, we discuss the recommended way to run a huge number of jobs, including **ways to run a huge number of single-core jobs**.
+
+Read more on the [Capacity computing](/salomon/capacity-computing) page.
diff --git a/docs.it4i/job-features.md b/docs.it4i/job-features.md
new file mode 100644
index 0000000000000000000000000000000000000000..679294ff729bd0554bf5fa55bdb9eff8692f9693
--- /dev/null
+++ b/docs.it4i/job-features.md
@@ -0,0 +1,101 @@
+# Job Features
+
+Special features are installed or configured on the fly on the allocated nodes; a feature is requested as an additional resource in the PBS job:
+
+```console
+$ qsub ... -l feature=req
+```
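+
+Features combine with ordinary resource requests; an illustrative sketch (the project, queue, and node counts are placeholders):
+
+```console
+$ qsub -A PROJECT-0-0 -q qprod -l select=4:ncpus=16 -l global_ramdisk=true ./jobscript
+```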
+
+## VTune Support
+
+Load the VTune kernel modules:
+
+```console
+$ qsub ... -l vtune=version_string
+```
+
+version_string is the VTune version, e.g. 2017_update2.
+
+## MIC Development Support
+
+Install development packages (gcc, g++, make, automake, autoconf, bison, flex, perl, libraries, ...) on the MIC accelerators:
+
+```console
+$ qsub ... -l mic_devel=true
+```
+
+Available on Salomon Perrin nodes.
+
+## Global RAM Disk
+
+Create a global shared file system consisting of the RAM disks of the allocated nodes. The file system is mounted at /mnt/global_ramdisk:
+
+```console
+$ qsub ... -l global_ramdisk=true
+```
+
+Available on Salomon nodes.
+
+## Virtualization Network
+
+Configure the network for virtualization and create an interconnect for fast communication between the node (host) and the virtual machine (guest):
+
+```console
+$ qsub ... -l virt_network=true
+```
+
+[See Tap Interconnect](/anselm/software/virtualization/#tap-interconnect)
+
+## x86 Adapt Support
+
+Load a kernel module that allows changing/toggling system parameters stored in the MSR and PCI registers of x86 processors:
+
+```console
+$ qsub ... -l x86_adapt=true
+```
+
+Hazardous: it causes CPU frequency disruption.
+
+Available on Salomon nodes.
+
+## Disabling Intel Turbo Boost on CPU
+
+Intel Turbo Boost on CPU is enabled on all compute nodes.
+
+To disable Intel Turbo Boost on the CPU:
+
+```console
+$ qsub ... -l cpu_turbo_boost=false
+```
+
+## Offlining CPU Cores
+
+Not available.
+
+To offline N CPU cores:
+
+```console
+$ qsub ... -l cpu_offline_cores=N
+```
+
+To offline CPU cores according to a pattern:
+
+```console
+$ qsub ... -l cpu_offline_cores=PATTERN
+```
+
+where PATTERN is a list of core numbers to offline, separated by the character 'c', e.g. "5c11c16c23c".
+
+Hazardous: it causes Lustre thread disruption.
+
+## Setting Intel Hyper Threading on CPU
+
+Not available; requires changed BIOS settings.
+
+Intel Hyper Threading is disabled by default.
+
+To enable Intel Hyper Threading on the allocated nodes' CPUs:
+
+```console
+$ qsub ... -l cpu_hyper_threading=true
+```
\ No newline at end of file
diff --git a/docs.it4i/modules-anselm.md b/docs.it4i/modules-anselm.md
index 7e805edcc6ca32b867a9a4b17f13bddf67521fcd..0cb167fb850e995b522485fb7ae3a7114dbb8fc5 100644
--- a/docs.it4i/modules-anselm.md
+++ b/docs.it4i/modules-anselm.md
@@ -1,11 +1,13 @@
# Available Modules

-## Core
+## Os

| Module | Description |
| ------ | ----------- |
-| lmod | |
-| settarg | |
+| [CentOS](https://www.centos.org/) | The CentOS Project is a community-driven free software effort focused on delivering a robust open source ecosystem. For users, we offer a consistent manageable platform that suits a wide variety of deployments. For open source communities, we offer a solid, predictable base to build upon, along with extensive resources to build, test, release, and maintain their code. We’re also expanding the availability of CentOS images across a number of vendors, providing official images for Amazon, Google, and more. For self-hosted cloud, we also provide a generic cloud-init enabled image. For more information about updates and improvements in CentOS 7, please check out the release notes or the release announcement in the mailing list archive. |
+| [Debian](https://www.ubuntu.com/) | Ubuntu is a Debian-based Linux operating system for personal computers, tablets and smartphones, where Ubuntu Touch edition is used. It also runs network servers.
That is usually with the Ubuntu Server edition, either on physical or virtual servers (such as on mainframes) or with containers, that is with enterprise-class features. It runs on the most popular architectures, including server-class ARM-based. Ubuntu is published by Canonical Ltd, who offer commercial support. It is based on free software and named after the Southern African philosophy of ubuntu (literally, 'human-ness'), which Canonical Ltd. suggests can be loosely translated as "humanity to others" or "I am what I am because of who we all are". Since Ubuntu 11.04 Natty Narwhal Ubuntu has used Unity as its default user interface for the desktop, but following the release of Ubuntu 17.10 it will move to the GNOME 3 desktop instead, as work on Unity ends. Ubuntu is the most popular operating system running in hosted environments, so–called "clouds", as it is the most popular server Linux distribution. Development of Ubuntu is led by UK-based Canonical Ltd., a company of South African entrepreneur Mark Shuttleworth. Canonical generates revenue through the sale of technical support and other services related to Ubuntu. The Ubuntu project is publicly committed to the principles of open-source software development; people are encouraged to use free software, study how it works, improve upon it, and distribute it. | +| [Fedora](https://fedoraproject.org) | The Fedora Project is a global partnership of free software community members. The Fedora Project is sponsored by Red Hat, which invests in our infrastructure and resources to encourage collaboration and incubate innovative new technologies. Some of these technologies may later be integrated into Red Hat products. They are developed in Fedora and produced under a free and open source license from inception, so other free software communities and projects are free to study, adopt, and modify them. Read an overview to learn more about our mission, our community, our governance, and what makes Fedora unique. You can also learn about our vision and core values — the foundations upon which the project is built. We also have information relating to our user base, and the objectives for our technical work. | +| [Ubuntu](https://www.ubuntu.com/) | Ubuntu is a Debian-based Linux operating system for personal computers, tablets and smartphones, where Ubuntu Touch edition is used. It also runs network servers. That is usually with the Ubuntu Server edition, either on physical or virtual servers (such as on mainframes) or with containers, that is with enterprise-class features. It runs on the most popular architectures, including server-class ARM-based. Ubuntu is published by Canonical Ltd, who offer commercial support. It is based on free software and named after the Southern African philosophy of ubuntu (literally, 'human-ness'), which Canonical Ltd. suggests can be loosely translated as "humanity to others" or "I am what I am because of who we all are". Since Ubuntu 11.04 Natty Narwhal Ubuntu has used Unity as its default user interface for the desktop, but following the release of Ubuntu 17.10 it will move to the GNOME 3 desktop instead, as work on Unity ends. Ubuntu is the most popular operating system running in hosted environments, so–called "clouds", as it is the most popular server Linux distribution. Development of Ubuntu is led by UK-based Canonical Ltd., a company of South African entrepreneur Mark Shuttleworth. Canonical generates revenue through the sale of technical support and other services related to Ubuntu. 
The Ubuntu project is publicly committed to the principles of open-source software development; people are encouraged to use free software, study how it works, improve upon it, and distribute it. | ## Bio @@ -16,28 +18,26 @@ | [GROMACS](http://www.gromacs.org) | GROMACS is a versatile package to perform molecular dynamics, i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles. | | [PLUMED](http://www.plumed-code.org) | PLUMED is an open source library for free energy calculations in molecular systems which works together with some of the most popular molecular dynamics engines. Free energy calculations can be performed as a function of many order parameters with a particular focus on biological problems, using state of the art methods such as metadynamics, umbrella sampling and Jarzynski-equation based steered MD. The software, written in C++, can be easily interfaced with both fortran and C/C++ codes. | -## Bullxde +## Cae | Module | Description | | ------ | ----------- | -| bullxde | | - -## Bullxmpi - -| Module | Description | -| ------ | ----------- | -| bullxmpi | | +| COMSOL | | +| [deMonNano](http://www.demon-software.com) | deMonNano is a software package for density functional theory based tight binding calculations. It is part of the deMon (density of MontrĂ©al) suite of programs. The present version of the code of deMon for DFT is now known as deMon2k. This code and general information about deMon are available at http://www.demon-software.com | +| OpenFOAM | | ## Chem | Module | Description | | ------ | ----------- | -| abinit | | +| ABINIT | | | cp2k-mpi | | | lammps | | +| libxc | | | molpro | | | namd | | -| nwchem | | +| [NWChem](http://www.nwchem-sw.org) | NWChem aims to provide its users with computational chemistry tools that are scalable both in their ability to treat large scientific computational chemistry problems efficiently, and in their use of available parallel computing resources from high-performance parallel supercomputers to conventional workstation clusters. NWChem software can handle: biomolecules, nanostructures, and solid-state; from quantum to classical, and all combinations; Gaussian basis functions or plane-waves; scaling from one to thousands of processors; properties and relativity. | +| [OpenBabel](http://openbabel.org) | Open Babel is a chemical toolbox designed to speak the many languages of chemical data. It's an open, collaborative project allowing anyone to search, convert, analyze, or store data from molecular modeling, chemistry, solid-state materials, biochemistry, or related areas. | | [ORCA](http://cec.mpg.de/forum/) | ORCA is a flexible, efficient and easy-to-use general purpose tool for quantum chemistry with specific emphasis on spectroscopic properties of open-shell molecules. It features a wide variety of standard quantum chemical methods ranging from semiempirical methods to DFT to single- and multireference correlated ab initio methods. It can also treat environmental and relativistic effects. | | [PLUMED](http://www.plumed-code.org) | PLUMED is an open source library for free energy calculations in molecular systems which works together with some of the most popular molecular dynamics engines. Free energy calculations can be performed as a function of many order parameters with a particular focus on biological problems, using state of the art methods such as metadynamics, umbrella sampling and Jarzynski-equation based steered MD. 
The software, written in C++, can be easily interfaced with both fortran and C/C++ codes. | | [QuantumESPRESSO](http://www.pwscf.org/) | Quantum ESPRESSO is an integrated suite of computer codes for electronic-structure calculations and materials modeling at the nanoscale. It is based on density-functional theory, plane waves, and pseudopotentials (both norm-conserving and ultrasoft). | @@ -45,27 +45,22 @@ ## Compiler -| Module | Description | -| ------ | ----------- | -| GCC | | -| GCCcore | | -| icc | | -| ifort | | - -## Compilers - | Module | Description | | ------ | ----------- | | bupc | | | chicken | | +| [Clang](http://clang.llvm.org/) | C, C++, Objective-C compiler, based on LLVM. Does not include C++ standard library -- use libstdc++ from GCC. | | gcc | | | [GCC](http://gcc.gnu.org/) | The GNU Compiler Collection includes front ends for C, C++, Objective-C, Fortran, Java, and Ada, as well as libraries for these languages (libstdc++, libgcj,...). | -| [GCCcore](http://gcc.gnu.org/) | The GNU Compiler Collection includes front ends for C, C++, Objective-C, Fortran, Java, and Ada, as well as libraries for these languages (libstdc++, libgcj,...). | +| [GCCcore](http://gcc.gnu.org/) | The GNU Compiler Collection includes front ends for C, C++, Objective-C, Fortran, Java, and Ada, as well as libraries for these languages (libstdc++, libgcj,...). [NOTE: This module does not include Objective-C, Java or Ada] | | gupc | | | [icc](http://software.intel.com/en-us/intel-compilers/) | C and C++ compiler from Intel | | [ifort](http://software.intel.com/en-us/intel-compilers/) | Fortran compiler from Intel | +| ispc | | | java | | -| llvm | | +| LLVM | | +| [OpenCoarrays](http://www.opencoarrays.org/) | A transport layer for coarray Fortran compilers. | +| [PGI](http://www.pgroup.com/) | C, C++ and Fortran compilers from The Portland Group - PGI | ## Data @@ -73,6 +68,9 @@ | ------ | ----------- | | [GDAL](http://www.gdal.org/) | GDAL is a translator library for raster geospatial data formats that is released under an X/MIT style Open Source license by the Open Source Geospatial Foundation. As a library, it presents a single abstract data model to the calling application for all supported formats. It also comes with a variety of useful commandline utilities for data translation and processing. | | [HDF5](http://www.hdfgroup.org/HDF5/) | HDF5 is a unique technology suite that makes possible the management of extremely large and complex data collections. | +| netCDF | | +| netCDF-Fortran | | +| [scikit-learn](http://scikit-learn.org/stable/index.html) | Scikit-learn integrates machine learning algorithms in the tightly-knit scientific Python world, building upon numpy, scipy, and matplotlib. As a machine-learning module, it provides versatile tools for data mining and analysis in any field of science and engineering. It strives to be simple and efficient, accessible to everybody, and reusable in various contexts. | ## Debugger @@ -85,32 +83,52 @@ | Module | Description | | ------ | ----------- | +| [ant](http://ant.apache.org/) | Apache Ant is a Java library and command-line tool whose mission is to drive processes described in build files as targets and extension points dependent upon each other. The main known usage of Ant is the build of Java applications. | | [Autoconf](http://www.gnu.org/software/autoconf/) | Autoconf is an extensible package of M4 macros that produce shell scripts to automatically configure software source code packages. 
These scripts can adapt the packages to many kinds of UNIX-like systems without manual user intervention. Autoconf creates a configuration script for a package from a template file that lists the operating system features that the package can use, in the form of M4 macro calls. | | [Automake](http://www.gnu.org/software/automake/automake.html) | Automake: GNU Standards-compliant Makefile generator | | [Autotools](http://autotools.io) | This bundle collect the standard GNU build tools: Autoconf, Automake and libtool | +| Bazel | | | [Boost](http://www.boost.org/) | Boost provides free peer-reviewed portable C++ source libraries. | +| [Caffe](https://github.com/BVLC/caffe) | Caffe is a deep learning framework made with expression, speed, and modularity in mind. It is developed by the Berkeley Vision and Learning Center (BVLC) and community contributors. | | [CMake](http://www.cmake.org) | CMake, the cross-platform, open-source build system. CMake is a family of tools designed to build, test and package software. | -| [Doxygen](http://www.doxygen.org) | Doxygen is a documentation system for C++, C, Java, Objective-C, Python, IDL (Corba and Microsoft flavors), Fortran, VHDL, PHP, C#, and to some extent D. | +| Doxygen | | | [fontsproto](http://www.freedesktop.org/wiki/Software/xlibs) | X11 font extension wire protocol | +| [gflags](https://github.com/gflags/gflags) | The gflags package contains a C++ library that implements commandline flags processing. It includes built-in support for standard types such as string and the ability to define flags in the source file in which they are used. | +| [glog](https://github.com/google/glog) | A C++ implementation of the Google logging module. | +| [GObject-Introspection](https://wiki.gnome.org/GObjectIntrospection/) | GObject introspection is a middleware layer between C libraries (using GObject) and language bindings. The C library can be scanned at compile time and generate a metadata file, in addition to the actual native C library. Then at runtime, language bindings can read this metadata and automatically provide bindings to call into the C library. | +| [gperf](http://www.gnu.org/software/gperf/) | GNU gperf is a perfect hash function generator. For a given list of strings, it produces a hash function and hash table, in form of C or C++ code, for looking up a value depending on the input string. The hash function is perfect, which means that the hash table has no collisions, and the hash table lookup needs a single string comparison only. | | [guile](http://www.gnu.org/software/guile) | Guile is the GNU Ubiquitous Intelligent Language for Extensions, the official extension language for the GNU operating system. | +| [JUnit](http://sourceforge.net/projects/junit) | A programmer-oriented testing framework for Java. | +| [LevelDB](https://github.com/google/leveldb) | LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values. | | [libSM](http://www.freedesktop.org/wiki/Software/xlibs) | X11 Session Management library, which allows for applications to both manage sessions, and make use of session managers to save and restore their state for later use. | +| [LMDB](https://github.com/LMDB/lmdb) | OpenLDAP's Lightning Memory-Mapped Database (LMDB) library. | | [M4](http://www.gnu.org/software/m4/m4.html) | GNU M4 is an implementation of the traditional Unix macro processor. 
It is mostly SVR4 compatible although it has some extensions (for example, handling more than 9 positional parameters to macros). GNU M4 also has built-in functions for including files, running shell commands, doing arithmetic, etc. | | [make](http://www.gnu.org/software/make/make.html) | make-3.82: GNU version of make utility | | [makedepend](http://www.linuxfromscratch.org/blfs/view/svn/x/makedepend.html) | The makedepend package contains a C-preprocessor like utility to determine build-time dependencies. | +| Mako | | | Maven | | | [ncurses](http://www.gnu.org/software/ncurses/) | The Ncurses (new curses) library is a free software emulation of curses in System V Release 4.0, and more. It uses Terminfo format, supports pads and color and multiple highlights and forms characters and function-key mapping, and has all the other SYSV-curses enhancements over BSD Curses. | | [PCRE](http://www.pcre.org/) | The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5. | | [pkg-config](http://www.freedesktop.org/wiki/Software/pkg-config/) | pkg-config is a helper tool used when compiling applications and libraries. It helps you insert the correct compiler options on the command line so an application can use gcc -o test test.c `pkg-config --libs --cflags glib-2.0` for instance, rather than hard-coding values on where to find glib (or other libraries). | +| [protobuf](https://github.com/google/protobuf/) | Google Protocol Buffers | | [Qt](http://qt-project.org/) | Qt is a comprehensive cross-platform C++ application framework. | +| Qt5 | | | [renderproto](http://www.freedesktop.org/wiki/Software/xlibs) | Xrender protocol and ancillary headers | | [SCons](http://www.scons.org/) | SCons is a software construction tool. | +| [Spack](https://spack.io/) | Spack is a package manager for supercomputers, Linux, and macOS. It makes installing scientific software easy. With Spack, you can build a package with multiple versions, configurations, platforms, and compilers, and all of these builds can coexist on the same machine. | | [SQLite](http://www.sqlite.org/) | SQLite: SQL Database Engine in a C Library | +| [SWIG](http://www.swig.org/) | SWIG is a software development tool that connects programs written in C and C++ with a variety of high-level programming languages. | | [xbitmaps](http://www.freedesktop.org/wiki/Software/xlibs) | provides bitmaps for x | | [xcb-proto](http://xcb.freedesktop.org/) | The X protocol C-language Binding (XCB) is a replacement for Xlib featuring a small footprint, latency hiding, direct access to the protocol, improved threading support, and extensibility. | +| xcb-util | | +| xcb-util-image | | +| xcb-util-keysyms | | +| xcb-util-renderutil | | +| xcb-util-wm | | | [xextproto](http://www.freedesktop.org/wiki/Software/xlibs) | XExtProto protocol headers. | | [xineramaproto](http://www.freedesktop.org/wiki/Software/xlibs) | X protocol and ancillary headers for xinerama | | [xorg-macros](http://cgit.freedesktop.org/xorg/util/macros) | X.org macros utilities. | -| [xproto](http://www.freedesktop.org/wiki/Software/xlibs) | X protocol and ancillary headers | +| xproto | | | [xtrans](http://www.freedesktop.org/wiki/Software/xlibs) | xtrans includes a number of routines to make X implementations transport-independent; at time of writing, it includes support for UNIX sockets, IPv4, IPv6, and DECnet. 
| ## Engineering @@ -150,44 +168,80 @@ | PrgEnv-gnu | | | PrgEnv-intel | | +## Geo + +| Module | Description | +| ------ | ----------- | +| [Doris](http://doris.tudelft.nl/) | Delft object-oriented radar interferometric software | +| [PHREEQC](https://wwwbrr.cr.usgs.gov/projects/GWC_coupled/phreeqc/) | A Computer Program for Speciation, Batch-Reaction, One-Dimensional Transport, and Inverse Geochemical Calculations | + +## Init + +| Module | Description | +| ------ | ----------- | + ## Lang | Module | Description | | ------ | ----------- | +| [Anaconda2](https://www.continuum.io/anaconda-overview) | Built to complement the rich, open source Python community, the Anaconda platform provides an enterprise-ready data analytics platform that empowers companies to adopt a modern open data science analytics architecture. | +| [Anaconda3](https://www.continuum.io/anaconda-overview) | Built to complement the rich, open source Python community, the Anaconda platform provides an enterprise-ready data analytics platform that empowers companies to adopt a modern open data science analytics architecture. | | [Bison](http://www.gnu.org/software/bison) | Bison is a general-purpose parser generator that converts an annotated context-free grammar into a deterministic LR or generalized LR (GLR) parser employing LALR(1) parser tables. | | [byacc](http://invisible-island.net/byacc/byacc.html) | Berkeley Yacc (byacc) is generally conceded to be the best yacc variant available. In contrast to bison, it is written to avoid dependencies upon a particular compiler. | | [flex](http://flex.sourceforge.net/) | Flex (Fast Lexical Analyzer) is a tool for generating scanners. A scanner, sometimes called a tokenizer, is a program which recognizes lexical patterns in text. | | [Java](http://java.com/) | Java Platform, Standard Edition (Java SE) lets you develop and deploy Java applications on desktops and servers. | | [libgdiplus](https://github.com/mono/libgdiplus) | An Open Source implementation of the GDI+ API. | | [Lua](http://www.lua.org/) | Lua is a powerful, fast, lightweight, embeddable scripting language. Lua combines simple procedural syntax with powerful data description constructs based on associative arrays and extensible semantics. Lua is dynamically typed, runs by interpreting bytecode for a register-based virtual machine, and has automatic memory management with incremental garbage collection, making it ideal for configuration, scripting, and rapid prototyping. | -| Mono | | +| [Mono](http://mono-framework.com) | An open source, cross-platform, implementation of C# and the CLR that is binary compatible with Microsoft.NET. | | [NASM](http://www.nasm.us/) | NASM: General-purpose x86 assembler | -| Perl | | +| [OpenCL-runtime](https://software.intel.com/en-us/intel-opencl) | OpenCL™ is the first open, royalty-free standard for cross-platform, parallel programming of modern processors found in personal computers, servers and handheld/embedded devices. OpenCL (Open Computing Language) greatly improves speed and responsiveness for a wide spectrum of applications in numerous market categories from gaming and entertainment to scientific and medical software. | +| [Perl](http://www.perl.org/) | Larry Wall's Practical Extraction and Report Language | | [Python](http://python.org/) | Python is a programming language that lets you work more quickly and integrate your systems more effectively. | +| [R](http://www.r-project.org/) | R is a free software environment for statistical computing and graphics. 
| [Ruby](https://www.ruby-lang.org) | Ruby is a dynamic, open source programming language with a focus on simplicity and productivity. It has an elegant syntax that is natural to read and easy to write. |
-| [Tcl](http://www.tcl.tk/) | Tcl (Tool Command Language) is a very powerful but easy to learn dynamic programming language, suitable for a very wide range of uses, including web and desktop applications, networking, administration, testing and many more. |
+| Tcl | |
+| [Yasm](http://www.tortall.net/projects/yasm/) | Yasm: Complete rewrite of the NASM assembler with BSD license |

## Lib

| Module | Description |
| ------ | ----------- |
-| [libdrm](http://dri.freedesktop.org) | Direct Rendering Manager runtime library. |
+| [astroid](https://github.com/PyCQA/astroid) | The aim of this module is to provide a common base representation of python source code for projects such as pychecker, pyreverse, pylint… Well, actually the development of this library is essentially governed by pylint’s needs. It used to be called logilab-astng. |
+| [Embree](https://embree.github.io/index.html) | Embree is a collection of high-performance ray tracing kernels, developed at Intel. |
+| [freeglut](http://freeglut.sourceforge.net/) | freeglut is a completely OpenSourced alternative to the OpenGL Utility Toolkit (GLUT) library. |
+| [GLM](https://github.com/g-truc/glm) | OpenGL Mathematics (GLM) is a header only C++ mathematics library for graphics software based on the OpenGL Shading Language (GLSL) specifications. |
+| [isort](https://github.com/timothycrosley/isort) | isort is a Python utility / library to sort imports alphabetically and automatically separate them into sections. It provides a command line utility, Python library and plugins for various editors to quickly sort all your imports. |
+| libdrm | |
| libevent | |
| [libffi](http://sourceware.org/libffi/) | The libffi library provides a portable, high level programming interface to various calling conventions. This allows a programmer to call any function specified by a call interface description at run-time. |
| [libfontenc](http://www.freedesktop.org/wiki/Software/xlibs/) | X11 font encoding library |
+| [libgd](https://libgd.github.io/) | GD is an open source code library for the dynamic creation of images by programmers. |
| [libjpeg-turbo](http://sourceforge.net/projects/libjpeg-turbo/) | libjpeg-turbo is a fork of the original IJG libjpeg which uses SIMD to accelerate baseline JPEG compression and decompression. libjpeg is a library that implements JPEG image encoding, decoding and transcoding. |
| [libmatheval](http://www.gnu.org/software/libmatheval/) | GNU libmatheval is a library (callable from C and Fortran) to parse and evaluate symbolic expressions input as text. |
| [libpng](http://www.libpng.org/pub/png/libpng.html) | libpng is the official PNG reference library |
| [libpthread-stubs](http://xcb.freedesktop.org/) | The X protocol C-language Binding (XCB) is a replacement for Xlib featuring a small footprint, latency hiding, direct access to the protocol, improved threading support, and extensibility. |
| [libreadline](http://cnswww.cns.cwru.edu/php/chet/readline/rltop.html) | The GNU Readline library provides a set of functions for use by applications that allow users to edit command lines as they are typed in. Both Emacs and vi editing modes are available. The Readline library includes additional functions to maintain a list of previously-entered command lines, to recall and perhaps reedit those lines, and perform csh-like history expansion on previous commands. |
+| [libsndfile](http://www.mega-nerd.com/libsndfile) | Libsndfile is a C library for reading and writing files containing sampled sound (such as MS Windows WAV and the Apple/SGI AIFF format) through one standard library interface. |
+| LIBSVM | |
| [LibTIFF](http://www.remotesensing.org/libtiff/) | tiff: Library and tools for reading and writing TIFF data files |
| libtool | |
| [libunistring](http://www.gnu.org/software/libunistring/) | This library provides functions for manipulating Unicode strings and for manipulating C strings according to the Unicode standard. |
-| [libxcb](http://xcb.freedesktop.org/) | The X protocol C-language Binding (XCB) is a replacement for Xlib featuring a small footprint, latency hiding, direct access to the protocol, improved threading support, and extensibility. |
-| [libxml2](http://xmlsoft.org/) | Libxml2 is the XML C parser and toolchain developed for the Gnome project (but usable outside of the Gnome platform). |
+| libxcb | |
+| libxml2 | |
+| [libxslt](http://xmlsoft.org/) | Libxslt is the XSLT C library developed for the GNOME project (but usable outside of the Gnome platform). |
+| [libyaml](http://pyyaml.org/wiki/LibYAML) | LibYAML is a YAML 1.1 parser and emitter written in C. |
+| [lxml](http://lxml.de/) | The lxml XML toolkit is a Pythonic binding for the C libraries libxml2 and libxslt. |
+| [MATIO](http://sourceforge.net/projects/matio/) | matio is a C library for reading and writing Matlab MAT files. |
+| [mpi4py](https://bitbucket.org/mpi4py/mpi4py) | MPI for Python (mpi4py) provides bindings of the Message Passing Interface (MPI) standard for the Python programming language, allowing any Python program to exploit multiple processors. |
+| [nettle](http://www.lysator.liu.se/~nisse/nettle/) | Nettle is a cryptographic library that is designed to fit easily in more or less any context: In crypto toolkits for object-oriented languages (C++, Python, Pike, ...), in applications like LSH or GNUPG, or even in kernel space. |
+| [PROJ](http://trac.osgeo.org/proj/) | Program proj is a standard Unix filter function which converts geographic longitude and latitude coordinates into Cartesian coordinates. |
+| [pylint](https://github.com/PyCQA/pylint) | Pylint is a Python source code analyzer which looks for programming errors, helps enforcing a coding standard and sniffs for some code smells (as defined in Martin Fowler's Refactoring book). |
+| [PyYAML](https://pypi.python.org/pypi/PyYAML/) | PyYAML is a YAML parser and emitter for the Python programming language. |
| [SIONlib](http://www.fz-juelich.de/ias/jsc/EN/Expertise/Support/Software/SIONlib/_node.html) | SIONlib is a scalable I/O library for parallel access to task-local files. The library not only supports writing and reading binary data to or from several thousands of processors into a single or a small number of physical files, but also provides global open and close functions to access SIONlib files in parallel. This package provides a stripped-down installation of SIONlib for use with performance tools (e.g., Score-P), with renamed symbols to avoid conflicts when an application using SIONlib itself is linked against a tool requiring a different SIONlib version. |
+| [snappy](https://github.com/google/snappy) | Snappy is a compression/decompression library. It does not aim for maximum compression, or compatibility with any other compression library; instead, it aims for very high speeds and reasonable compression. |
| [spGPU](https://github.com/davidebarbieri/spgpu) | spGPU is a set of custom matrix storages and CUDA kernels for sparse linear algebra computing on GPU. It isn't a replacement for cuBLAS/cuSPARSE that should be used for a full featured linear algebra environment on GPU. |
-| tbb | |
+| [tbb](http://software.intel.com/en-us/articles/intel-tbb/) | Intel Threading Building Blocks 4.0 (Intel TBB) is a widely used, award-winning C++ template library for creating reliable, portable, and scalable parallel applications. Use Intel TBB for a simple and rapid way of developing robust task-based parallel applications that scale to available processor cores, are compatible with multiple environments, and are easier to maintain. Intel TBB is the most proficient way to implement future-proof parallel applications that tap into the power and performance of multicore and manycore hardware platforms. |
+| Tensorflow | |
| zlib | |

## Libraries

@@ -195,7 +249,6 @@
| Module | Description |
| ------ | ----------- |
| adios | |
-| boost | |
| dataspaces | |
| fftw2 | |
| fftw2-mpi | |
@@ -206,7 +259,6 @@
| hdf5 | |
| hdf5-parallel | |
| ipp | |
-| libmesh | |
| magma | |
| mkl | |
| mxml | |
@@ -218,31 +270,42 @@
| opencl-sdk | |
| petsc | |
| plasma | |
-| slepc | |
| szip | |
| tbb | |
| trilinos | |
-| zlib | |

## Math

| Module | Description |
| ------ | ----------- |
+| Eigen | |
+| [GEOS](http://trac.osgeo.org/geos) | GEOS (Geometry Engine - Open Source) is a C++ port of the Java Topology Suite (JTS) |
| [GMP](http://gmplib.org/) | GMP is a free library for arbitrary precision arithmetic, operating on signed integers, rational numbers, and floating point numbers. |
+| [h5py](http://www.h5py.org/) | HDF5 for Python (h5py) is a Pythonic interface to the HDF5 binary data format. |
| [ISL](http://isl.gforge.inria.fr/) | isl is a library for manipulating sets and relations of integer points bounded by linear constraints. |
+| [Keras](https://keras.io/) | Keras is a minimalist, highly modular neural networks library, written in Python and capable of running on top of either TensorFlow or Theano. |
+| [libcerf](http://gnuplot.sourceforge.net/) | libcerf is a self-contained numeric library that provides an efficient and accurate implementation of complex error functions, along with Dawson, Faddeeva, and Voigt functions. |
+| [MATLAB](http://www.mathworks.com/products/matlab) | MATLAB is a high-level language and interactive environment that enables you to perform computationally intensive tasks faster than with traditional programming languages such as C, C++, and Fortran. |
+| METIS | |
| [MLD2P4](http://www.mld2p4.it) | MLD2P4 (Multi-Level Domain Decomposition Parallel Preconditioners Package based on PSBLAS) is a package of parallel algebraic multi-level preconditioners. It implements various versions of one-level additive and of multi-level additive and hybrid Schwarz algorithms. In the multi-level case, a purely algebraic approach is applied to generate coarse-level corrections, so that no geometric background is needed concerning the matrix to be preconditioned. The matrix is assumed to be square, real or complex, with a symmetric sparsity pattern. |
-| [numpy](http://www.numpy.org) | NumPy is the fundamental package for scientific computing with Python. It contains among other things: a powerful N-dimensional array object, sophisticated (broadcasting) functions, tools for integrating C/C++ and Fortran code, useful linear algebra, Fourier transform, and random number capabilities. Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases. |
+| MPFR | |
+| [numpy](http://www.numpy.org) | NumPy is the fundamental package for scientific computing with Python. |
| [Octave](http://www.gnu.org/software/octave/) | GNU Octave is a high-level interpreted language, primarily intended for numerical computations. |
+| [ParMETIS](http://glaros.dtc.umn.edu/gkhome/metis/parmetis/overview) | ParMETIS is an MPI-based parallel library that implements a variety of algorithms for partitioning unstructured graphs, meshes, and for computing fill-reducing orderings of sparse matrices. ParMETIS extends the functionality provided by METIS and includes routines that are especially suited for parallel AMR computations and large scale numerical simulations. The algorithms implemented in ParMETIS are based on the parallel multilevel k-way graph-partitioning, adaptive repartitioning, and parallel multi-constrained partitioning schemes. |
| [PSBLAS](http://people.uniroma2.it/salvatore.filippone/psblas/) | Most computationally intensive applications work on irregular and sparse domains that complicate their implementation on parallel machines. The major goal of the Parallel Sparse Basic Linear Algebra Subroutines (PSBLAS) project is to provide a framework to enable easy, efficient and portable implementations of iterative solvers for linear systems, while shielding the user from most details of their parallelization. The interface is designed keeping in view a Single Program Multiple Data programming model on distributed memory machines. |
| [PSBLAS-ext](http://people.uniroma2.it/salvatore.filippone/psblas/) | PSBLAS - Extended formats and NVIDIA GPU support |
+| [Qhull](http://www.qhull.org) | Qhull computes the convex hull, Delaunay triangulation, Voronoi diagram, halfspace intersection about a point, furthest-site Delaunay triangulation, and furthest-site Voronoi diagram. The source code runs in 2-d, 3-d, 4-d, and higher dimensions. Qhull implements the Quickhull algorithm for computing the convex hull. |
| [ScientificPython](https://sourcesup.cru.fr/projects/scientific-py/) | ScientificPython is a collection of Python modules for scientific computing. It contains support for geometry, mathematical functions, statistics, physical units, IO, visualization, and parallelization. |
+| [scipy](http://www.scipy.org) | SciPy is a collection of mathematical algorithms and convenience functions built on the NumPy extension of Python. |
+| SCOTCH | |
+| [Theano](http://deeplearning.net/software/theano) | Theano is a Python library that allows you to define, optimize, and evaluate mathematical expressions involving multi-dimensional arrays efficiently. |

## Mpi

| Module | Description |
| ------ | ----------- |
| bullxmpi | |
-| impi | |
+| [impi](http://software.intel.com/en-us/intel-mpi-library/) | The Intel(R) MPI Library for Linux* OS is a multi-fabric message passing library based on ANL MPICH2 and OSU MVAPICH2. The Intel MPI Library for Linux OS implements the Message Passing Interface, version 2 (MPI-2) specification. |
| lam | |
| [MPICH](http://www.mpich.org/) | MPICH v3.x is an open source high-performance MPI 3.0 implementation. It does not support InfiniBand (use MVAPICH2 with InfiniBand devices). |
| mvapich2 | |
@@ -256,18 +319,22 @@
| [Armadillo](http://arma.sourceforge.net/) | Armadillo is an open-source C++ linear algebra library (matrix maths) aiming towards a good balance between speed and ease of use. Integer, floating point and complex numbers are supported, as well as a subset of trigonometric and statistics functions. |
| [arpack-ng](http://forge.scilab.org/index.php/p/arpack-ng/) | ARPACK is a collection of Fortran77 subroutines designed to solve large scale eigenvalue problems. |
| [ATLAS](http://math-atlas.sourceforge.net) | ATLAS (Automatically Tuned Linear Algebra Software) is the application of the AEOS (Automated Empirical Optimization of Software) paradigm, with the present emphasis on the Basic Linear Algebra Subprograms (BLAS), a widely used, performance-critical, linear algebra kernel library. |
+| CGAL | |
+| Clp | |
| cuDNN | |
| [FFTW](http://www.fftw.org) | FFTW is a C subroutine library for computing the discrete Fourier transform (DFT) in one or more dimensions, of arbitrary input size, and of both real and complex data. |
| [GSL](http://www.gnu.org/software/gsl/) | The GNU Scientific Library (GSL) is a numerical library for C and C++ programmers. The library provides a wide range of mathematical routines such as random number generators, special functions and least-squares fitting. |
| [imkl](http://software.intel.com/en-us/intel-mkl/) | Intel Math Kernel Library is a library of highly optimized, extensively threaded math routines for science, engineering, and financial applications that require maximum performance. Core math functions include BLAS, LAPACK, ScaLAPACK, Sparse Solvers, Fast Fourier Transforms, Vector Math, and more. |
+| LIBLINEAR | |
+| [MPI-LIBLINEAR](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/distributed-liblinear/mpi/) | MPI LIBLINEAR is an extension of LIBLINEAR for distributed environments. The usage and the data format are the same as LIBLINEAR. It supports L2-regularized logistic regression, L2-regularized L2-loss linear SVM (primal trust-region Newton), L2-regularized L1-loss linear SVM (dual), L2-regularized logistic regression (primal limited common directions), and L2-regularized L2-loss linear SVM (primal limited common directions). Module created by the PERMON Team (http://permon.it4i.cz). |
+| [NLopt](http://ab-initio.mit.edu/wiki/index.php/NLopt) | NLopt is a free/open-source library for nonlinear optimization, providing a common interface for a number of different free optimization routines available online as well as original implementations of various other algorithms. |
| [OpenBLAS](http://xianyi.github.com/OpenBLAS/) | OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. |
+| PETSc | |
+| [qrupdate](https://sourceforge.net/projects/qrupdate/) | qrupdate is a Fortran library for fast updates of QR and Cholesky decompositions. |
| [ScaLAPACK](http://www.netlib.org/scalapack/) | The ScaLAPACK (or Scalable LAPACK) library includes a subset of LAPACK routines redesigned for distributed memory MIMD parallel computers. |
-
-## Nvidia
-
-| Module | Description |
-| ------ | ----------- |
-| cuda | |
+| [SLEPc](http://www.grycap.upv.es/slepc/) | SLEPc (Scalable Library for Eigenvalue Problem Computations) is a software library for the solution of large scale sparse eigenvalue problems on parallel computers. It is an extension of PETSc and can be used for either standard or generalized eigenproblems, with real or complex arithmetic. It can also be used for computing a partial SVD of a large, sparse, rectangular matrix, and to solve quadratic eigenvalue problems. |
+| [SuiteSparse](http://faculty.cse.tamu.edu/davis/suitesparse.html) | SuiteSparse is a collection of libraries for manipulating sparse matrices. |
+| [Trilinos](http://trilinos.sandia.gov/) | The Trilinos Project is an effort to develop algorithms and enabling technologies within an object-oriented software framework for the solution of large-scale, complex multi-physics engineering and scientific problems. A unique design feature of Trilinos is its focus on packages. |

## Omics

@@ -283,33 +350,30 @@
| samtools | |
| snpEff | |

-## Oscar-Modulefiles
-
-| Module | Description |
-| ------ | ----------- |
-
-## Oscar-Modules
-
-| Module | Description |
-| ------ | ----------- |
-| oscar-modules | |
-
## Perf

| Module | Description |
| ------ | ----------- |
+| [Advisor](https://software.intel.com/intel-advisor-xe) | Vectorization Optimization and Thread Prototyping - Vectorize & thread code or performance “dies” - Easy workflow + data + tips = faster code faster - Prioritize, Prototype & Predict performance gain |
+| [Cube](http://www.scalasca.org/software/cube-4.x/download.html) | Cube, which is used as performance report explorer for Scalasca and Score-P, is a generic tool for displaying a multi-dimensional performance space consisting of the dimensions (i) performance metric, (ii) call path, and (iii) system resource. Each dimension can be represented as a tree, where non-leaf nodes of the tree can be collapsed or expanded to achieve the desired level of granularity. |
| [OPARI2](http://www.score-p.org) | OPARI2, the successor of Forschungszentrum Juelich's OPARI, is a source-to-source instrumentation tool for OpenMP and hybrid codes. It surrounds OpenMP directives and runtime library calls with calls to the POMP2 measurement interface. |
| [OTF2](http://www.score-p.org) | The Open Trace Format 2 is a highly scalable, memory efficient event trace data format plus support library. It is the new standard trace format for Scalasca, Vampir, and TAU and is open for other tools. |
| [PAPI](http://icl.cs.utk.edu/projects/papi/) | PAPI provides the tool designer and application engineer with a consistent interface and methodology for use of the performance counter hardware found in most major microprocessors. PAPI enables software engineers to see, in near real time, the relation between software performance and processor events. In addition Component PAPI provides access to a collection of components that expose performance measurement opportunites across the hardware and software stack. |
+| [PDT](http://www.cs.uoregon.edu/research/pdt/) | Program Database Toolkit (PDT) is a framework for analyzing source code written in several programming languages and for making rich program knowledge accessible to developers of static and dynamic analysis tools. PDT implements a standard program representation, the program database (PDB), that can be accessed in a uniform way through a class library supporting common PDB operations. |
+| [Scalasca](http://www.scalasca.org/) | Scalasca is a software tool that supports the performance optimization of parallel programs by measuring and analyzing their runtime behavior. The analysis identifies potential performance bottlenecks -- in particular those concerning communication and synchronization -- and offers guidance in exploring their causes. |
+| [Score-P](http://www.score-p.org) | The Score-P measurement infrastructure is a highly scalable and easy-to-use tool suite for profiling, event tracing, and online analysis of HPC applications. |
| [Vampir](http://www.vampir.eu) | The Vampir software tool provides an easy-to-use framework that enables developers to quickly display and analyze arbitrary program behavior at any level of detail. The tool suite implements optimized event analysis algorithms and customizable displays that enable fast and interactive rendering of very complex performance monitoring data. |

## Phys

| Module | Description |
| ------ | ----------- |
-| [phono3py](http://python.org/) | Python is a programming language that lets you work more quickly and integrate your systems more effectively. |
-| [phonopy](http://python.org/) | Python is a programming language that lets you work more quickly and integrate your systems more effectively. |
-| VASP | |
+| [ALAMODE](http://alamode.readthedocs.io/) | ALAMODE is an open source software designed for analyzing lattice anharmonicity and lattice thermal conductivity of solids. By using an external DFT package such as VASP and Quantum ESPRESSO, you can extract harmonic and anharmonic force constants straightforwardly with ALAMODE. Using the calculated anharmonic force constants, you can also estimate lattice thermal conductivity, phonon linewidth, and other anharmonic phonon properties from first principles. |
+| [DynaPhoPy](https://github.com/abelcarreras/DynaPhoPy) | Software to calculate crystal microscopic anharmonic properties from molecular dynamics (MD) using the normal-mode-decomposition technique. These properties include the phonon frequency shifts and linewidths, as well as the renormalized force constants and thermal properties by using quasiparticle theory. This code includes interfaces for MD outputs from VASP and LAMMPS. The PHONOPY code is used to obtain harmonic phonon modes. |
+| [phono3py](http://phonopy.sourceforge.net/phono3py/index.html) | This software calculates phonon-phonon interaction related properties |
+| [phonopy](http://phonopy.sourceforge.net/) | Phonopy is an open source package of phonon calculations based on the supercell approach. |
+| [Siesta](http://departments.icmab.es/leem/siesta) | SIESTA is both a method and its computer program implementation, to perform efficient electronic structure calculations and ab initio molecular dynamics simulations of molecules and solids. This version is compiled with OpenMP and MPI support. |
+| [VASP](http://www.vasp.at) | The Vienna Ab initio Simulation Package (VASP) is a computer program for atomic scale materials modelling, e.g. electronic structure calculations and quantum-mechanical molecular dynamics, from first principles. To use VASP, you need an academic license from the University of Vienna. Follow the instructions at https://www.vasp.at/index.php/faqs, then please send us a list of authorized users and their IDs for which you need this access. Please use only http://support.it4i.cz/rt. We are responsible for verifying your licenses. After successful verification, you will be granted access to VASP in our environment.
| ## Prace @@ -322,7 +386,7 @@ | Module | Description | | ------ | ----------- | -| CUDA | | +| [CUDA](https://developer.nvidia.com/cuda-toolkit) | CUDA (formerly Compute Unified Device Architecture) is a parallel computing platform and programming model created by NVIDIA and implemented by the graphics processing units (GPUs) that they produce. CUDA gives developers access to the virtual instruction set and memory of the parallel computational elements in CUDA GPUs. | | [hwloc](http://www.open-mpi.org/projects/hwloc/) | The Portable Hardware Locality (hwloc) software package provides a portable abstraction (across OS, versions, architectures, ...) of the hierarchical topology of modern architectures, including NUMA memory nodes, sockets, shared caches, cores and simultaneous multithreading. It also gathers various system attributes such as cache and memory information as well as the locality of I/O devices such as network interfaces, InfiniBand HCAs or GPUs. It primarily aims at helping applications with gathering information about modern computing hardware so as to exploit it accordingly and efficiently. | | [libpciaccess](http://cgit.freedesktop.org/xorg/lib/libpciaccess/) | Generic PCI access library. | @@ -338,13 +402,15 @@ | [iccifort](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel C, C++ and Fortran compilers | | [iimpi](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel C/C++ and Fortran compilers, alongside Intel MPI. | | intel | | +| [iomkl](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel Cluster Toolchain Compiler Edition provides Intel C/C++ and Fortran compilers, Intel MKL & OpenMPI. | +| [iompi](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel C/C++ and Fortran compilers, alongside Open MPI. | ## Tools | Module | Description | | ------ | ----------- | | advisor_xe | | -| ANSYS | | +| [ANSYS](http://www.ansys.com) | ANSYS simulation software enables organizations to confidently predict how their products will operate in the real world. We believe that every product is a promise of something greater. | | [APR](http://apr.apache.org/) | Apache Portable Runtime (APR) libraries. | | [APR-util](http://apr.apache.org/) | Apache Portable Runtime (APR) util libraries. | | [Bash](http://www.gnu.org/software/bash) | Bash is an sh-compatible command language interpreter that executes commands read from the standard input or from a file. Bash also incorporates useful features from the Korn and C shells (ksh and csh). | @@ -353,11 +419,13 @@ | cmake | | | cube | | | [cURL](http://curl.haxx.se) | libcurl is a free and easy-to-use client-side URL transfer library, supporting DICT, FILE, FTP, FTPS, Gopher, HTTP, HTTPS, IMAP, IMAPS, LDAP, LDAPS, POP3, POP3S, RTMP, RTSP, SCP, SFTP, SMTP, SMTPS, Telnet and TFTP. libcurl supports SSL certificates, HTTP POST, HTTP PUT, FTP uploading, HTTP form based upload, proxies, cookies, user+password authentication (Basic, Digest, NTLM, Negotiate, Kerberos), file transfer resume, http proxy tunneling and more. | -| dhi-mike | | +| [DMTCP](http://dmtcp.sourceforge.net/index.html) | DMTCP (Distributed MultiThreaded Checkpointing) transparently checkpoints a single-host or distributed computation in user-space -- with no modifications to user code or to the O/S. 
| [EasyBuild](http://hpcugent.github.com/easybuild/) | EasyBuild is a software build and installation framework written in Python that allows you to install software in a structured, repeatable and robust way. |
| elmer | |
| [expat](http://expat.sourceforge.net/) | Expat is an XML parser library written in C. It is a stream-oriented parser in which an application registers handlers for things the parser might find in the XML document (like start tags) |
+| [Ghostscript](http://ghostscript.com) | Ghostscript is a versatile processor for PostScript data with the ability to render PostScript to different targets. It used to be part of the cups printing stack, but is no longer used for that. |
| git | |
+| [GLPK](https://www.gnu.org/software/glpk/) | The GLPK (GNU Linear Programming Kit) package is intended for solving large-scale linear programming (LP), mixed integer programming (MIP), and other related problems. It is a set of routines written in ANSI C and organized in the form of a callable library. |
| gnuplot | |
| grace | |
| [gzip](http://www.gnu.org/software/gzip/) | gzip (GNU zip) is a popular data compression program as a replacement for compress |
@@ -376,6 +444,7 @@
| MIKE | |
| mono | |
| mpi.net | |
+| [networkx](https://pypi.python.org/pypi/networkx) | NetworkX is a Python package for the creation, manipulation, and study of the structure, dynamics, and functions of complex networks. |
| [numactl](http://oss.sgi.com/projects/libnuma/) | The numactl program allows you to run your application program on specific cpu's and memory nodes. It does this by supplying a NUMA memory policy to the operating system before running your program. The libnuma library provides convenient ways for you to add NUMA memory policies into your own program. |
| octave | |
| opari2 | |
@@ -388,15 +457,13 @@
| R | |
| racket | |
| relion | |
-| Rstudio | |
+| [RStudio](https://www.rstudio.com) | RStudio is a set of integrated tools designed to help you be more productive with R. It includes a console, syntax-highlighting editor that supports direct code execution, as well as tools for plotting, history, debugging and workspace management. |
| ruby | |
-| scalasca2 | |
| scite | |
-| scorep | |
| [Serf](http://serf.apache.org/) | The serf library is a high performance C-based HTTP client library built upon the Apache Portable Runtime (APR) library |
-| Singularity | |
+| [Singularity](http://gmkurtzer.github.io/singularity) | Singularity is a container platform focused on supporting "Mobility of Compute". |
| [Subversion](http://subversion.apache.org/) | Subversion is an open source version control system. |
-| [Szip](http://www.hdfgroup.org/doc_resource/SZIP/) | Szip compression software, providing lossless compression of scientific data |
+| Szip | |
| tcl | |
| [tcsh](http://www.tcsh.org) | Tcsh is an enhanced, but completely compatible version of the Berkeley UNIX C shell (csh). It is a command language interpreter usable both as an interactive login shell and a shell script command processor. It includes a command-line editor, programmable word completion, spelling correction, a history mechanism, job control and a C-like syntax. |
| tk | |
@@ -407,7 +474,7 @@
| valgrind | |
| vampir | |
| virtualgl | |
-| VTune | |
+| [VTune](http://software.intel.com/en-us/intel-vtune-amplifier-xe) | Intel VTune Amplifier XE 2016 is the premier performance profiler for C, C++, C#, Fortran, Assembly and Java.
| | vtune_xe | | | [XZ](http://tukaani.org/xz/) | xz: XZ utilities | @@ -423,16 +490,29 @@ | Module | Description | | ------ | ----------- | +| [ATK](https://developer.gnome.org/ATK/stable/) | ATK provides the set of accessibility interfaces that are implemented by other toolkits and applications. Using the ATK interfaces, accessibility tools have full access to view and control running applications. | | [cairo](http://cairographics.org) | Cairo is a 2D graphics library with support for multiple output devices. Currently supported output targets include the X Window System (via both Xlib and XCB), Quartz, Win32, image buffers, PostScript, PDF, and SVG file output. Experimental backends include OpenGL, BeOS, OS/2, and DirectFB | | [ffmpeg](https://www.ffmpeg.org/) | A complete, cross-platform solution to record, convert and stream audio and video. | +| [FFmpeg](https://www.ffmpeg.org/) | A complete, cross-platform solution to record, convert and stream audio and video. | | [fixesproto](http://www.freedesktop.org/wiki/Software/xlibs) | X.org FixesProto protocol headers. | | [FLTK](http://www.fltk.org) | FLTK is a cross-platform C++ GUI toolkit for UNIX/Linux (X11), Microsoft Windows, and MacOS X. FLTK provides modern GUI functionality without the bloat and supports 3D graphics via OpenGL and its built-in GLUT emulation. | | [fontconfig](http://www.freedesktop.org/software/fontconfig) | Fontconfig is a library designed to provide system-wide font configuration, customization and application access. | | [freetype](http://freetype.org) | FreeType 2 is a software font engine that is designed to be small, efficient, highly customizable, and portable while capable of producing high-quality output (glyph images). It can be used in graphics libraries, display servers, font conversion tools, text image generation tools, and many other products as well. | +| [Gdk-Pixbuf](https://developer.gnome.org/gdk-pixbuf/stable/) | The Gdk Pixbuf is a toolkit for image loading and pixel buffer manipulation. It is used by GTK+ 2 and GTK+ 3 to load and manipulate images. In the past it was distributed as part of GTK+ 2 but it was split off into a separate package in preparation for the change to GTK+ 3. | | gettext | | +| [GL2PS](http://www.geuz.org/gl2ps/) | GL2PS: an OpenGL to PostScript printing library | | [GLib](http://www.gtk.org/) | GLib is one of the base libraries of the GTK+ project | +| [gnuplot](http://gnuplot.sourceforge.net/) | Portable interactive, function plotting utility | +| [grace](http://freecode.com/projects/grace) | Grace is a WYSIWYG 2D plotting tool for X Windows System and Motif. | +| [GraphicsMagick](http://www.graphicsmagick.org/) | GraphicsMagick is the swiss army knife of image processing. | +| [GST-plugins-base](http://gstreamer.freedesktop.org/) | GStreamer is a library for constructing graphs of media-handling components. The applications it supports range from simple Ogg/Vorbis playback, audio/video streaming to complex audio (mixing) and video (non-linear editing) processing. | +| [GStreamer](http://gstreamer.freedesktop.org/) | GStreamer is a library for constructing graphs of media-handling components. The applications it supports range from simple Ogg/Vorbis playback, audio/video streaming to complex audio (mixing) and video (non-linear editing) processing. | +| [GTK+](https://developer.gnome.org/gtk+/stable/) | The GTK+ 2 package contains libraries used for creating graphical user interfaces for applications. 
| [HarfBuzz](http://www.freedesktop.org/wiki/Software/HarfBuzz) | HarfBuzz is an OpenType text shaping engine. |
| [inputproto](http://www.freedesktop.org/wiki/Software/xlibs) | X.org InputProto protocol headers. |
+| [JasPer](http://www.ece.uvic.ca/~frodo/jasper/) | The JasPer Project is an open-source initiative to provide a free software-based reference implementation of the codec specified in the JPEG-2000 Part-1 standard. |
| [kbproto](http://www.freedesktop.org/wiki/Software/xlibs) | X.org KBProto protocol headers. |
+| libGLU | |
| [libICE](http://www.freedesktop.org/wiki/Software/xlibs) | X Inter-Client Exchange library for freedesktop.org |
| [libX11](http://www.freedesktop.org/wiki/Software/xlibs) | X11 client-side library |
| [libXau](http://www.freedesktop.org/wiki/Software/xlibs) | The libXau package contains a library implementing the X11 Authorization Protocol. This is useful for restricting client access to the display. |
@@ -440,6 +520,21 @@
| [libXext](http://www.freedesktop.org/wiki/Software/xlibs) | Common X Extensions library |
| [libXfixes](http://www.freedesktop.org/wiki/Software/xlibs) | X Fixes extension library |
| [libXfont](http://www.freedesktop.org/wiki/Software/xlibs) | X font libary |
+| libXinerama | |
| [libXt](http://www.freedesktop.org/wiki/Software/xlibs) | libXt provides the X Toolkit Intrinsics, an abstract widget library upon which other toolkits are based. Xt is the basis for many toolkits, including the Athena widgets (Xaw), and LessTif (a Motif implementation). |
+| [matplotlib](http://matplotlib.org) | matplotlib is a python 2D plotting library which produces publication quality figures in a variety of hardcopy formats and interactive environments across platforms. matplotlib can be used in python scripts, the python and ipython shell, web application servers, and six graphical user interface toolkits. |
+| Mesa | |
+| [motif](http://motif.ics.com/) | Motif refers to both a graphical user interface (GUI) specification and the widget toolkit for building applications that follow that specification under the X Window System on Unix and other POSIX-compliant systems. It was the standard toolkit for the Common Desktop Environment and thus for Unix. |
+| [OpenCV](http://opencv.org/) | OpenCV (Open Source Computer Vision Library) is an open source computer vision and machine learning software library. OpenCV was built to provide a common infrastructure for computer vision applications and to accelerate the use of machine perception in the commercial products. |
+| [opencv-python](https://pypi.python.org/pypi/opencv-python) | Wrapper package for OpenCV Python bindings. |
+| [OSPRay](http://www.ospray.org) | A Ray Tracing Based Rendering Engine for High-Fidelity Visualization |
+| [Pango](http://www.pango.org/) | Pango is a library for laying out and rendering of text, with an emphasis on internationalization. Pango can be used anywhere that text layout is needed, though most of the work on Pango so far has been done in the context of the GTK+ widget toolkit. Pango forms the core of text and font handling for GTK+-2.x. |
+| [ParaView](http://www.paraview.org) | ParaView is a scientific parallel visualizer. |
+| [Pillow](http://pillow.readthedocs.org/) | Pillow is the 'friendly PIL fork' by Alex Clark and Contributors. PIL is the Python Imaging Library by Fredrik Lundh and Contributors. |
| [pixman](http://www.pixman.org/) | Pixman is a low-level software library for pixel manipulation, providing features such as image compositing and trapezoid rasterization. Important users of pixman are the cairo graphics library and the X server. |
+| [scikit-image](http://scikit-image.org/) | scikit-image is a collection of algorithms for image processing. |
+| [seekpath](https://github.com/giovannipizzi/seekpath) | SeeK-path is a python module to obtain and visualize band paths in the Brillouin zone of crystal structures. The definition of k-point labels follows crystallographic convention, as defined and discussed in the HPKOT paper. Moreover, the Bravais lattice is detected properly using the spacegroup symmetry. Also the suggested band path provided in the HPKOT paper is returned. Systems without time-reversal and inversion-symmetry are also properly taken into account. |
| [Tk](http://www.tcl.tk/) | Tk is an open source, cross-platform widget toolchain that provides a library of basic elements for building a graphical user interface (GUI) in many different programming languages. |
+| [VTK](http://www.vtk.org) | The Visualization Toolkit (VTK) is an open-source, freely available software system for 3D computer graphics, image processing and visualization. VTK consists of a C++ class library and several interpreted interface layers including Tcl/Tk, Java, and Python. VTK supports a wide variety of visualization algorithms including: scalar, vector, tensor, texture, and volumetric methods; and advanced modeling techniques such as: implicit modeling, polygon reduction, mesh smoothing, cutting, contouring, and Delaunay triangulation. |
+| X11 | |
+| [x264](http://www.videolan.org/developers/x264.html) | x264 is a free software library and application for encoding video streams into the H.264/MPEG-4 AVC compression format, and is released under the terms of the GNU GPL. |
diff --git a/docs.it4i/modules-salomon-phi.md b/docs.it4i/modules-salomon-phi.md
new file mode 100644
index 0000000000000000000000000000000000000000..0eb79e5a8cb916d7571f8e81a198516166ecdb96
--- /dev/null
+++ b/docs.it4i/modules-salomon-phi.md
@@ -0,0 +1,63 @@
+# Available Modules
+
+## Compiler
+
+| Module | Description |
+| ------ | ----------- |
+| [icc](http://software.intel.com/en-us/intel-compilers/) | Intel C and C++ compilers |
+
+## Devel
+
+| Module | Description |
+| ------ | ----------- |
+| devel_environment | |
+| M4 | |
+| ncurses | |
+
+## Lang
+
+| Module | Description |
+| ------ | ----------- |
+| [Bison](http://www.gnu.org/software/bison) | Bison is a general-purpose parser generator that converts an annotated context-free grammar into a deterministic LR or generalized LR (GLR) parser employing LALR(1) parser tables. |
+| [flex](http://flex.sourceforge.net/) | Flex (Fast Lexical Analyzer) is a tool for generating scanners. A scanner, sometimes called a tokenizer, is a program which recognizes lexical patterns in text. |
+| [Tcl](http://www.tcl.tk/) | Tcl (Tool Command Language) is a very powerful but easy to learn dynamic programming language, suitable for a very wide range of uses, including web and desktop applications, networking, administration, testing and many more.
| + +## Lib + +| Module | Description | +| ------ | ----------- | +| [libreadline](http://cnswww.cns.cwru.edu/php/chet/readline/rltop.html) | The GNU Readline library provides a set of functions for use by applications that allow users to edit command lines as they are typed in. Both Emacs and vi editing modes are available. The Readline library includes additional functions to maintain a list of previously-entered command lines, to recall and perhaps reedit those lines, and perform csh-like history expansion on previous commands. | +| [zlib](http://www.zlib.net/) | zlib is designed to be a free, general-purpose, legally unencumbered -- that is, not covered by any patents -- lossless data-compression library for use on virtually any computer hardware and operating system. | + +## Math + +| Module | Description | +| ------ | ----------- | +| [Octave](http://www.gnu.org/software/octave/) | GNU Octave is a high-level interpreted language, primarily intended for numerical computations. | + +## Mpi + +| Module | Description | +| ------ | ----------- | +| [impi](http://software.intel.com/en-us/intel-mpi-library/) | Intel MPI Library, compatible with MPICH ABI | + +## Toolchain + +| Module | Description | +| ------ | ----------- | +| [iccifort](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel C, C++ & Fortran compilers | +| [ifort](http://software.intel.com/en-us/intel-compilers/) | Intel Fortran compiler | + +## Tools + +| Module | Description | +| ------ | ----------- | +| bzip2 | | +| cURL | | +| [expat](http://expat.sourceforge.net/) | Expat is an XML parser library written in C. It is a stream-oriented parser in which an application registers handlers for things the parser might find in the XML document (like start tags) | + +## Vis + +| Module | Description | +| ------ | ----------- | +| gettext | | diff --git a/docs.it4i/modules-salomon-uv.md b/docs.it4i/modules-salomon-uv.md index 1b0f652a5e4c33f96285dde9231fc6b3e05906c1..92bd1ea8a1079718b50f5e6116d6c57aa5404215 100644 --- a/docs.it4i/modules-salomon-uv.md +++ b/docs.it4i/modules-salomon-uv.md @@ -3,7 +3,7 @@ ## Bio | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/download.html) | A quality control application for high throughput sequence data | | [GATK](http://www.broadinstitute.org/gatk/) | The Genome Analysis Toolkit or GATK is a software package developed at the Broad Institute to analyse next-generation resequencing data. The toolkit offers a wide variety of tools, with a primary focus on variant discovery and genotyping as well as strong emphasis on data quality assurance. Its robust architecture, powerful processing engine and high-performance computing features make it capable of taking on projects of any size. | | [SnpEff](http://snpeff.sourceforge.net/) | Genetic variant annotation and effect prediction toolbox. | @@ -11,14 +11,14 @@ ## Cae | Module | Description | -| -------| ----------- | -| ANSYS | | +| ------ | ----------- | +| COMSOL | | | [OpenFOAM](http://www.openfoam.com/) | OpenFOAM is a free, open source CFD software package. OpenFOAM has an extensive range of features to solve anything from complex fluid flows involving chemical reactions, turbulence and heat transfer, to solid dynamics and electromagnetics. 
| ## Chem | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [ABINIT](http://www.abinit.org/) | Abinit is a plane wave pseudopotential code for doing condensed phase electronic structure calculations using DFT. | | [Libint](https://sourceforge.net/p/libint/) | Libint library is used to evaluate the traditional (electron repulsion) and certain novel two-body matrix elements (integrals) over Cartesian Gaussian functions used in modern atomic and molecular theory. | | [libxc](http://www.tddft.org/programs/octopus/wiki/index.php/Libxc) | Libxc is a library of exchange-correlation functionals for density-functional theory. The aim is to provide a portable, well tested and reliable set of exchange and correlation functionals. | @@ -26,25 +26,26 @@ ## Compiler | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [GCC](http://gcc.gnu.org/) | The GNU Compiler Collection includes front ends for C, C++, Objective-C, Fortran, Java, and Ada, as well as libraries for these languages (libstdc++, libgcj,...). | | GCCcore | | -| [icc](http://software.intel.com/en-us/intel-compilers/) | C and C++ compiler from Intel | +| icc | | | [ifort](http://software.intel.com/en-us/intel-compilers/) | Fortran compiler from Intel | -| LLVM | | +| [LLVM](http://llvm.org/) | The LLVM Core libraries provide a modern source- and target-independent optimizer, along with code generation support for many popular CPUs (as well as some less common ones!) These libraries are built around a well specified code representation known as the LLVM intermediate representation ("LLVM IR"). The LLVM Core libraries are well documented, and it is particularly easy to invent your own language (or port an existing compiler) to use LLVM as an optimizer and code generator. | ## Data | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [GDAL](http://www.gdal.org/) | GDAL is a translator library for raster geospatial data formats that is released under an X/MIT style Open Source license by the Open Source Geospatial Foundation. As a library, it presents a single abstract data model to the calling application for all supported formats. It also comes with a variety of useful commandline utilities for data translation and processing. | | [HDF5](http://www.hdfgroup.org/HDF5/) | HDF5 is a unique technology suite that makes possible the management of extremely large and complex data collections. | | [netCDF](http://www.unidata.ucar.edu/software/netcdf/) | NetCDF (network Common Data Form) is a set of software libraries and machine-independent data formats that support the creation, access, and sharing of array-oriented scientific data. | +| [netCDF-Fortran](http://www.unidata.ucar.edu/software/netcdf/) | NetCDF (network Common Data Form) is a set of software libraries and machine-independent data formats that support the creation, access, and sharing of array-oriented scientific data. | ## Devel | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [Autoconf](http://www.gnu.org/software/autoconf/) | Autoconf is an extensible package of M4 macros that produce shell scripts to automatically configure software source code packages. These scripts can adapt the packages to many kinds of UNIX-like systems without manual user intervention. Autoconf creates a configuration script for a package from a template file that lists the operating system features that the package can use, in the form of M4 macro calls. 
| [Automake](http://www.gnu.org/software/automake/automake.html) | Automake: GNU Standards-compliant Makefile generator |
| [Autotools](http://autotools.io) | This bundle collects the standard GNU build tools: Autoconf, Automake and libtool |
@@ -53,19 +54,28 @@
| [Doxygen](http://www.doxygen.org) | Doxygen is a documentation system for C++, C, Java, Objective-C, Python, IDL (Corba and Microsoft flavors), Fortran, VHDL, PHP, C#, and to some extent D. |
| [M4](http://www.gnu.org/software/m4/m4.html) | GNU M4 is an implementation of the traditional Unix macro processor. It is mostly SVR4 compatible although it has some extensions (for example, handling more than 9 positional parameters to macros). GNU M4 also has built-in functions for including files, running shell commands, doing arithmetic, etc. |
| [make](http://www.gnu.org/software/make/make.html) | make-3.82: GNU version of make utility |
+| [Mako](http://www.makotemplates.org) | A super-fast templating language that borrows the best ideas from the existing templating languages |
| [Maven](http://maven.apache.org/index.html) | Binary maven install, Apache Maven is a software project management and comprehension tool. Based on the concept of a project object model (POM), Maven can manage a project's build, reporting and documentation from a central piece of information. |
| [ncurses](http://www.gnu.org/software/ncurses/) | The Ncurses (new curses) library is a free software emulation of curses in System V Release 4.0, and more. It uses Terminfo format, supports pads and color and multiple highlights and forms characters and function-key mapping, and has all the other SYSV-curses enhancements over BSD Curses. |
+| [PCRE](http://www.pcre.org/) | The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5. |
+| [pkg-config](http://www.freedesktop.org/wiki/Software/pkg-config/) | pkg-config is a helper tool used when compiling applications and libraries. It helps you insert the correct compiler options on the command line so an application can use gcc -o test test.c `pkg-config --libs --cflags glib-2.0` for instance, rather than hard-coding values on where to find glib (or other libraries). |
+| [Qt](http://qt-project.org/) | Qt is a comprehensive cross-platform C++ application framework. |
+| [Qt5](http://qt.io/) | Qt is a comprehensive cross-platform C++ application framework. |
| [SQLite](http://www.sqlite.org/) | SQLite: SQL Database Engine in a C Library |
+| [SWIG](http://www.swig.org/) | SWIG is a software development tool that connects programs written in C and C++ with a variety of high-level programming languages. |
+| [xorg-macros](http://cgit.freedesktop.org/xorg/util/macros) | X.org macros utilities. |

## Lang

| Module | Description |
-| -------| ----------- |
+| ------ | ----------- |
| [Bison](http://www.gnu.org/software/bison) | Bison is a general-purpose parser generator that converts an annotated context-free grammar into a deterministic LR or generalized LR (GLR) parser employing LALR(1) parser tables. |
-| [flex](http://flex.sourceforge.net/) | Flex (Fast Lexical Analyzer) is a tool for generating scanners. A scanner, sometimes called a tokenizer, is a program which recognizes lexical patterns in text. |
+| flex | |
| [Java](http://java.com/) | Java Platform, Standard Edition (Java SE) lets you develop and deploy Java applications on desktops and servers. |
| [Lua](http://www.lua.org/) | Lua is a powerful, fast, lightweight, embeddable scripting language. Lua combines simple procedural syntax with powerful data description constructs based on associative arrays and extensible semantics. Lua is dynamically typed, runs by interpreting bytecode for a register-based virtual machine, and has automatic memory management with incremental garbage collection, making it ideal for configuration, scripting, and rapid prototyping. |
+| [Mono](http://mono-framework.com) | An open source, cross-platform implementation of C# and the CLR that is binary compatible with Microsoft.NET. |
| [NASM](http://www.nasm.us/) | NASM: General-purpose x86 assembler |
+| [OpenCL-runtime](https://software.intel.com/en-us/intel-opencl) | OpenCL™ is the first open, royalty-free standard for cross-platform, parallel programming of modern processors found in personal computers, servers and handheld/embedded devices. OpenCL (Open Computing Language) greatly improves speed and responsiveness for a wide spectrum of applications in numerous market categories from gaming and entertainment to scientific and medical software. |
| [Perl](http://www.perl.org/) | Larry Wall's Practical Extraction and Report Language |
| [Python](http://python.org/) | Python is a programming language that lets you work more quickly and integrate your systems more effectively. |
| [R](http://www.r-project.org/) | R is a free software environment for statistical computing and graphics. |
@@ -74,13 +84,18 @@

## Lib

| Module | Description |
-| -------| ----------- |
+| ------ | ----------- |
+| [libdrm](http://dri.freedesktop.org) | Direct Rendering Manager runtime library. |
| [libffi](http://sourceware.org/libffi/) | The libffi library provides a portable, high level programming interface to various calling conventions. This allows a programmer to call any function specified by a call interface description at run-time. |
| [libjpeg-turbo](http://sourceforge.net/libjpeg-turbo/) | libjpeg-turbo is a fork of the original IJG libjpeg which uses SIMD to accelerate baseline JPEG compression and decompression. libjpeg is a library that implements JPEG image encoding, decoding and transcoding. |
| [libpng](http://www.libpng.org/pub/png/libpng.html) | libpng is the official PNG reference library |
+| [libpthread-stubs](http://xcb.freedesktop.org/) | The X protocol C-language Binding (XCB) is a replacement for Xlib featuring a small footprint, latency hiding, direct access to the protocol, improved threading support, and extensibility. |
| [libreadline](http://cnswww.cns.cwru.edu/php/chet/readline/rltop.html) | The GNU Readline library provides a set of functions for use by applications that allow users to edit command lines as they are typed in. Both Emacs and vi editing modes are available. The Readline library includes additional functions to maintain a list of previously-entered command lines, to recall and perhaps reedit those lines, and perform csh-like history expansion on previous commands. |
-| [libtool](http://www.gnu.org/software/libtool) | GNU libtool is a generic library support script. Libtool hides the complexity of using shared libraries behind a consistent, portable interface. |
+| [libsndfile](http://www.mega-nerd.com/libsndfile) | Libsndfile is a C library for reading and writing files containing sampled sound (such as MS Windows WAV and the Apple/SGI AIFF format) through one standard library interface.
| +| [LibTIFF](http://www.remotesensing.org/libtiff/) | tiff: Library and tools for reading and writing TIFF data files | +| libtool | | | [libxml2](http://xmlsoft.org/) | Libxml2 is the XML C parser and toolchain developed for the Gnome project (but usable outside of the Gnome platform). | +| [nettle](http://www.lysator.liu.se/~nisse/nettle/) | Nettle is a cryptographic library that is designed to fit easily in more or less any context: In crypto toolkits for object-oriented languages (C++, Python, Pike, ...), in applications like LSH or GNUPG, or even in kernel space. | | [PROJ](http://trac.osgeo.org/proj/) | Program proj is a standard Unix filter function which converts geographic longitude and latitude coordinates into cartesian coordinates | | [tbb](http://software.intel.com/en-us/articles/intel-tbb/) | Intel Threading Building Blocks 4.0 (Intel TBB) is a widely used, award-winning C++ template library for creating reliable, portable, and scalable parallel applications. Use Intel TBB for a simple and rapid way of developing robust task-based parallel applications that scale to available processor cores, are compatible with multiple environments, and are easier to maintain. Intel TBB is the most proficient way to implement future-proof parallel applications that tap into the power and performance of multicore and manycore hardware platforms. | | [zlib](http://www.zlib.net/) | zlib is designed to be a free, general-purpose, legally unencumbered -- that is, not covered by any patents -- lossless data-compression library for use on virtually any computer hardware and operating system. | @@ -88,42 +103,51 @@ ## Math | Module | Description | -| -------| ----------- | +| ------ | ----------- | +| [Eigen](http://eigen.tuxfamily.org/index.php?title=Main_Page) | Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms. | +| [GEOS](http://trac.osgeo.org/geos) | GEOS (Geometry Engine - Open Source) is a C++ port of the Java Topology Suite (JTS) | | GMP | | +| [METIS](http://glaros.dtc.umn.edu/gkhome/metis/metis/overview) | METIS is a set of serial programs for partitioning graphs, partitioning finite element meshes, and producing fill reducing orderings for sparse matrices. The algorithms implemented in METIS are based on the multilevel recursive-bisection, multilevel k-way, and multi-constraint partitioning schemes. | +| [MPFR](http://www.mpfr.org) | The MPFR library is a C library for multiple-precision floating-point computations with correct rounding. | | [SCOTCH](http://gforge.inria.fr/projects/scotch/) | Software package and libraries for sequential and parallel graph partitioning, static mapping, and sparse matrix block ordering, and sequential mesh and hypergraph partitioning. | ## Mpi | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [impi](http://software.intel.com/en-us/intel-mpi-library/) | The Intel(R) MPI Library for Linux* OS is a multi-fabric message passing library based on ANL MPICH2 and OSU MVAPICH2. The Intel MPI Library for Linux OS implements the Message Passing Interface, version 2 (MPI-2) specification. | | [OpenMPI](http://www.open-mpi.org/) | The Open MPI Project is an open source MPI-2 implementation. | ## Numlib | Module | Description | -| -------| ----------- | +| ------ | ----------- | +| [CGAL](http://www.cgal.org/) | The goal of the CGAL Open Source Project is to provide easy access to efficient and reliable geometric algorithms in the form of a C++ library. 
| +| [cuDNN](https://developer.nvidia.com/cudnn) | The NVIDIA CUDA Deep Neural Network library (cuDNN) is a GPU-accelerated library of primitives for deep neural networks. | | [FFTW](http://www.fftw.org) | FFTW is a C subroutine library for computing the discrete Fourier transform (DFT) in one or more dimensions, of arbitrary input size, and of both real and complex data. | | [imkl](http://software.intel.com/en-us/intel-mkl/) | Intel Math Kernel Library is a library of highly optimized, extensively threaded math routines for science, engineering, and financial applications that require maximum performance. Core math functions include BLAS, LAPACK, ScaLAPACK, Sparse Solvers, Fast Fourier Transforms, Vector Math, and more. | +| [NLopt](http://ab-initio.mit.edu/wiki/index.php/NLopt) | NLopt is a free/open-source library for nonlinear optimization, providing a common interface for a number of different free optimization routines available online as well as original implementations of various other algorithms. | | [OpenBLAS](http://xianyi.github.com/OpenBLAS/) | OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. | | [ScaLAPACK](http://www.netlib.org/scalapack/) | The ScaLAPACK (or Scalable LAPACK) library includes a subset of LAPACK routines redesigned for distributed memory MIMD parallel computers. | ## Phys | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [VASP](http://www.vasp.at) | The Vienna Ab initio Simulation Package (VASP) is a computer program for atomic scale materials modelling, e.g. electronic structure calculations and quantum-mechanical molecular dynamics, from first principles. | ## System | Module | Description | -| -------| ----------- | +| ------ | ----------- | +| CUDA | | | [hwloc](http://www.open-mpi.org/projects/hwloc/) | The Portable Hardware Locality (hwloc) software package provides a portable abstraction (across OS, versions, architectures, ...) of the hierarchical topology of modern architectures, including NUMA memory nodes, sockets, shared caches, cores and simultaneous multithreading. It also gathers various system attributes such as cache and memory information as well as the locality of I/O devices such as network interfaces, InfiniBand HCAs or GPUs. It primarily aims at helping applications with gathering information about modern computing hardware so as to exploit it accordingly and efficiently. | +| [libpciaccess](http://cgit.freedesktop.org/xorg/lib/libpciaccess/) | Generic PCI access library. | ## Toolchain | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [foss]((none)) | GNU Compiler Collection (GCC) based compiler toolchain, including OpenMPI for MPI support, OpenBLAS (BLAS and LAPACK support), FFTW and ScaLAPACK. | | [GNU](http://www.gnu.org/software/) | Compiler-only toolchain with GCC and binutils. | | [gompi]((none)) | GNU Compiler Collection (GCC) based compiler toolchain, including OpenMPI for MPI support. | @@ -135,32 +159,44 @@ ## Tools | Module | Description | -| -------| ----------- | +| ------ | ----------- | | [Bash](http://www.gnu.org/software/bash) | Bash is an sh-compatible command language interpreter that executes commands read from the standard input or from a file. Bash also incorporates useful features from the Korn and C shells (ksh and csh). | | [binutils](http://directory.fsf.org/project/binutils/) | binutils: GNU binary utilities | | [bzip2](http://www.bzip.org/) | bzip2 is a freely available, patent free, high-quality data compressor. 
It typically compresses files to within 10% to 15% of the best available techniques (the PPM family of statistical compressors), whilst being around twice as fast at compression and six times faster at decompression. |
| [cURL](http://curl.haxx.se) | libcurl is a free and easy-to-use client-side URL transfer library, supporting DICT, FILE, FTP, FTPS, Gopher, HTTP, HTTPS, IMAP, IMAPS, LDAP, LDAPS, POP3, POP3S, RTMP, RTSP, SCP, SFTP, SMTP, SMTPS, Telnet and TFTP. libcurl supports SSL certificates, HTTP POST, HTTP PUT, FTP uploading, HTTP form based upload, proxies, cookies, user+password authentication (Basic, Digest, NTLM, Negotiate, Kerberos), file transfer resume, http proxy tunneling and more. |
-| [EasyBuild](http://hpcugent.github.com/easybuild/) | EasyBuild is a software build and installation framework written in Python that allows you to install software in a structured, repeatable and robust way. |
+| [DMTCP](http://dmtcp.sourceforge.net/index.html) | DMTCP (Distributed MultiThreaded Checkpointing) transparently checkpoints a single-host or distributed computation in user-space -- with no modifications to user code or to the O/S. |
+| EasyBuild | |
| [expat](http://expat.sourceforge.net/) | Expat is an XML parser library written in C. It is a stream-oriented parser in which an application registers handlers for things the parser might find in the XML document (like start tags) |
-| git | |
+| [git](http://git-scm.com/) | Git is a free and open source distributed version control system designed to handle everything from small to very large projects with speed and efficiency. |
| [gzip](http://www.gnu.org/software/gzip/) | gzip (GNU zip) is a popular data compression program as a replacement for compress |
-| help2man | |
| MATLAB | |
| [Mercurial](http://mercurial.selenic.com/) | Mercurial is a free, distributed source control management tool. It efficiently handles projects of any size and offers an easy and intuitive interface. |
| [numactl](http://oss.sgi.com/projects/libnuma/) | The numactl program allows you to run your application program on specific CPUs and memory nodes. It does this by supplying a NUMA memory policy to the operating system before running your program. The libnuma library provides convenient ways for you to add NUMA memory policies into your own program. |
| pigz | |
| [QEMU](http://wiki.qemu.org/Main_Page) | QEMU is a generic and open source machine emulator and virtualizer. |
+| [RStudio](https://www.rstudio.com) | RStudio is a set of integrated tools designed to help you be more productive with R. It includes a console, syntax-highlighting editor that supports direct code execution, as well as tools for plotting, history, debugging and workspace management. |
+| Singularity | |
| [Szip](http://www.hdfgroup.org/doc_resource/SZIP/) | Szip compression software, providing lossless compression of scientific data |
| [tcsh](http://www.tcsh.org) | Tcsh is an enhanced, but completely compatible version of the Berkeley UNIX C shell (csh). It is a command language interpreter usable both as an interactive login shell and a shell script command processor. It includes a command-line editor, programmable word completion, spelling correction, a history mechanism, job control and a C-like syntax. |
+| [util-linux](http://www.kernel.org/pub/linux/utils/util-linux) | Set of Linux utilities |
| [VDE2](http://vde.sourceforge.net) | VDE is an ethernet compliant virtual network that can be spawned over a set of physical computers over the Internet. VDE is part of the virtualsquare project. |
| [VTune](http://software.intel.com/en-us/intel-vtune-amplifier-xe) | Intel VTune Amplifier XE 2016 is the premier performance profiler for C, C++, C#, Fortran, Assembly and Java. |
-| XZ | |
+| [XZ](http://tukaani.org/xz/) | xz: XZ utilities |

## Vis

| Module | Description |
-| -------| ----------- |
+| ------ | ----------- |
+| [cairo](http://cairographics.org) | Cairo is a 2D graphics library with support for multiple output devices. Currently supported output targets include the X Window System (via both Xlib and XCB), Quartz, Win32, image buffers, PostScript, PDF, and SVG file output. Experimental backends include OpenGL, BeOS, OS/2, and DirectFB |
+| [fontconfig](http://www.freedesktop.org/software/fontconfig) | Fontconfig is a library designed to provide system-wide font configuration, customization and application access. |
+| [freetype](http://freetype.org) | FreeType 2 is a software font engine that is designed to be small, efficient, highly customizable, and portable while capable of producing high-quality output (glyph images). It can be used in graphics libraries, display servers, font conversion tools, text image generation tools, and many other products as well. |
| [gettext](http://www.gnu.org/software/gettext/) | GNU `gettext' is an important step for the GNU Translation Project, as it is an asset on which we may build many other steps. This package offers to programmers, translators, and even users, a well integrated set of tools and documentation |
| [GLib](http://www.gtk.org/) | GLib is one of the base libraries of the GTK+ project |
+| [JasPer](http://www.ece.uvic.ca/~frodo/jasper/) | The JasPer Project is an open-source initiative to provide a free software-based reference implementation of the codec specified in the JPEG-2000 Part-1 standard. |
+| [libGLU](ftp://ftp.freedesktop.org/pub/mesa/glu/) | The OpenGL Utility Library (GLU) is a computer graphics library for OpenGL. |
+| [Mesa](http://www.mesa3d.org/) | Mesa is an open-source implementation of the OpenGL specification - a system for rendering interactive 3D graphics. |
+| [ParaView](http://www.paraview.org) | ParaView is a scientific parallel visualizer. |
+| [pixman](http://www.pixman.org/) | Pixman is a low-level software library for pixel manipulation, providing features such as image compositing and trapezoid rasterization. Important users of pixman are the cairo graphics library and the X server. |
| [Tk](http://www.tcl.tk/) | Tk is an open source, cross-platform widget toolchain that provides a library of basic elements for building a graphical user interface (GUI) in many different programming languages.
| | [VisIt](https://wci.llnl.gov/simulation/computer-codes/visit) | VisIt is an Open Source, interactive, scalable, visualization, animation and analysis tool | +| [X11](https://www.x.org) | The X Window System (X11) is a windowing system for bitmap displays | diff --git a/docs.it4i/modules-salomon.md b/docs.it4i/modules-salomon.md index 088097b961c8abe8040eeb929f94bfd84b7ab5ad..9cff5aa271d2fed8f6e3d6cc9978303008069f9e 100644 --- a/docs.it4i/modules-salomon.md +++ b/docs.it4i/modules-salomon.md @@ -1,394 +1,19 @@ # Available Modules -## Core - -| Module | Description | -| ------ | ----------- | -| lmod | | -| settarg | | - -## Bio - -| Module | Description | -| ------ | ----------- | -| [almost](http://www-almost.ch.cam.ac.uk/site) | all atom molecular simulation toolkit - is a fast and flexible molecular modeling environment that provides powerful and efficient algorithms for molecular simulation, homology modeling, de novo design and ab-initio calculations. | -| [Amber](http://ambermd.org) | A set of molecular mechanical force fields for the simulation of biomolecules | -| [BCFtools](http://www.htslib.org/) | Samtools is a suite of programs for interacting with high-throughput sequencing data. BCFtools - Reading/writing BCF2/VCF/gVCF files and calling/filtering/summarising SNP and short indel sequence variants | -| [BWA](http://bio-bwa.sourceforge.net/) | Burrows-Wheeler Aligner (BWA) is an efficient program that aligns relatively short nucleotide sequences against a long reference sequence such as the human genome. | -| [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) | FastQC is a quality control application for high throughput sequence data. It reads in sequence data in a variety of formats and can either provide an interactive application to review the results of several different QC checks, or create an HTML based report which can be integrated into a pipeline. | -| [GATK](http://www.broadinstitute.org/gatk/) | The Genome Analysis Toolkit or GATK is a software package developed at the Broad Institute to analyse next-generation resequencing data. The toolkit offers a wide variety of tools, with a primary focus on variant discovery and genotyping as well as strong emphasis on data quality assurance. Its robust architecture, powerful processing engine and high-performance computing features make it capable of taking on projects of any size. | -| [GROMACS](http://www.gromacs.org) | GROMACS is a versatile package to perform molecular dynamics, i.e. simulate the Newtonian equations of motion for systems with hundreds to millions of particles. | -| [HTSlib](http://www.htslib.org/) | A C library for reading/writing high-throughput sequencing data. This package includes the utilities bgzip and tabix | -| [picard](http://sourceforge.net/projects/picard) | A set of tools (in Java) for working with next generation sequencing data in the BAM format. | -| [PLUMED](http://www.plumed-code.org) | PLUMED is an open source library for free energy calculations in molecular systems which works together with some of the most popular molecular dynamics engines. Free energy calculations can be performed as a function of many order parameters with a particular focus on biological problems, using state of the art methods such as metadynamics, umbrella sampling and Jarzynski-equation based steered MD. The software, written in C++, can be easily interfaced with both fortran and C/C++ codes. 
| -| [RELION](http://www2.mrc-lmb.cam.ac.uk/relion/index.php/Main_Page) | RELION (for REgularised LIkelihood OptimisatioN, pronounce rely-on) is a stand-alone computer program that employs an empirical Bayesian approach to refinement of (multiple) 3D reconstructions or 2D class averages in electron cryo-microscopy (cryo-EM). | -| [SAMtools](http://www.htslib.org/) | SAM Tools provide various utilities for manipulating alignments in the SAM format, including sorting, merging, indexing and generating alignments in a per-position format. | -| [SnpEff](http://snpeff.sourceforge.net/) | Genetic variant annotation and effect prediction toolbox. | -| [Trimmomatic](http://www.usadellab.org/cms/?page=trimmomatic) | Trimmomatic performs a variety of useful trimming tasks for illumina paired-end and single ended data.The selection of trimming steps and their associated parameters are supplied on the command line. | - -## Cae - -| Module | Description | -| ------ | ----------- | -| Adams | | -| COMSOL | | -| Digimat | | -| [FreeFem++](http://www.freefem.org) | FreeFem++ is a partial differential equation solver. It has its own language. freefem scripts can solve multiphysics non linear systems in 2D and 3D. Problems involving PDE (2d, 3d) from several branches of physics such as fluid-structure interactions require interpolations of data on several meshes and their manipulation within one program. FreeFem++ includes a fast 2^d-tree-based interpolation algorithm and a language for the manipulation of data on multiple meshes (as a follow up of bamg (now a part of FreeFem++ ). FreeFem++ is written in C++ and the FreeFem++ language is a C++ idiom. It runs on Macs, Windows, Unix machines. FreeFem++ replaces the older freefem and freefem+. | -| HyperWorks | | -| Marc | | -| [OpenFOAM](http://www.openfoam.com/) | OpenFOAM is a free, open source CFD software package. OpenFOAM has an extensive range of features to solve anything from complex fluid flows involving chemical reactions, turbulence and heat transfer, to solid dynamics and electromagnetics. | - -## Chem - -| Module | Description | -| ------ | ----------- | -| [ABINIT](http://www.abinit.org/) | Abinit is a plane wave pseudopotential code for doing condensed phase electronic structure calculations using DFT. | -| [CP2K](http://www.cp2k.org/) | CP2K is a freely available (GPL) program, written in Fortran 95, to perform atomistic and molecular simulations of solid state, liquid, molecular and biological systems. It provides a general framework for different methods such as e.g. density functional theory (DFT) using a mixed Gaussian and plane waves approach (GPW), and classical pair and many-body potentials. | -| [LAMMPS](http://lammps.sandia.gov) | LAMMPS is a classical molecular dynamics code, and an acronym for Large-scale Atomic/Molecular Massively Parallel Simulator. Has potentials for solid-state materials (metals, semiconductors) and soft matter (biomolecules, polymers) and coarse-grained or mesoscopic systems. It can be used to model atoms or, more generically, as a parallel particle simulator at the atomic, meso, or continuum scale. | -| [libctl](http://ab-initio.mit.edu/libctl) | libctl is a free Guile-based library implementing flexible control files for scientific simulations. | -| [Libint](https://sourceforge.net/p/libint/) | Libint library is used to evaluate the traditional (electron repulsion) and certain novel two-body matrix elements (integrals) over Cartesian Gaussian functions used in modern atomic and molecular theory. 
| -| [libxc](http://www.tddft.org/programs/octopus/wiki/index.php/Libxc) | Libxc is a library of exchange-correlation functionals for density-functional theory. The aim is to provide a portable, well tested and reliable set of exchange and correlation functionals. | -| Molpro | | -| [NAMD](http://www.ks.uiuc.edu/Research/namd/) | NAMD is a parallel molecular dynamics code designed for high-performance simulation of large biomolecular systems. | -| [NWChem](http://www.nwchem-sw.org) | NWChem aims to provide its users with computational chemistry tools that are scalable both in their ability to treat large scientific computational chemistry problems efficiently, and in their use of available parallel computing resources from high-performance parallel supercomputers to conventional workstation clusters. NWChem software can handle: biomolecules, nanostructures, and solid-state; from quantum to classical, and all combinations; Gaussian basis functions or plane-waves; scaling from one to thousands of processors; properties and relativity. | -| [ORCA](http://cec.mpg.de/forum/) | ORCA is a flexible, efficient and easy-to-use general purpose tool for quantum chemistry with specific emphasis on spectroscopic properties of open-shell molecules. It features a wide variety of standard quantum chemical methods ranging from semiempirical methods to DFT to single- and multireference correlated ab initio methods. It can also treat environmental and relativistic effects. | -| [QuantumESPRESSO](http://www.pwscf.org/) | Quantum ESPRESSO is an integrated suite of computer codes for electronic-structure calculations and materials modeling at the nanoscale. It is based on density-functional theory, plane waves, and pseudopotentials (both norm-conserving and ultrasoft). | -| [S4MPLE](http://infochim.u-strasbg.fr/spip.php?rubrique152) | S4MPLE (Sampler For Multiple Protein-Ligand Entities) is a flexible molecular modeling tool, supporting empirical force field-driven conformational sampling and geometry optimization heuristics using a hybrid genetic algorithm (GA). | -| Scipion | | -| [xdrfile](http://www.gromacs.org/Developer_Zone/Programming_Guide/XTC_Library) | XTC library | - -## Compiler - -| Module | Description | -| ------ | ----------- | -| [BerkeleyUPC](http://upc.lbl.gov) | The goal of the Berkeley UPC compiler group is to develop a portable, high performance implementation of UPC for large-scale multiprocessors, PC clusters, and clusters of shared memory multiprocessors. | -| [Clang](http://clang.llvm.org/) | C, C++, Objective-C compiler, based on LLVM. Does not include C++ standard library -- use libstdc++ from GCC. | -| [GCC](http://gcc.gnu.org/) | The GNU Compiler Collection includes front ends for C, C++, Objective-C, Fortran, Java, and Ada, as well as libraries for these languages (libstdc++, libgcj,...). | -| [GCCcore](http://gcc.gnu.org/) | The GNU Compiler Collection includes front ends for C, C++, Objective-C, Fortran, Java, and Ada, as well as libraries for these languages (libstdc++, libgcj,...). | -| [icc](http://software.intel.com/en-us/intel-compilers/) | C and C++ compiler from Intel | -| [ifort](http://software.intel.com/en-us/intel-compilers/) | Fortran compiler from Intel | -| [LLVM](http://llvm.org/) | The LLVM Core libraries provide a modern source- and target-independent optimizer, along with code generation support for many popular CPUs (as well as some less common ones!) 
These libraries are built around a well specified code representation known as the LLVM intermediate representation ("LLVM IR"). The LLVM Core libraries are well documented, and it is particularly easy to invent your own language (or port an existing compiler) to use LLVM as an optimizer and code generator. | -| [OpenCoarrays](http://www.opencoarrays.org/) | A transport layer for coarray Fortran compilers. | -| PGI | | - -## Data - -| Module | Description | -| ------ | ----------- | -| [GDAL](http://www.gdal.org/) | GDAL is a translator library for raster geospatial data formats that is released under an X/MIT style Open Source license by the Open Source Geospatial Foundation. As a library, it presents a single abstract data model to the calling application for all supported formats. It also comes with a variety of useful commandline utilities for data translation and processing. | -| [h5py](http://www.h5py.org/) | HDF5 for Python (h5py) is a general-purpose Python interface to the Hierarchical Data Format library, version 5. HDF5 is a versatile, mature scientific software library designed for the fast, flexible storage of enormous amounts of data. | -| [HDF5](http://www.hdfgroup.org/HDF5/) | HDF5 is a unique technology suite that makes possible the management of extremely large and complex data collections. | -| [netCDF](http://www.unidata.ucar.edu/software/netcdf/) | NetCDF (network Common Data Form) is a set of software libraries and machine-independent data formats that support the creation, access, and sharing of array-oriented scientific data. | -| [netCDF-Fortran](http://www.unidata.ucar.edu/software/netcdf/) | NetCDF (network Common Data Form) is a set of software libraries and machine-independent data formats that support the creation, access, and sharing of array-oriented scientific data. | - -## Debugger - -| Module | Description | -| ------ | ----------- | -| aislinn | | -| DDT | | -| [Forge](http://www.allinea.com/products/develop-allinea-forge) | Allinea Forge is the complete toolsuite for software development - with everything needed to debug, profile, optimize, edit and build C, C++ and FORTRAN applications on Linux for high performance - from single threads through to complex parallel HPC codes with MPI, OpenMP, threads or CUDA. | -| [PerformanceReports](http://www.allinea.com/products/allinea-performance-reports) | Allinea Performance Reports are the most effective way to characterize and understand the performance of HPC application runs. One single-page HTML report elegantly answers a range of vital questions for any HPC site. - Is this application well-optimized for the system and the processors it is running on? - Does it benefit from running at this scale? - Are there I/O, networking or threading bottlenecks affecting performance? - Which hardware, software or configuration changes can we make to improve performance further. - How much energy did this application use? | -| TotalView | | -| [Valgrind](http://valgrind.org/downloads/) | Valgrind: Debugging and profiling tools | - -## Devel - -| Module | Description | -| ------ | ----------- | -| [ant](http://ant.apache.org/) | Apache Ant is a Java library and command-line tool whose mission is to drive processes described in build files as targets and extension points dependent upon each other. The main known usage of Ant is the build of Java applications. 
| -| [Autoconf](http://www.gnu.org/software/autoconf/) | Autoconf is an extensible package of M4 macros that produce shell scripts to automatically configure software source code packages. These scripts can adapt the packages to many kinds of UNIX-like systems without manual user intervention. Autoconf creates a configuration script for a package from a template file that lists the operating system features that the package can use, in the form of M4 macro calls. | -| [Automake](http://www.gnu.org/software/automake/automake.html) | Automake: GNU Standards-compliant Makefile generator | -| [Autotools](http://autotools.io) | This bundle collect the standard GNU build tools: Autoconf, Automake and libtool | -| [Boost](http://www.boost.org/) | Boost provides free peer-reviewed portable C++ source libraries. | -| [CMake](http://www.cmake.org) | CMake, the cross-platform, open-source build system. CMake is a family of tools designed to build, test and package software. | -| [Doxygen](http://www.doxygen.org) | Doxygen is a documentation system for C++, C, Java, Objective-C, Python, IDL (Corba and Microsoft flavors), Fortran, VHDL, PHP, C#, and to some extent D. | -| [fontsproto](http://www.freedesktop.org/wiki/Software/xlibs) | X11 font extension wire protocol | -| [glproto](http://www.freedesktop.org/wiki/Software/xlibs) | X protocol and ancillary headers | -| [gperf](http://www.gnu.org/software/gperf/) | GNU gperf is a perfect hash function generator. For a given list of strings, it produces a hash function and hash table, in form of C or C++ code, for looking up a value depending on the input string. The hash function is perfect, which means that the hash table has no collisions, and the hash table lookup needs a single string comparison only. | -| [guile](http://www.gnu.org/software/guile) | Guile is the GNU Ubiquitous Intelligent Language for Extensions, the official extension language for the GNU operating system. | -| [JUnit](http://sourceforge.net/projects/junit) | A programmer-oriented testing framework for Java. | -| [libSM](http://www.freedesktop.org/wiki/Software/xlibs) | X11 Session Management library, which allows for applications to both manage sessions, and make use of session managers to save and restore their state for later use. | -| [M4](http://www.gnu.org/software/m4/m4.html) | GNU M4 is an implementation of the traditional Unix macro processor. It is mostly SVR4 compatible although it has some extensions (for example, handling more than 9 positional parameters to macros). GNU M4 also has built-in functions for including files, running shell commands, doing arithmetic, etc. | -| [make](http://www.gnu.org/software/make/make.html) | make-3.82: GNU version of make utility | -| [makedepend](http://www.linuxfromscratch.org/blfs/view/svn/x/makedepend.html) | The makedepend package contains a C-preprocessor like utility to determine build-time dependencies. | -| [Maven](http://maven.apache.org/index.html) | Binary maven install, Apache Maven is a software project management and comprehension tool. Based on the concept of a project object model (POM), Maven can manage a project's build, reporting and documentation from a central piece of information. | -| [ncurses](http://www.gnu.org/software/ncurses/) | The Ncurses (new curses) library is a free software emulation of curses in System V Release 4.0, and more. 
It uses Terminfo format, supports pads and color and multiple highlights and forms characters and function-key mapping, and has all the other SYSV-curses enhancements over BSD Curses. | -| [PCRE](http://www.pcre.org/) | The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5. | -| [PCRE2](http://www.pcre.org/) | The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5. | -| [pkg-config](http://www.freedesktop.org/wiki/Software/pkg-config/) | pkg-config is a helper tool used when compiling applications and libraries. It helps you insert the correct compiler options on the command line so an application can use gcc -o test test.c `pkg-config --libs --cflags glib-2.0` for instance, rather than hard-coding values on where to find glib (or other libraries). | -| [python-meep](https://code.launchpad.net/python-meep) | Python wrapper for the Meep FDTD solver. | -| [Qt](http://qt-project.org/) | Qt is a comprehensive cross-platform C++ application framework. | -| [renderproto](http://www.freedesktop.org/wiki/Software/xlibs) | Xrender protocol and ancillary headers | -| [SCons](http://www.scons.org/) | SCons is a software construction tool. | -| [Spark](http://spark.apache.org) | Spark is Hadoop MapReduce done in memory | -| [SQLite](http://www.sqlite.org/) | SQLite: SQL Database Engine in a C Library | -| [SWIG](http://www.swig.org/) | SWIG is a software development tool that connects programs written in C and C++ with a variety of high-level programming languages. | -| [xbitmaps](http://www.freedesktop.org/wiki/Software/xlibs) | provides bitmaps for x | -| [xcb-proto](http://xcb.freedesktop.org/) | The X protocol C-language Binding (XCB) is a replacement for Xlib featuring a small footprint, latency hiding, direct access to the protocol, improved threading support, and extensibility. | -| [xextproto](http://www.freedesktop.org/wiki/Software/xlibs) | XExtProto protocol headers. | -| [xineramaproto](http://www.freedesktop.org/wiki/Software/xlibs) | X protocol and ancillary headers for xinerama | -| [xorg-macros](http://cgit.freedesktop.org/xorg/util/macros) | X.org macros utilities. | -| [xproto](http://www.freedesktop.org/wiki/Software/xlibs) | X protocol and ancillary headers | -| [xtrans](http://www.freedesktop.org/wiki/Software/xlibs) | xtrans includes a number of routines to make X implementations transport-independent; at time of writing, it includes support for UNIX sockets, IPv4, IPv6, and DECnet. | - -## Geo - -| Module | Description | -| ------ | ----------- | -| [DCW](http://gmt.soest.hawaii.edu/projects/gmt) | country polygons for GMT | -| [GMT](http://gmt.soest.hawaii.edu/) | GMT is an open source collection of about 80 command-line tools for manipulating geographic and Cartesian data sets (including filtering, trend fitting, gridding, projecting, etc.) and producing PostScript illustrations ranging from simple x-y plots via contour maps to artificially illuminated surfaces and 3D perspective views; the GMT supplements add another 40 more specialized and discipline-specific tools. | -| [PROJ_4](http://proj.osgeo.org) | PROJ.4 - Cartographic Projections Library originally written by Gerald Evenden then of the USGS. 
| - -## Lang - -| Module | Description | -| ------ | ----------- | -| [Bison](http://www.gnu.org/software/bison) | Bison is a general-purpose parser generator that converts an annotated context-free grammar into a deterministic LR or generalized LR (GLR) parser employing LALR(1) parser tables. | -| [byacc](http://invisible-island.net/byacc/byacc.html) | Berkeley Yacc (byacc) is generally conceded to be the best yacc variant available. In contrast to bison, it is written to avoid dependencies upon a particular compiler. | -| [flex](http://flex.sourceforge.net/) | Flex (Fast Lexical Analyzer) is a tool for generating scanners. A scanner, sometimes called a tokenizer, is a program which recognizes lexical patterns in text. | -| [Java](http://java.com/) | Java Platform, Standard Edition (Java SE) lets you develop and deploy Java applications on desktops and servers. | -| [libgdiplus](https://github.com/mono/libgdiplus) | An Open Source implementation of the GDI+ API. | -| [Lua](http://www.lua.org/) | Lua is a powerful, fast, lightweight, embeddable scripting language. Lua combines simple procedural syntax with powerful data description constructs based on associative arrays and extensible semantics. Lua is dynamically typed, runs by interpreting bytecode for a register-based virtual machine, and has automatic memory management with incremental garbage collection, making it ideal for configuration, scripting, and rapid prototyping. | -| [Mono](http://www.mono-project.com) | Mono is an open source implementation of Microsoft's .NET Framework based on the ECMA standards for C# and the Common Language Runtime. | -| [NASM](http://www.nasm.us/) | NASM: General-purpose x86 assembler | -| [OpenCL-builder](https://software.intel.com/en-us/intel-opencl) | OpenCL™ is the first open, royalty-free standard for cross-platform, parallel programming of modern processors found in personal computers, servers and handheld/embedded devices. OpenCL (Open Computing Language) greatly improves speed and responsiveness for a wide spectrum of applications in numerous market categories from gaming and entertainment to scientific and medical software. This is builder (formerly runtime) package. | -| [OpenCL-runtime](https://software.intel.com/en-us/intel-opencl) | OpenCL™ is the first open, royalty-free standard for cross-platform, parallel programming of modern processors found in personal computers, servers and handheld/embedded devices. OpenCL (Open Computing Language) greatly improves speed and responsiveness for a wide spectrum of applications in numerous market categories from gaming and entertainment to scientific and medical software. | -| [Perl](http://www.perl.org/) | Larry Wall's Practical Extraction and Report Language | -| [Python](http://python.org/) | Python is a programming language that lets you work more quickly and integrate your systems more effectively. | -| [R](http://www.r-project.org/) | R is a free software environment for statistical computing and graphics. | -| [Racket](http://racket-lang.org) | Racket is a full-spectrum programming language. It goes beyond Lisp and Scheme with dialects that support objects, types, laziness, and more. | -| [Ruby](https://www.ruby-lang.org) | Ruby is a dynamic, open source programming language with a focus on simplicity and productivity. It has an elegant syntax that is natural to read and easy to write. | -| [SIP](http://www.riverbankcomputing.com/software/sip/) | SIP is a tool that makes it very easy to create Python bindings for C and C++ libraries. 
| -| [SnuCL](http://snucl.snu.ac.kr) | An OpenCL Framework for Heterogeneous Clusters | -| [Tcl](http://www.tcl.tk/) | Tcl (Tool Command Language) is a very powerful but easy to learn dynamic programming language, suitable for a very wide range of uses, including web and desktop applications, networking, administration, testing and many more. | - -## Lib - -| Module | Description | -| ------ | ----------- | -| [FOX](http://fox-toolkit.org) | FOX is a C++ based Toolkit for developing Graphical User Interfaces easily and effectively. It offers a wide, and growing, collection of Controls, and provides state of the art facilities such as drag and drop, selection, as well as OpenGL widgets for 3D graphical manipulation. | -| GLM | | -| [libdrm](http://dri.freedesktop.org) | Direct Rendering Manager runtime library. | -| libevent | | -| [libffi](http://sourceware.org/libffi/) | The libffi library provides a portable, high level programming interface to various calling conventions. This allows a programmer to call any function specified by a call interface description at run-time. | -| [libfontenc](http://www.freedesktop.org/wiki/Software/xlibs/) | X11 font encoding library | -| [libjpeg-turbo](http://sourceforge.net/libjpeg-turbo/) | libjpeg-turbo is a fork of the original IJG libjpeg which uses SIMD to accelerate baseline JPEG compression and decompression. libjpeg is a library that implements JPEG image encoding, decoding and transcoding. | -| [libmatheval](http://www.gnu.org/software/libmatheval/) | GNU libmatheval is a library (callable from C and Fortran) to parse and evaluate symbolic expressions input as text. | -| [libMesh](http://libmesh.github.io/) | The libMesh library provides a framework for the numerical simulation of partial differential equations using arbitrary unstructured discretizations on serial and parallel platforms. A major goal of the library is to provide support for adaptive mesh refinement (AMR) computations in parallel while allowing a research scientist to focus on the physics they are modeling. | -| [libpng](http://www.libpng.org/pub/png/libpng.html) | libpng is the official PNG reference library | -| [libpthread-stubs](http://xcb.freedesktop.org/) | The X protocol C-language Binding (XCB) is a replacement for Xlib featuring a small footprint, latency hiding, direct access to the protocol, improved threading support, and extensibility. | -| [libreadline](http://cnswww.cns.cwru.edu/php/chet/readline/rltop.html) | The GNU Readline library provides a set of functions for use by applications that allow users to edit command lines as they are typed in. Both Emacs and vi editing modes are available. The Readline library includes additional functions to maintain a list of previously-entered command lines, to recall and perhaps reedit those lines, and perform csh-like history expansion on previous commands. | -| [LibTIFF](http://www.remotesensing.org/libtiff/) | tiff: Library and tools for reading and writing TIFF data files | -| [libtool](http://www.gnu.org/software/libtool) | GNU libtool is a generic library support script. Libtool hides the complexity of using shared libraries behind a consistent, portable interface. | -| [libunistring](http://www.gnu.org/software/libunistring/) | This library provides functions for manipulating Unicode strings and for manipulating C strings according to the Unicode standard. 
| -| [libunwind](http://www.nongnu.org/libunwind/) | The primary goal of libunwind is to define a portable and efficient C programming interface (API) to determine the call-chain of a program. The API additionally provides the means to manipulate the preserved (callee-saved) state of each call-frame and to resume execution at any point in the call-chain (non-local goto). The API supports both local (same-process) and remote (across-process) operation. As such, the API is useful in a number of applications | -| [libxcb](http://xcb.freedesktop.org/) | The X protocol C-language Binding (XCB) is a replacement for Xlib featuring a small footprint, latency hiding, direct access to the protocol, improved threading support, and extensibility. | -| [libxml2](http://xmlsoft.org/) | Libxml2 is the XML C parser and toolchain developed for the Gnome project (but usable outside of the Gnome platform). | -| libxslt | | -| libyaml | | -| lxml | | -| MATIO | | -| [OpenCoarrays](http://www.opencoarrays.org) | OpenCoarrays is an open-source software project for developing, porting and tuning transport layers that support coarray Fortran compilers. | -| [PROJ](http://trac.osgeo.org/proj/) | Program proj is a standard Unix filter function which converts geographic longitude and latitude coordinates into cartesian coordinates | -| PyYAML | | -| [QCA](http://delta.affinix.com/qca/) | QCA aims to provide a straightforward and cross-platform crypto API, using Qt datatypes and conventions. QCA separates the API from the implementation, using plugins known as Providers | -| [QGIS](http://www.qgis.org) | A Free and Open Source Geographic Information System | -| [Qwt](http://qwt.sourceforge.net/index.html) | The Qwt library contains GUI Components and utility classes which are primarily useful for programs with a technical background. Beside a framework for 2D plots it provides scales, sliders, dials, compasses, thermometers, wheels and knobs to control or display values, arrays, or ranges of type double. | -| [SIONlib](http://www.fz-juelich.de/ias/jsc/EN/Expertise/Support/Software/SIONlib/_node.html) | SIONlib is a scalable I/O library for parallel access to task-local files. The library not only supports writing and reading binary data to or from several thousands of processors into a single or a small number of physical files, but also provides global open and close functions to access SIONlib files in parallel. This package provides a stripped-down installation of SIONlib for use with performance tools (e.g., Score-P), with renamed symbols to avoid conflicts when an application using SIONlib itself is linked against a tool requiring a different SIONlib version. | -| [spatialindex](https://libspatialindex.github.io/index.html) | The purpose of this library is to provide: * An extensible framework that will support robust spatial indexing methods. * Support for sophisticated spatial queries. Range, point location, nearest neighbor and k-nearest neighbor as well as parametric queries (defined by spatial constraints) should be easy to deploy and run. * Easy to use interfaces for inserting, deleting and updating information. | -| [SpatiaLite](https://www.gaia-gis.it/fossil/libspatialite/index) | SpatiaLite is an open source library intended to extend the SQLite core to support fully fledged Spatial SQL capabilities. 
| -| [tbb](http://software.intel.com/en-us/articles/intel-tbb/) | Intel Threading Building Blocks 4.0 (Intel TBB) is a widely used, award-winning C++ template library for creating reliable, portable, and scalable parallel applications. Use Intel TBB for a simple and rapid way of developing robust task-based parallel applications that scale to available processor cores, are compatible with multiple environments, and are easier to maintain. Intel TBB is the most proficient way to implement future-proof parallel applications that tap into the power and performance of multicore and manycore hardware platforms. | -| [tensorflow](http://python.org/) | Python is a programming language that lets you work more quickly and integrate your systems more effectively. | -| [zlib](http://www.zlib.net/) | zlib is designed to be a free, general-purpose, legally unencumbered -- that is, not covered by any patents -- lossless data-compression library for use on virtually any computer hardware and operating system. | - -## Math - -| Module | Description | -| ------ | ----------- | -| [FIAT](https://bitbucket.org/fenics-project/fiat) | The FInite element Automatic Tabulator FIAT supports generation of arbitrary order instances of the Lagrange elements on lines, triangles, and tetrahedra. It is also capable of generating arbitrary order instances of Jacobi-type quadrature rules on the same element shapes. | -| [GEOS](http://trac.osgeo.org/geos) | GEOS (Geometry Engine - Open Source) is a C++ port of the Java Topology Suite (JTS) | -| [GMP](http://gmplib.org/) | GMP is a free library for arbitrary precision arithmetic, operating on signed integers, rational numbers, and floating point numbers. | -| [Harminv](http://ab-initio.mit.edu/wiki/index.php/Harminv) | Harminv is a free program (and accompanying library) to solve the problem of harmonic inversion - given a discrete-time, finite-length signal that consists of a sum of finitely-many sinusoids (possibly exponentially decaying) in a given bandwidth, it determines the frequencies, decay constants, amplitudes, and phases of those sinusoids. | -| [ISL](http://isl.gforge.inria.fr/) | isl is a library for manipulating sets and relations of integer points bounded by linear constraints. | -| [METIS](http://glaros.dtc.umn.edu/gkhome/metis/metis/overview) | METIS is a set of serial programs for partitioning graphs, partitioning finite element meshes, and producing fill reducing orderings for sparse matrices. The algorithms implemented in METIS are based on the multilevel recursive-bisection, multilevel k-way, and multi-constraint partitioning schemes. | -| MPC | | -| [MPFR](http://www.mpfr.org) | The MPFR library is a C library for multiple-precision floating-point computations with correct rounding. | -| MUMPS | | -| [numpy](http://www.numpy.org) | NumPy is the fundamental package for scientific computing with Python. It contains among other things: a powerful N-dimensional array object, sophisticated (broadcasting) functions, tools for integrating C/C++ and Fortran code, useful linear algebra, Fourier transform, and random number capabilities. Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases. | -| [Octave](http://www.gnu.org/software/octave/) | GNU Octave is a high-level interpreted language, primarily intended for numerical computations. 
| -| [ParMETIS](http://glaros.dtc.umn.edu/gkhome/metis/parmetis/overview) | ParMETIS is an MPI-based parallel library that implements a variety of algorithms for partitioning unstructured graphs, meshes, and for computing fill-reducing orderings of sparse matrices. ParMETIS extends the functionality provided by METIS and includes routines that are especially suited for parallel AMR computations and large scale numerical simulations. The algorithms implemented in ParMETIS are based on the parallel multilevel k-way graph-partitioning, adaptive repartitioning, and parallel multi-constrained partitioning schemes. | -| [ScientificPython](https://sourcesup.cru.fr/projects/scientific-py/) | ScientificPython is a collection of Python modules for scientific computing. It contains support for geometry, mathematical functions, statistics, physical units, IO, visualization, and parallelization. | -| [SCOTCH](http://gforge.inria.fr/projects/scotch/) | Software package and libraries for sequential and parallel graph partitioning, static mapping, and sparse matrix block ordering, and sequential mesh and hypergraph partitioning. | -| [sympy](http://sympy.org/) | SymPy is a Python library for symbolic mathematics. It aims to become a full-featured computer algebra system (CAS) while keeping the code as simple as possible in order to be comprehensible and easily extensible. SymPy is written entirely in Python and does not require any external libraries. | - -## Mpi - -| Module | Description | -| ------ | ----------- | -| [impi](http://software.intel.com/en-us/intel-mpi-library/) | The Intel(R) MPI Library for Linux* OS is a multi-fabric message passing library based on ANL MPICH2 and OSU MVAPICH2. The Intel MPI Library for Linux OS implements the Message Passing Interface, version 2 (MPI-2) specification. | -| [MPI_NET](http://www.osl.iu.edu/research/mpi.net/) | MPI.NET is a high-performance, easy-to-use implementation of the Message Passing Interface (MPI) for Microsoft's .NET environment | -| [MPICH](http://www.mpich.org/) | MPICH v3.x is an open source high-performance MPI 3.0 implementation. It does not support InfiniBand (use MVAPICH2 with InfiniBand devices). | -| mpt | | -| [MVAPICH2](http://mvapich.cse.ohio-state.edu/overview/mvapich2/) | This is an MPI 3.0 implementation. It is based on MPICH2 and MVICH. | -| [OpenMPI](http://www.open-mpi.org/) | The Open MPI Project is an open source MPI-2 implementation. | - -## Numlib - -| Module | Description | -| ------ | ----------- | -| [Armadillo](http://arma.sourceforge.net/) | Armadillo is an open-source C++ linear algebra library (matrix maths) aiming towards a good balance between speed and ease of use. Integer, floating point and complex numbers are supported, as well as a subset of trigonometric and statistics functions. | -| [arpack-ng](http://forge.scilab.org/index.php/p/arpack-ng/) | ARPACK is a collection of Fortran77 subroutines designed to solve large scale eigenvalue problems. | -| [FFTW](http://www.fftw.org) | FFTW is a C subroutine library for computing the discrete Fourier transform (DFT) in one or more dimensions, of arbitrary input size, and of both real and complex data. | -| [GSL](http://www.gnu.org/software/gsl/) | The GNU Scientific Library (GSL) is a numerical library for C and C++ programmers. The library provides a wide range of mathematical routines such as random number generators, special functions and least-squares fitting. 
| -| [Hypre](https://computation.llnl.gov/casc/linear_solvers/sls_hypre.html) | Hypre is a library for solving large, sparse linear systems of equations on massively parallel computers. The problems of interest arise in the simulation codes being developed at LLNL and elsewhere to study physical phenomena in the defense, environmental, energy, and biological sciences. | -| [imkl](http://software.intel.com/en-us/intel-mkl/) | Intel Math Kernel Library is a library of highly optimized, extensively threaded math routines for science, engineering, and financial applications that require maximum performance. Core math functions include BLAS, LAPACK, ScaLAPACK, Sparse Solvers, Fast Fourier Transforms, Vector Math, and more. | -| [LAPACKE](http://www.netlib.org/lapack/lapacke.html) | LAPACKE C Interface to LAPACK header files and library | -| [OpenBLAS](http://xianyi.github.com/OpenBLAS/) | OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. | -| [PETSc](http://www.mcs.anl.gov/petsc) | PETSc, pronounced PET-see (the S is silent), is a suite of data structures and routines for the scalable (parallel) solution of scientific applications modeled by partial differential equations. | -| [ScaLAPACK](http://www.netlib.org/scalapack/) | The ScaLAPACK (or Scalable LAPACK) library includes a subset of LAPACK routines redesigned for distributed memory MIMD parallel computers. | -| [SuiteSparse](http://www.cise.ufl.edu/research/sparse/SuiteSparse/) | SuiteSparse is a collection of libraries manipulate sparse matrices. | - -## Perf - -| Module | Description | -| ------ | ----------- | -| [Advisor](https://software.intel.com/intel-advisor-xe) | Vectorization Optimization and Thread Prototyping - Vectorize & thread code or performance “dies” - Easy workflow + data + tips = faster code faster - Prioritize, Prototype & Predict performance gain | -| [Cube](http://www.scalasca.org/software/cube-4.x/download.html) | Cube, which is used as performance report explorer for Scalasca and Score-P, is a generic tool for displaying a multi-dimensional performance space consisting of the dimensions (i) performance metric, (ii) call path, and (iii) system resource. Each dimension can be represented as a tree, where non-leaf nodes of the tree can be collapsed or expanded to achieve the desired level of granularity. | -| [ipp](http://software.intel.com/en-us/articles/intel-ipp/) | Intel Integrated Performance Primitives (Intel IPP) is an extensive library of multicore-ready, highly optimized software functions for multimedia, data processing, and communications applications. Intel IPP offers thousands of optimized functions covering frequently used fundamental algorithms. | -| MAP | | -| [OPARI2](http://www.score-p.org) | OPARI2, the successor of Forschungszentrum Juelich's OPARI, is a source-to-source instrumentation tool for OpenMP and hybrid codes. It surrounds OpenMP directives and runtime library calls with calls to the POMP2 measurement interface. | -| [OTF2](http://www.score-p.org) | The Open Trace Format 2 is a highly scalable, memory efficient event trace data format plus support library. It will become the new standard trace format for Scalasca, Vampir, and Tau and is open for other tools. | -| [PAPI](http://icl.cs.utk.edu/projects/papi/) | PAPI provides the tool designer and application engineer with a consistent interface and methodology for use of the performance counter hardware found in most major microprocessors. 
PAPI enables software engineers to see, in near real time, the relation between software performance and processor events. In addition Component PAPI provides access to a collection of components that expose performance measurement opportunites across the hardware and software stack. | -| perfboost | | -| perfcatcher | | -| PerfReports | | -| perfsuite | | -| [Vampir](http://www.vampir.eu) | The Vampir software tool provides an easy-to-use framework that enables developers to quickly display and analyze arbitrary program behavior at any level of detail. The tool suite implements optimized event analysis algorithms and customizable displays that enable fast and interactive rendering of very complex performance monitoring data. | -| [VampirServer](http://www.vampir.eu) | The Vampir software tool provides an easy-to-use framework that enables developers to quickly display and analyze arbitrary program behavior at any level of detail. The tool suite implements optimized event analysis algorithms and customizable displays that enable fast and interactive rendering of very complex performance monitoring data. | - -## Phys - -| Module | Description | -| ------ | ----------- | -| [Meep](http://ab-initio.mit.edu/wiki/index.php/Meep) | Meep (or MEEP) is a free finite-difference time-domain (FDTD) simulation software package developed at MIT to model electromagnetic systems. | -| [phono3py](http://python.org/) | Python is a programming language that lets you work more quickly and integrate your systems more effectively. | -| [phonopy](http://python.org/) | Python is a programming language that lets you work more quickly and integrate your systems more effectively. | -| Siesta | | -| VASP | | - -## System - -| Module | Description | -| ------ | ----------- | -| [eudev](https://wiki.gentoo.org/wiki/Project:Eudev) | eudev is a fork of systemd-udev with the goal of obtaining better compatibility with existing software such as OpenRC and Upstart, older kernels, various toolchains and anything else required by users and various distributions. | -| [hwloc](http://www.open-mpi.org/projects/hwloc/) | The Portable Hardware Locality (hwloc) software package provides a portable abstraction (across OS, versions, architectures, ...) of the hierarchical topology of modern architectures, including NUMA memory nodes, sockets, shared caches, cores and simultaneous multithreading. It also gathers various system attributes such as cache and memory information as well as the locality of I/O devices such as network interfaces, InfiniBand HCAs or GPUs. It primarily aims at helping applications with gathering information about modern computing hardware so as to exploit it accordingly and efficiently. | -| [libpciaccess](http://cgit.freedesktop.org/xorg/lib/libpciaccess/) | Generic PCI access library. | - -## Toolchain - -| Module | Description | -| ------ | ----------- | -| [foss]((none)) | GNU Compiler Collection (GCC) based compiler toolchain, including OpenMPI for MPI support, OpenBLAS (BLAS and LAPACK support), FFTW and ScaLAPACK. | -| [GNU](http://www.gnu.org/software/) | Compiler-only toolchain with GCC and binutils. | -| [gompi]((none)) | GNU Compiler Collection (GCC) based compiler toolchain, including OpenMPI for MPI support. 
| -| [iccifort](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel C, C++ and Fortran compilers | -| [ictce](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel Cluster Toolkit Compiler Edition provides Intel C/C++ and Fortran compilers, Intel MPI & Intel MKL. | -| [iimpi](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel C/C++ and Fortran compilers, alongside Intel MPI. | -| [intel](http://software.intel.com/en-us/intel-cluster-toolkit-compiler/) | Intel Cluster Toolkit Compiler Edition provides Intel C/C++ and Fortran compilers, Intel MPI & Intel MKL. | -| iompi | | -| [PRACE](http://www.prace-ri.eu/PRACE-Common-Production) | The PRACE Common Production Environment (PCPE) is a set of software tools and libraries that are planned to be available on all PRACE execution sites. The PCPE also defines a set of environment variables that try to make compilation on all sites as homogeneous and simple as possible. | -| [prace](http://www.prace-ri.eu/PRACE-Common-Production) | PRACE Common Production Environment (PCPE) Initialisation of the PRACE common production environment. This allows you to assume that the following tools/libraries are available by default in your PATH/environment. * Fortran, C, C++ Compilers * MPI * BLAS, LAPACK, BLACS, ScaLAPACK * FFTW * HDF5, NetCDF The compiler commands on are: * mpif90 - Fortran compiler * mpicc - C compiler * mpicxx - C++ compiler For more information on the PCPE please see the documentation at: http://www.prace-ri.eu/PRACE-Common-Production For help using this system, please see Local User Guide available at: http://prace-ri.eu/Best-Practice-Guide-Anselm-HTML | - -## Tools - -| Module | Description | -| ------ | ----------- | -| ANSYS | | -| [APR](http://apr.apache.org/) | Apache Portable Runtime (APR) libraries. | -| [APR-util](http://apr.apache.org/) | Apache Portable Runtime (APR) util libraries. | -| [Bash](http://www.gnu.org/software/bash) | Bash is an sh-compatible command language interpreter that executes commands read from the standard input or from a file. Bash also incorporates useful features from the Korn and C shells (ksh and csh). | -| [binutils](http://directory.fsf.org/project/binutils/) | binutils: GNU binary utilities | -| [bzip2](http://www.bzip.org/) | bzip2 is a freely available, patent free, high-quality data compressor. It typically compresses files to within 10% to 15% of the best available techniques (the PPM family of statistical compressors), whilst being around twice as fast at compression and six times faster at decompression. | -| [cURL](http://curl.haxx.se) | libcurl is a free and easy-to-use client-side URL transfer library, supporting DICT, FILE, FTP, FTPS, Gopher, HTTP, HTTPS, IMAP, IMAPS, LDAP, LDAPS, POP3, POP3S, RTMP, RTSP, SCP, SFTP, SMTP, SMTPS, Telnet and TFTP. libcurl supports SSL certificates, HTTP POST, HTTP PUT, FTP uploading, HTTP form based upload, proxies, cookies, user+password authentication (Basic, Digest, NTLM, Negotiate, Kerberos), file transfer resume, http proxy tunneling and more. | -| [EasyBuild](http://hpcugent.github.com/easybuild/) | EasyBuild is a software build and installation framework written in Python that allows you to install software in a structured, repeatable and robust way. | -| [expat](http://expat.sourceforge.net/) | Expat is an XML parser library written in C. 
It is a stream-oriented parser in which an application registers handlers for things the parser might find in the XML document (like start tags) | -| [git](http://git-scm.com/) | Git is a free and open source distributed version control system designed to handle everything from small to very large projects with speed and efficiency. | -| globus | | -| [gzip](http://www.gnu.org/software/gzip/) | gzip (GNU zip) is a popular data compression program as a replacement for compress | -| help2man | | -| [HPL](http://www.netlib.org/benchmark/hpl/) | HPL is a software package that solves a (random) dense linear system in double precision (64 bits) arithmetic on distributed-memory computers. It can thus be regarded as a portable as well as freely available implementation of the High Performance Computing Linpack Benchmark. | -| [Inspector](http://software.intel.com/en-us/intel-inspector-xe) | Intel Inspector XE 2013 is an easy to use memory error checker and thread checker for serial and parallel applications | -| [itac](http://software.intel.com/en-us/intel-trace-analyzer/) | The Intel Trace Collector is a low-overhead tracing library that performs event-based tracing in applications. The Intel Trace Analyzer provides a convenient way to monitor application activities gathered by the Intel Trace Collector through graphical displays. | -| [JOE](http://joe-editor.sourceforge.net) | JOE is a full featured terminal-based screen editor which is distributed under the GNU General Public License (GPL) | -| [likwid](https://github.com/RRZE-HPC/likwid) | Toolsuite of command line applications for performance oriented programmers | -| [Lmod](http://sourceforge.net/projects/lmod/) | Lmod is a Lua based module system. Modules allow for dynamic modification of a user's environment under Unix systems. See www.tacc.utexas.edu/tacc-projects/lmod for a complete description. Lmod is a new implementation that easily handles the MODULEPATH Hierarchical problem. It is drop-in replacement for TCL/C modules and reads TCL modulefiles directly. | -| MATLAB | | -| [Mercurial](http://mercurial.selenic.com/) | Mercurial is a free, distributed source control management tool. It efficiently handles projects of any size and offers an easy and intuitive interface. | -| [MIKE](http://www.mikepoweredbydhi.com) | MIKE Powered by DHI is a part of DHI, the global organisation dedicated to solving challenges in water environments worldwide. | -| [numactl](http://oss.sgi.com/projects/libnuma/) | The numactl program allows you to run your application program on specific cpu's and memory nodes. It does this by supplying a NUMA memory policy to the operating system before running your program. The libnuma library provides convenient ways for you to add NUMA memory policies into your own program. | -| [PAPI](http://icl.cs.utk.edu/projects/papi/) | PAPI provides the tool designer and application engineer with a consistent interface and methodology for use of the performance counter hardware found in most major microprocessors. PAPI enables software engineers to see, in near real time, the relation between software performance and processor events. In addition Component PAPI provides access to a collection of components that expose performance measurement opportunites across the hardware and software stack. 
| -| [parallel](http://savannah.gnu.org/projects/parallel/) | parallel: Build and execute shell commands in parallel | -| pigz | | -| [QEMU](http://wiki.qemu.org/Main_Page) | QEMU is a generic and open source machine emulator and virtualizer. | -| [RStudio](https://www.rstudio.com) | RStudio is a set of integrated tools designed to help you be more productive with R. It includes a console, syntax-highlighting editor that supports direct code execution, as well as tools for plotting, history, debugging and workspace management. | -| Scalasca | | -| Score-P | | -| [SDE](https://software.intel.com/en-us/articles/intel-software-development-emulator) | Intel Software Development Emulator is a pintool that enables the development of applications using instruction set extensions that are not currently implemented in hardware. | -| [Serf](http://serf.apache.org/) | The serf library is a high performance C-based HTTP client library built upon the Apache Portable Runtime (APR) library | -| Singularity | | -| [Subversion](http://subversion.apache.org/) | Subversion is an open source version control system. | -| [Szip](http://www.hdfgroup.org/doc_resource/SZIP/) | Szip compression software, providing lossless compression of scientific data | -| [tcsh](http://www.tcsh.org) | Tcsh is an enhanced, but completely compatible version of the Berkeley UNIX C shell (csh). It is a command language interpreter usable both as an interactive login shell and a shell script command processor. It includes a command-line editor, programmable word completion, spelling correction, a history mechanism, job control and a C-like syntax. | -| tmux | | -| [turbovnc](http://www.turbovnc.org) | TurboVNC is a derivative of VNC (Virtual Network Computing) that is tuned to provide peak performance for 3D and video workloads. | -| [util-linux](http://www.kernel.org/pub/linux/utils/util-linux) | Set of Linux utilities | -| [VDE2](http://vde.sourceforge.net) | VDE is an ethernet compliant virtual network that can be spawned over a set of physical computer over the Internet. VDE is part of virtualsquare project. | -| [VirtualGL](http://www.virtualgl.org) | VirtualGL is an open source toolkit that gives any Unix or Linux remote display software the ability to run OpenGL applications with full 3D hardware acceleration. | -| [VTune](http://software.intel.com/en-us/intel-vtune-amplifier-xe) | Intel VTune Amplifier XE 2016 is the premier performance profiler for C, C++, C#, Fortran, Assembly and Java. | -| [Wine](https://www.winehq.org) | Wine (originally an acronym for "Wine Is Not an Emulator") is a compatibility layer capable of running Windows applications on several POSIX-compliant operating systems, such as Linux, Mac OSX, & BSD. | -| [XZ](http://tukaani.org/xz/) | xz: XZ utilities | - ## Vis | Module | Description | | ------ | ----------- | | [cairo](http://cairographics.org) | Cairo is a 2D graphics library with support for multiple output devices. Currently supported output targets include the X Window System (via both Xlib and XCB), Quartz, Win32, image buffers, PostScript, PDF, and SVG file output. Experimental backends include OpenGL, BeOS, OS/2, and DirectFB | -| [ffmpeg](https://www.ffmpeg.org/) | A complete, cross-platform solution to record, convert and stream audio and video. | -| [fixesproto](http://www.freedesktop.org/wiki/Software/xlibs) | X.org FixesProto protocol headers. | -| [FLTK](http://www.fltk.org) | FLTK is a cross-platform C++ GUI toolkit for UNIX/Linux (X11), Microsoft Windows, and MacOS X. 
FLTK provides modern GUI functionality without the bloat and supports 3D graphics via OpenGL and its built-in GLUT emulation. | | [fontconfig](http://www.freedesktop.org/software/fontconfig) | Fontconfig is a library designed to provide system-wide font configuration, customization and application access. | | [freetype](http://freetype.org) | FreeType 2 is a software font engine that is designed to be small, efficient, highly customizable, and portable while capable of producing high-quality output (glyph images). It can be used in graphics libraries, display servers, font conversion tools, text image generation tools, and many other products as well. | | [gettext](http://www.gnu.org/software/gettext/) | GNU `gettext' is an important step for the GNU Translation Project, as it is an asset on which we may build many other steps. This package offers to programmers, translators, and even users, a well integrated set of tools and documentation | | [GLib](http://www.gtk.org/) | GLib is one of the base libraries of the GTK+ project | -| [GPI-2](http://www.gpi-site.com/gpi2/) | GPI-2 is an API for the development of scalable, asynchronous and fault tolerant parallel applications. | -| [grace](http://freecode.com/projects/grace) | Grace is a WYSIWYG 2D plotting tool for X Windows System and Motif. | -| [inputproto](http://www.freedesktop.org/wiki/Software/xlibs) | X.org InputProto protocol headers. | | [JasPer](http://www.ece.uvic.ca/~frodo/jasper/) | The JasPer Project is an open-source initiative to provide a free software-based reference implementation of the codec specified in the JPEG-2000 Part-1 standard. | -| [kbproto](http://www.freedesktop.org/wiki/Software/xlibs) | X.org KBProto protocol headers. | | [libGLU](ftp://ftp.freedesktop.org/pub/mesa/glu/) | The OpenGL Utility Library (GLU) is a computer graphics library for OpenGL. | -| [libICE](http://www.freedesktop.org/wiki/Software/xlibs) | X Inter-Client Exchange library for freedesktop.org | -| [libX11](http://www.freedesktop.org/wiki/Software/xlibs) | X11 client-side library | -| [libXau](http://www.freedesktop.org/wiki/Software/xlibs) | The libXau package contains a library implementing the X11 Authorization Protocol. This is useful for restricting client access to the display. | -| [libXdamage](http://www.freedesktop.org/wiki/Software/xlibs) | X Damage extension library | -| [libXdmcp](http://www.freedesktop.org/wiki/Software/xlibs) | The libXdmcp package contains a library implementing the X Display Manager Control Protocol. This is useful for allowing clients to interact with the X Display Manager. | -| [libXext](http://www.freedesktop.org/wiki/Software/xlibs) | Common X Extensions library | -| [libXfixes](http://www.freedesktop.org/wiki/Software/xlibs) | X Fixes extension library | -| [libXfont](http://www.freedesktop.org/wiki/Software/xlibs) | X font libary | -| [libXft](http://www.freedesktop.org/wiki/Software/xlibs) | X11 client-side library | -| [libXinerama](http://www.freedesktop.org/wiki/Software/xlibs) | Xinerama multiple monitor library | -| [libXrender](http://www.freedesktop.org/wiki/Software/xlibs) | X11 client-side library | -| [libXt](http://www.freedesktop.org/wiki/Software/xlibs) | libXt provides the X Toolkit Intrinsics, an abstract widget library upon which other toolkits are based. Xt is the basis for many toolkits, including the Athena widgets (Xaw), and LessTif (a Motif implementation). 
| -| matplotlib | | | [Mesa](http://www.mesa3d.org/) | Mesa is an open-source implementation of the OpenGL specification - a system for rendering interactive 3D graphics. | -| [motif](http://motif.ics.com/) | Motif refers to both a graphical user interface (GUI) specification and the widget toolkit for building applications that follow that specification under the X Window System on Unix and other POSIX-compliant systems. It was the standard toolkit for the Common Desktop Environment and thus for Unix. | -| [OpenCV](http://opencv.org/) | OpenCV (Open Source Computer Vision Library) is an open source computer vision and machine learning software library. OpenCV was built to provide a common infrastructure for computer vision applications and to accelerate the use of machine perception in the commercial products. | -| [OpenDX](http://www.opendx.org) | Open source visualization software package based on IBM's Visualization Data Explorer. | -| [OSPRay](http://www.ospray.org) | A Ray Tracing Based Rendering Engine for High-Fidelity Visualization | -| [p4vasp](http://www.p4vasp.at) | p4vasp is a visualization suite for the Vienna Ab-initio Simulation Package (VASP). It contains an extensible GUI framework, that can be used to view material structure, density of states, band-structure and more. | | [ParaView](http://www.paraview.org) | ParaView is a scientific parallel visualizer. | | [pixman](http://www.pixman.org/) | Pixman is a low-level software library for pixel manipulation, providing features such as image compositing and trapezoid rasterization. Important users of pixman are the cairo graphics library and the X server. | -| [PyQt](http://www.riverbankcomputing.co.uk/software/pyqt) | PyQt is a set of Python v2 and v3 bindings for Digia's Qt application framework. | -| [SUMO](http://www.sumo.dlr.de/wiki/Main_Page) | Simulation of Urban MObility (SUMO) is an open source, highly portable, microscopic and continuous road traffic simulation package designed to handle large road networks. | | [Tk](http://www.tcl.tk/) | Tk is an open source, cross-platform widget toolchain that provides a library of basic elements for building a graphical user interface (GUI) in many different programming languages. | | [VisIt](https://wci.llnl.gov/simulation/computer-codes/visit) | VisIt is an Open Source, interactive, scalable, visualization, animation and analysis tool | +| [X11](https://www.x.org) | The X Window System (X11) is a windowing system for bitmap displays | diff --git a/docs.it4i/salomon/capacity-computing.md b/docs.it4i/salomon/capacity-computing.md index 39b4c029903b04c067c9f9e2d7e48d13fac3f133..8b0a331fb605b9ffebf9a5835ef383a48613ea55 100644 --- a/docs.it4i/salomon/capacity-computing.md +++ b/docs.it4i/salomon/capacity-computing.md @@ -1,4 +1,4 @@ -# Capacity computing +# Capacity Computing ## Introduction @@ -16,7 +16,7 @@ However, executing huge number of jobs via the PBS queue may strain the system. ## Policy 1. A user is allowed to submit at most 100 jobs. Each job may be [a job array](capacity-computing/#job-arrays). -1. The array size is at most 1000 subjobs. +1. The array size is at most 1500 subjobs. ## Job Arrays @@ -53,7 +53,7 @@ Then we create jobscript: #PBS -q qprod #PBS -l select=1:ncpus=24,walltime=02:00:00 -# change to local scratch directory +# change to scratch directory SCR=/scratch/work/user/$USER/$PBS_JOBID mkdir -p $SCR ; cd $SCR || exit @@ -70,7 +70,7 @@ cp $PBS_O_WORKDIR/$TASK input ; cp $PBS_O_WORKDIR/myprog.x . 
cp output $PBS_O_WORKDIR/$TASK.out
```

-In this example, the submit directory holds the 900 input files, executable myprog.x and the jobscript file. As input for each run, we take the filename of input file from created tasklist file. We copy the input file to scratch /scratch/work/user/$USER/$PBS_JOBID, execute the myprog.x and copy the output file back to the submit directory, under the $TASK.out name. The myprog.x runs on one node only and must use threads to run in parallel. Be aware, that if the myprog.x **is not multithreaded**, then all the **jobs are run as single thread programs in sequential** manner. Due to allocation of the whole node, the **accounted time is equal to the usage of whole node**, while using only 1/24 of the node!
+In this example, the submit directory holds the 900 input files, the executable myprog.x, and the jobscript file. As input for each run, we take the filename of an input file from the created tasklist file. We copy the input file to the scratch directory (/scratch/work/user/$USER/$PBS_JOBID), execute myprog.x, and copy the output file back to the submit directory under the $TASK.out name. The myprog.x runs on one node only and must use threads to run in parallel. Be aware that if myprog.x **is not multithreaded**, then all the **jobs are run as single-thread programs in a sequential** manner. Due to the allocation of the whole node, the **accounted time is equal to the usage of the whole node**, while using only 1/24 of the node!

If huge number of parallel multicore (in means of multinode multithread, e. g. MPI enabled) jobs is needed to run, then a job array approach should also be used. The main difference compared to previous example using one node is that the local scratch should not be used (as it's not shared between nodes) and MPI or other technique for parallel multinode run has to be used properly.

@@ -154,12 +154,12 @@ Read more on job arrays in the [PBSPro Users guide](../pbspro/).

!!! note
    Use GNU parallel to run many single core tasks on one node.

-GNU parallel is a shell tool for executing jobs in parallel using one or more computers. A job can be a single command or a small script that has to be run for each of the lines in the input. GNU parallel is most useful in running single core jobs via the queue system on Anselm.
+GNU parallel is a shell tool for executing jobs in parallel using one or more computers. A job can be a single command or a small script that has to be run for each of the lines in the input. GNU parallel is most useful in running single core jobs via the queue system on the cluster.

For more information and examples see the parallel man page:

```console
-$ module add parallel
+$ ml parallel
$ man parallel
```

@@ -186,9 +186,9 @@ Then we create jobscript:
#PBS -l select=1:ncpus=24,walltime=02:00:00

[ -z "$PARALLEL_SEQ" ] &&
-{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/tasklist $0 ; }
+{ ml parallel ; exec parallel -a $PBS_O_WORKDIR/tasklist $0 ; }

-# change to local scratch directory
+# change to scratch directory
SCR=/scratch/work/user/$USER/$PBS_JOBID/$PARALLEL_SEQ
mkdir -p $SCR ; cd $SCR || exit

@@ -205,7 +205,7 @@ cat input > output
cp output $PBS_O_WORKDIR/$TASK.out
```

-In this example, tasks from tasklist are executed via the GNU parallel. The jobscript executes multiple instances of itself in parallel, on all cores of the node. Once an instace of jobscript is finished, new instance starts until all entries in tasklist are processed. Currently processed entry of the joblist may be retrieved via $1 variable. Variable $TASK expands to one of the input filenames from tasklist. We copy the input file to local scratch, execute the myprog.x and copy the output file back to the submit directory, under the $TASK.out name.
+In this example, tasks from tasklist are executed via GNU parallel. The jobscript executes multiple instances of itself in parallel, on all cores of the node. Once an instance of the jobscript is finished, a new instance starts until all entries in tasklist are processed. The currently processed entry of the tasklist may be retrieved via the $1 variable. Variable $TASK expands to one of the input filenames from tasklist. We copy the input file to the scratch, execute myprog.x, and copy the output file back to the submit directory under the $TASK.out name.

### Submit the Job

@@ -237,7 +237,7 @@ Combined approach, very similar to job arrays, can be taken. Job array is submit

Example:

-Assume we have 992 input files with name beginning with "file" (e. g. file001, ..., file992). Assume we would like to use each of these input files with program executable myprog.x, each as a separate single core job. We call these single core jobs tasks.
+Assume we have 960 input files with names beginning with "file" (e. g. file001, ..., file960). Assume we would like to use each of these input files with the program executable myprog.x, each as a separate single core job. We call these single core jobs tasks.

First, we create a tasklist file, listing all tasks - all input files in our example:

```console
$ find . -name 'file*' > tasklist
```

Next we create a file, controlling how many tasks will be executed in one subjob

```console
-$ seq 32 > numtasks
+$ seq 48 > numtasks
```

Then we create jobscript:

@@ -260,9 +260,9 @@ Then we create jobscript:
#PBS -l select=1:ncpus=24,walltime=02:00:00

[ -z "$PARALLEL_SEQ" ] &&
-{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/numtasks $0 ; }
+{ ml parallel ; exec parallel -a $PBS_O_WORKDIR/numtasks $0 ; }

-# change to local scratch directory
+# change to scratch directory
SCR=/scratch/work/user/$USER/$PBS_JOBID/$PARALLEL_SEQ
mkdir -p $SCR ; cd $SCR || exit

@@ -281,7 +281,7 @@ cat input > output
cp output $PBS_O_WORKDIR/$TASK.out
```

-In this example, the jobscript executes in multiple instances in parallel, on all cores of a computing node. Variable $TASK expands to one of the input filenames from tasklist. We copy the input file to local scratch, execute the myprog.x and copy the output file back to the submit directory, under the $TASK.out name. The numtasks file controls how many tasks will be run per subjob. Once an task is finished, new task starts, until the number of tasks in numtasks file is reached.
+In this example, the jobscript executes in multiple instances in parallel, on all cores of a computing node. Variable $TASK expands to one of the input filenames from tasklist. We copy the input file to the scratch, execute myprog.x, and copy the output file back to the submit directory under the $TASK.out name. The numtasks file controls how many tasks will be run per subjob. Once a task is finished, a new task starts, until the number of tasks in the numtasks file is reached.

!!! note
    Select subjob walltime and number of tasks per subjob carefully

@@ -294,14 +294,14 @@ When deciding this values, think about following guiding rules :

### Submit the Job Array (-J)

-To submit the job array, use the qsub -J command. The 992 tasks' job of the [example above](capacity-computing/#combined_example) may be submitted like this:
+To submit the job array, use the qsub -J command. The 960 tasks' job of the [example above](capacity-computing/#combined_example) may be submitted like this:

```console
-$ qsub -N JOBNAME -J 1-992:32 jobscript
+$ qsub -N JOBNAME -J 1-960:48 jobscript
12345[].dm2
```

-In this example, we submit a job array of 31 subjobs. Note the -J 1-992:**48**, this must be the same as the number sent to numtasks file. Each subjob will run on full node and process 24 input files in parallel, 48 in total per subjob. Every subjob is assumed to complete in less than 2 hours.
+In this example, we submit a job array of 20 subjobs. Note the -J 1-960:48; this must be the same as the number written to the numtasks file. Each subjob will run on a full node and process 24 input files in parallel, 48 in total per subjob. Every subjob is assumed to complete in less than 2 hours.

!!! note
    Use #PBS directives in the beginning of the jobscript file, don't forget to set your valid PROJECT_ID and desired queue.

@@ -310,7 +310,7 @@ In this example, we submit a job array of 31 subjobs. Note the -J 1-992:**48**,

Download the examples in [capacity.zip](capacity.zip), illustrating the above listed ways to run huge number of jobs. We recommend to try out the examples, before using this for running production jobs.

-Unzip the archive in an empty directory on Anselm and follow the instructions in the README file
+Unzip the archive in an empty directory on the cluster and follow the instructions in the README file.

```console
$ unzip capacity.zip
diff --git a/docs.it4i/salomon/environment-and-modules.md b/docs.it4i/salomon/environment-and-modules.md
index 5de3931c3d2b060d69a544343836c46caba20509..d52d0e2b56fc20ca5cc8576487ce74de33780b9e 100644
--- a/docs.it4i/salomon/environment-and-modules.md
+++ b/docs.it4i/salomon/environment-and-modules.md
@@ -64,13 +64,13 @@ The modules may be loaded, unloaded and switched, according to momentary needs.
To check available modules use

```console
-$ module avail **or** ml av
+$ ml av
```

To load a module, for example the Open MPI module use

```console
-$ module load OpenMPI **or** ml OpenMPI
+$ ml OpenMPI
```

loading the Open MPI module will set up paths and environment variables of your active shell such that you are ready to run the Open MPI software

@@ -78,13 +78,13 @@ loading the Open MPI module will set up paths and environment variables of your
To check loaded modules use

```console
-$ module list **or** ml
+$ ml
```

To unload a module, for example the Open MPI module use

```console
-$ module unload OpenMPI **or** ml -OpenMPI
+$ ml -OpenMPI
```

Learn more on modules by reading the module man page

diff --git a/docs.it4i/salomon/ib-single-plane-topology.md b/docs.it4i/salomon/ib-single-plane-topology.md
index 859858c83b4333cd9fdad3383ba40694da8d9d27..e8f72801a64ad08b4da948e990030bda3ef04e1a 100644
--- a/docs.it4i/salomon/ib-single-plane-topology.md
+++ b/docs.it4i/salomon/ib-single-plane-topology.md
@@ -1,4 +1,4 @@
-# IB single-plane topology
+# IB Single-Plane Topology

A complete M-Cell assembly consists of four compute racks. Each rack contains 4 x physical IRUs - Independent rack units. Using one dual socket node per one blade slot leads to 8 logical IRUs. Each rack contains 4 x 2 SGI ICE X IB Premium Blades.
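+
+For orientation, the arithmetic implied by the counts above: 4 physical IRUs per rack give 2 x 4 = 8 logical IRUs per rack (one dual socket node per blade slot), so a complete M-Cell of four racks spans 4 x 8 = 32 logical IRUs and 4 x (4 x 2) = 32 IB Premium Blades.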
diff --git a/docs.it4i/salomon/job-priority.md b/docs.it4i/salomon/job-priority.md
index 5455a1ac4df144a9defc62d7a6511b3890bac5e7..3083c8396b73b91e0282d950ca7ac888f7a64153 100644
--- a/docs.it4i/salomon/job-priority.md
+++ b/docs.it4i/salomon/job-priority.md
@@ -1,4 +1,4 @@
-# Job scheduling
+# Job Scheduling

## Job Execution Priority

diff --git a/docs.it4i/salomon/job-submission-and-execution.md b/docs.it4i/salomon/job-submission-and-execution.md
index 9c7ce35a6ba00c469e2b2c63a480e42e5b36e85c..20fc147e7c43be7a344302dbce663218b65870d1 100644
--- a/docs.it4i/salomon/job-submission-and-execution.md
+++ b/docs.it4i/salomon/job-submission-and-execution.md
@@ -1,4 +1,4 @@
-# Job submission and execution
+# Job Submission and Execution

## Job Submission

@@ -69,6 +69,32 @@ $ qsub -A OPEN-0-0 -I -q qlong -l select=4:ncpus=24:accelerator=True:naccelerat

In this example, we allocate 4 nodes, with 24 cores per node (totalling 96 cores), with 2 Xeon Phi 7120p cards per node (totalling 8 Phi cards), running interactive job for 56 hours. The accelerator model name was omitted.

+#### Intel Xeon Phi - Queue QMIC
+
+Example executions:
+
+```console
+-l select=1
+exec_vnode = (r21u05n581-mic0:naccelerators=1:ncpus=0)
+
+-l select=4
+(r21u05n581-mic0:naccelerators=1:ncpus=0)+(r21u05n581-mic1:naccelerators=1:ncpus=0)+(r21u06n582-mic0:naccelerators=1:ncpus=0)+(r21u06n582-mic1:naccelerators=1:ncpus=0)
+
+-l select=4:naccelerators=1
+(r21u05n581-mic0:naccelerators=1:ncpus=0)+(r21u05n581-mic1:naccelerators=1:ncpus=0)+(r21u06n582-mic0:naccelerators=1:ncpus=0)+(r21u06n582-mic1:naccelerators=1:ncpus=0)
+
+-l select=1:naccelerators=2
+(r21u05n581-mic0:naccelerators=1+r21u05n581-mic1:naccelerators=1)
+
+-l select=2:naccelerators=2
+(r21u05n581-mic0:naccelerators=1+r21u05n581-mic1:naccelerators=1)+(r21u06n582-mic0:naccelerators=1+r21u06n582-mic1:naccelerators=1)
+
+-l select=1:ncpus=24:naccelerators=2
+(r22u32n610:ncpus=24+r22u32n610-mic0:naccelerators=1+r22u32n610-mic1:naccelerators=1)
+
+-l select=1:ncpus=24:naccelerators=0+4
+(r33u17n878:ncpus=24:naccelerators=0)+(r33u13n874-mic0:naccelerators=1:ncpus=0)+(r33u13n874-mic1:naccelerators=1:ncpus=0)+(r33u16n877-mic0:naccelerators=1:ncpus=0)+(r33u16n877-mic1:naccelerators=1:ncpus=0)
+```
+
### UV2000 SMP

!!! note

@@ -82,13 +108,25 @@ The UV2000 (node uv1) offers 3TB of RAM and 104 cores, distributed in 13 NUMA no
$ qsub -A OPEN-0-0 -q qfat -l select=13 ./myjob
```

-In this example, we allocate all 13 NUMA nodes (corresponds to 13 chunks), 104 cores of the SGI UV2000 node for 72 hours. Jobscript myjob will be executed on the node uv1.
+In this example, we allocate all 13 NUMA nodes (corresponding to 13 chunks), 104 cores of the SGI UV2000 node for 24 hours. Jobscript myjob will be executed on the node uv1.

```console
$ qsub -A OPEN-0-0 -q qfat -l select=1:mem=2000GB ./myjob
```

-In this example, we allocate 2000GB of memory on the UV2000 for 72 hours. By requesting 2000GB of memory, 10 chunks are allocated. Jobscript myjob will be executed on the node uv1.
+In this example, we allocate 2000GB of memory on the UV2000 for 24 hours. By requesting 2000GB of memory, memory from 10 chunks and 8 cores are allocated. Jobscript myjob will be executed on the node uv1.
+
+```console
+$ qsub -A OPEN-0-0 -q qfat -l select=1:mem=3099GB,walltime=48:00:00 ./myjob
+```
+
+In this example, we allocate 3099GB of memory on the UV2000 for 48 hours. By requesting 3099GB of memory, memory from all 13 chunks and 8 cores are allocated. Jobscript myjob will be executed on the node uv1.
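+
+To verify how such a request was mapped onto the UV2000 chunks, the exec_vnode attribute of the job can be inspected once it is queued or running; a minimal sketch, assuming a placeholder job ID of 12345:
+
+```console
+$ qstat -f 12345 | grep exec_vnode
+```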
+ +```console +$ qsub -A OPEN-0-0 -q qfat -l select=2:mem=1000GB,walltime=48:00:00 ./myjob +``` + +In this example, we allocate 2000GB of memory and 16 cores on the UV2000 for 48 hours. By requesting 1000GB of memory per chunk, 2000GB of memory and 16 cores are allocated. Jobscript myjob will be executed on the node uv1. ### Useful Tricks diff --git a/docs.it4i/salomon/prace.md b/docs.it4i/salomon/prace.md index a3c80fa840dd1ff4ccb0dd17cd1f3d82001bdbdb..fe6b1dfd24c08cca1b3e1b3e1f6d931b6bf89a26 100644 --- a/docs.it4i/salomon/prace.md +++ b/docs.it4i/salomon/prace.md @@ -46,11 +46,11 @@ To check whether your proxy certificate is still valid (by default it's valid 12 $ grid-proxy-info ``` -To access Salomon cluster, two login nodes running GSI SSH service are available. The service is available from public Internet as well as from the internal PRACE network (accessible only from other PRACE partners). +To access Salomon cluster, four login nodes running GSI SSH service are available. The service is available from public Internet as well as from the internal PRACE network (accessible only from other PRACE partners). #### Access From PRACE Network: -It is recommended to use the single DNS name salomon-prace.it4i.cz which is distributed between the two login nodes. If needed, user can login directly to one of the login nodes. The addresses are: +It is recommended to use the single DNS name salomon-prace.it4i.cz which is distributed between the four login nodes. If needed, user can login directly to one of the login nodes. The addresses are: | Login address | Port | Protocol | Login node | | ---------------------------- | ---- | -------- | -------------------------------- | @@ -72,7 +72,7 @@ $ gsissh `prace_service -i -s salomon` #### Access From Public Internet: -It is recommended to use the single DNS name salomon.it4i.cz which is distributed between the two login nodes. If needed, user can login directly to one of the login nodes. The addresses are: +It is recommended to use the single DNS name salomon.it4i.cz which is distributed between the four login nodes. If needed, user can login directly to one of the login nodes. The addresses are: | Login address | Port | Protocol | Login node | | ---------------------------- | ---- | -------- | -------------------------------- | diff --git a/docs.it4i/salomon/resource-allocation-and-job-execution.md b/docs.it4i/salomon/resource-allocation-and-job-execution.md index c960f3871cc94efbaa070eadae51775b00689405..b1ab38d58d926f817537a986d3d912a27f5752ca 100644 --- a/docs.it4i/salomon/resource-allocation-and-job-execution.md +++ b/docs.it4i/salomon/resource-allocation-and-job-execution.md @@ -10,6 +10,7 @@ The resources are allocated to the job in a fair-share fashion, subject to const * **qprod**, the Production queue * **qlong**, the Long queue * **qmpp**, the Massively parallel queue +* **qmic**, the 864 MIC nodes queue * **qfat**, the queue to access SMP UV2000 machine * **qfree**, the Free resource utilization queue diff --git a/docs.it4i/salomon/resources-allocation-policy.md b/docs.it4i/salomon/resources-allocation-policy.md index 2408b8fb8adbb187a39235999eb75d53c4d53da8..95f3a016ae06524b59372af5b79607f03657cdd6 100644 --- a/docs.it4i/salomon/resources-allocation-policy.md +++ b/docs.it4i/salomon/resources-allocation-policy.md @@ -1,5 +1,7 @@ # Resources Allocation Policy +## Job Queue Policies + The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and resources available to the Project. 
The fair-share at Anselm ensures that individual users may consume approximately equal amount of resources per week. Detailed information in the [Job scheduling](job-priority/) section. The resources are accessible via several queues for queueing the jobs. The queues provide prioritized and exclusive access to the computational resources. Following table provides the queue partitioning overview: !!! note @@ -11,12 +13,13 @@ The resources are allocated to the job in a fair-share fashion, subject to const | **qprod** Production queue | yes | > 0 | 1006 nodes, max 86 per job | 24 | 0 | no | 24 / 48h | | **qlong** Long queue | yes | > 0 | 256 nodes, max 40 per job, only non-accelerated nodes allowed | 24 | 0 | no | 72 / 144h | | **qmpp** Massive parallel queue | yes | > 0 | 1006 nodes | 24 | 0 | yes | 2 / 4h | -| **qfat** UV2000 queue | yes | > 0 | 1 (uv1) | 8 | 0 | yes | 24 / 48h | -| **qfree** Free resource queue | yes | none required | 752 nodes, max 86 per job | 24 | -1024 | no | 12 / 12h | +| **qfat** UV2000 queue | yes | > 0 | 1 (uv1) | 8 | 200 | yes | 24 / 48h | +| **qfree** Free resource queue | yes | none required | 752 nodes, max 86 per job | 24 | -1024 | no | 12 / 12h | | **qviz** Visualization queue | yes | none required | 2 (with NVIDIA Quadro K5000) | 4 | 150 | no | 1 / 8h | +| **qmic** Intel Xeon Phi cards | yes | > 0 | 864 Intel Xeon Phi cards, max 8 mic per job | 0 | 0 | no | 24 / 48h | !!! note - **The qfree queue is not free of charge**. [Normal accounting](resources-allocation-policy/#resources-accounting-policy) applies. However, it allows for utilization of free resources, once a Project exhausted all its allocated computational resources. This does not apply for Directors Discreation's projects (DD projects) by default. Usage of qfree after exhaustion of DD projects computational resources is allowed after request for this queue. + **The qfree queue is not free of charge**. [Normal accounting](#resource-accounting-policy) applies. However, it allows for utilization of free resources, once a Project exhausted all its allocated computational resources. This does not apply to Directors Discretion (DD projects) but may be allowed upon request. * **qexp**, the Express queue: This queue is dedicated for testing and running very small jobs. It is not required to specify a project to enter the qexp. There are 2 nodes always reserved for this queue (w/o accelerator), maximum 8 nodes are available via the qexp for a particular user. The nodes may be allocated on per core basis. No special authorization is required to use it. The maximum runtime in qexp is 1 hour. * **qprod**, the Production queue: This queue is intended for normal production runs. It is required that active project with nonzero remaining resources is specified to enter the qprod. All nodes may be accessed via the qprod queue, however only 86 per job. Full nodes, 24 cores per node are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qprod is 48 hours. @@ -25,15 +28,16 @@ The resources are allocated to the job in a fair-share fashion, subject to const * **qfat**, the UV2000 queue. This queue is dedicated to access the fat SGI UV2000 SMP machine. The machine (uv1) has 112 Intel IvyBridge cores at 3.3GHz and 3.25TB RAM (8 cores and 128GB RAM are dedicated for system). An PI needs explicitly ask support for authorization to enter the queue for all users associated to her/his Project. 
* **qfree**, the Free resource queue: The queue qfree is intended for utilization of free resources, after a Project exhausted all its allocated computational resources (Does not apply to DD projects by default. DD projects have to request for persmission on qfree after exhaustion of computational resources.). It is required that active project is specified to enter the queue, however no remaining resources are required. Consumed resources will be accounted to the Project. Only 178 nodes without accelerator may be accessed from this queue. Full nodes, 24 cores per node are allocated. The queue runs with very low priority and no special authorization is required to use it. The maximum runtime in qfree is 12 hours. * **qviz**, the Visualization queue: Intended for pre-/post-processing using OpenGL accelerated graphics. Currently when accessing the node, each user gets 4 cores of a CPU allocated, thus approximately 73 GB of RAM and 1/7 of the GPU capacity (default "chunk"). If more GPU power or RAM is required, it is recommended to allocate more chunks (with 4 cores each) up to one whole node per user, so that all 28 cores, 512 GB RAM and whole GPU is exclusive. This is currently also the maximum allowed allocation per one user. One hour of work is allocated by default, the user may ask for 2 hours maximum. +* **qmic**, the queue qmic to access MIC nodes. It is required that active project with nonzero remaining resources is specified to enter the qmic. All 864 MICs are included. !!! note To access node with Xeon Phi co-processor user needs to specify that in [job submission select statement](job-submission-and-execution/). -## Notes +## Queue Notes -The job wall clock time defaults to **half the maximum time**, see table above. Longer wall time limits can be [set manually, see examples](job-submission-and-execution/). +The job wall-clock time defaults to **half the maximum time**, see table above. Longer wall time limits can be [set manually, see examples](job-submission-and-execution/). -Jobs that exceed the reserved wall clock time (Req'd Time) get killed automatically. Wall clock time limit can be changed for queuing jobs (state Q) using the qalter command, however can not be changed for a running job (state R). +Jobs that exceed the reserved wall-clock time (Req'd Time) get killed automatically. Wall-clock time limit can be changed for queuing jobs (state Q) using the qalter command, however can not be changed for a running job (state R). Salomon users may check current queue configuration at <https://extranet.it4i.cz/rsweb/salomon/queues>. @@ -109,24 +113,6 @@ Options: --incl-finished Include finished jobs ``` -## Resources Accounting Policy - -### Core-Hours - -The resources that are currently subject to accounting are the core-hours. The core-hours are accounted on the wall clock basis. The accounting runs whenever the computational cores are allocated or blocked via the PBS Pro workload manager (the qsub command), regardless of whether the cores are actually used for any calculation. 1 core-hour is defined as 1 processor core allocated for 1 hour of wall clock time. Allocating a full node (24 cores) for 1 hour accounts to 24 core-hours. See example in the [Job submission and execution](job-submission-and-execution/) section. +---8<--- "resource_accounting.md" -### Check Consumed Resources - -!!! 
note - The **it4ifree** command is a part of it4i.portal.clients package, located here: <https://pypi.python.org/pypi/it4i.portal.clients> - -User may check at any time, how many core-hours have been consumed by himself/herself and his/her projects. The command is available on clusters' login nodes. - -```console -$ it4ifree -Password: - PID Total Used ...by me Free - -------- ------- ------ -------- ------- - OPEN-0-0 1500000 400644 225265 1099356 - DD-13-1 10000 2606 2606 7394 -``` +---8<--- "mathjax.md" diff --git a/docs.it4i/salomon/shell-and-data-access.md b/docs.it4i/salomon/shell-and-data-access.md index c3aad60a094512084e56bd3b3f68f082bda37ee5..c02f2bc187caf8e5472c6276944e6a52eee8f78e 100644 --- a/docs.it4i/salomon/shell-and-data-access.md +++ b/docs.it4i/salomon/shell-and-data-access.md @@ -19,9 +19,17 @@ The authentication is by the [private key](../general/accessing-the-clusters/she !!! note Please verify SSH fingerprints during the first logon. They are identical on all login nodes: + + md5: + f6:28:98:e4:f9:b2:a6:8f:f2:f4:2d:0a:09:67:69:80 (DSA) 70:01:c9:9a:5d:88:91:c7:1b:c0:84:d1:fa:4e:83:5c (RSA) + sha256: + + epkqEU2eFzXnMeMMkpX02CykyWjGyLwFj528Vumpzn4 (DSA) + WNIrR7oeQDYpBYy4N2d5A6cJ2p0837S7gzzTpaDBZrc (RSA) + Private key authentication: On **Linux** or **Mac**, use diff --git a/docs.it4i/salomon/software/ansys/ansys-cfx.md b/docs.it4i/salomon/software/ansys/ansys-cfx.md deleted file mode 100644 index 21ce8f93b16958a184d15af5235830e9d39406b9..0000000000000000000000000000000000000000 --- a/docs.it4i/salomon/software/ansys/ansys-cfx.md +++ /dev/null @@ -1,55 +0,0 @@ -# ANSYS CFX - -[ANSYS CFX](http://www.ansys.com/products/fluids/ansys-cfx) software is a high-performance, general purpose fluid dynamics program that has been applied to solve wide-ranging fluid flow problems for over 20 years. At the heart of ANSYS CFX is its advanced solver technology, the key to achieving reliable and accurate solutions quickly and robustly. The modern, highly parallelized solver is the foundation for an abundant choice of physical models to capture virtually any type of phenomena related to fluid flow. The solver and its many physical models are wrapped in a modern, intuitive, and flexible GUI and user environment, with extensive capabilities for customization and automation using session files, scripting and a powerful expression language. - -To run ANSYS CFX in batch mode you can utilize/modify the default cfx.pbs script and execute it via the qsub command. - -```bash -#!/bin/bash -#PBS -l nodes=2:ppn=16 -#PBS -q qprod -#PBS -N $USER-CFX-Project -#PBS -A XX-YY-ZZ - -#! 
Mail to user when job terminate or abort -#PBS -m ae - -#!change the working directory (default is home directory) -#cd <working directory> (working directory must exists) -WORK_DIR="/scratch/$USER/work" -cd $WORK_DIR - -echo Running on host `hostname` -echo Time is `date` -echo Directory is `pwd` -echo This jobs runs on the following processors: -echo `cat $PBS_NODEFILE` - -module load ansys - -#### Set number of processors per host listing -#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2) -procs_per_host=1 -#### Create host list -hl="" -for host in `cat $PBS_NODEFILE` -do - if ["$hl" = "" ] - then hl="$host:$procs_per_host" - else hl="${hl}:$host:$procs_per_host" - fi -done - -echo Machines: $hl - -#-dev input.def includes the input of CFX analysis in DEF format -#-P the name of prefered license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial)) -/ansys_inc/v145/CFX/bin/cfx5solve -def input.def -size 4 -size-ni 4x -part-large -start-method "Platform MPI Distributed Parallel" -par-dist $hl -P aa_r -``` - -Header of the pbs file (above) is common and description can be find on [this site](../../job-submission-and-execution/). SVS FEM recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources. - -Working directory has to be created before sending pbs job into the queue. Input file should be in working directory or full path to input file has to be specified. >Input file has to be defined by common CFX def file which is attached to the cfx solver via parameter -def - -**License** should be selected by parameter -P (Big letter **P**). Licensed products are the following: aa_r (ANSYS **Academic** Research), ane3fl (ANSYS Multiphysics)-**Commercial**. -[More about licensing here](licensing/) diff --git a/docs.it4i/salomon/software/ansys/ansys-fluent.md b/docs.it4i/salomon/software/ansys/ansys-fluent.md deleted file mode 100644 index 27469a1c559355d1347ba3cfd76e303893caeb38..0000000000000000000000000000000000000000 --- a/docs.it4i/salomon/software/ansys/ansys-fluent.md +++ /dev/null @@ -1,162 +0,0 @@ -# ANSYS Fluent - -[ANSYS Fluent](http://www.ansys.com/products/fluids/ansys-fluent) -software contains the broad physical modeling capabilities needed to model flow, turbulence, heat transfer, and reactions for industrial applications ranging from air flow over an aircraft wing to combustion in a furnace, from bubble columns to oil platforms, from blood flow to semiconductor manufacturing, and from clean room design to wastewater treatment plants. Special models that give the software the ability to model in-cylinder combustion, aeroacoustics, turbomachinery, and multiphase systems have served to broaden its reach. - -1. Common way to run Fluent over pbs file - -To run ANSYS Fluent in batch mode you can utilize/modify the default fluent.pbs script and execute it via the qsub command. - -```bash -#!/bin/bash -#PBS -S /bin/bash -#PBS -l nodes=2:ppn=16 -#PBS -q qprod -#PBS -N $USER-Fluent-Project -#PBS -A XX-YY-ZZ - -#! 
Mail to user when job terminate or abort -#PBS -m ae - -#!change the working directory (default is home directory) -#cd <working directory> (working directory must exists) -WORK_DIR="/scratch/$USER/work" -cd $WORK_DIR - -echo Running on host `hostname` -echo Time is `date` -echo Directory is `pwd` -echo This jobs runs on the following processors: -echo `cat $PBS_NODEFILE` - -#### Load ansys module so that we find the cfx5solve command -module load ansys - -# Use following line to specify MPI for message-passing instead -NCORES=`wc -l $PBS_NODEFILE |awk '{print $1}'` - -/ansys_inc/v145/fluent/bin/fluent 3d -t$NCORES -cnf=$PBS_NODEFILE -g -i fluent.jou -``` - -Header of the pbs file (above) is common and description can be find on [this site](../../resources-allocation-policy/). [SVS FEM](http://www.svsfem.cz) recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources. - -Working directory has to be created before sending pbs job into the queue. Input file should be in working directory or full path to input file has to be specified. Input file has to be defined by common Fluent journal file which is attached to the Fluent solver via parameter -i fluent.jou - -Journal file with definition of the input geometry and boundary conditions and defined process of solution has e.g. the following structure: - -```console - /file/read-case aircraft_2m.cas.gz - /solve/init - init - /solve/iterate - 10 - /file/write-case-dat aircraft_2m-solution - /exit yes -``` - -The appropriate dimension of the problem has to be set by parameter (2d/3d). - -1. Fast way to run Fluent from command line - -```console -fluent solver_version [FLUENT_options] -i journal_file -pbs -``` - -This syntax will start the ANSYS FLUENT job under PBS Professional using the qsub command in a batch manner. When resources are available, PBS Professional will start the job and return a job ID, usually in the form of _job_ID.hostname_. This job ID can then be used to query, control, or stop the job using standard PBS Professional commands, such as qstat or qdel. The job will be run out of the current working directory, and all output will be written to the file fluent.o _job_ID_. - -1. Running Fluent via user's config file - -The sample script uses a configuration file called pbs_fluent.conf if no command line arguments are present. This configuration file should be present in the directory from which the jobs are submitted (which is also the directory in which the jobs are executed). The following is an example of what the content of pbs_fluent.conf can be: - -```console -input="example_small.flin" -case="Small-1.65m.cas" -fluent_args="3d -pmyrinet" -outfile="fluent_test.out" -mpp="true" -``` - -The following is an explanation of the parameters: - -input is the name of the input file. - -case is the name of the .cas file that the input file will utilize. - -fluent_args are extra ANSYS FLUENT arguments. As shown in the previous example, you can specify the interconnect by using the -p interconnect command. The available interconnects include ethernet (the default), myrinet, infiniband, vendor, altix, and crayx. The MPI is selected automatically, based on the specified interconnect. - -outfile is the name of the file to which the standard output will be sent. - -mpp="true" will tell the job script to execute the job across multiple processors. 
- -To run ANSYS Fluent in batch mode with user's config file you can utilize/modify the following script and execute it via the qsub command. - -```bash -#!/bin/sh -#PBS -l nodes=2:ppn=4 -#PBS -1 qprod -#PBS -N $USE-Fluent-Project -#PBS -A XX-YY-ZZ - - cd $PBS_O_WORKDIR - - #We assume that if they didn’t specify arguments then they should use the - #config file if ["xx${input}${case}${mpp}${fluent_args}zz" = "xxzz" ]; then - if [ -f pbs_fluent.conf ]; then - . pbs_fluent.conf - else - printf "No command line arguments specified, " - printf "and no configuration file found. Exiting n" - fi - fi - - - #Augment the ANSYS FLUENT command line arguments case "$mpp" in - true) - #MPI job execution scenario - num_nodes=â€cat $PBS_NODEFILE | sort -u | wc -l†- cpus=â€expr $num_nodes * $NCPUS†- #Default arguments for mpp jobs, these should be changed to suit your - #needs. - fluent_args="-t${cpus} $fluent_args -cnf=$PBS_NODEFILE" - ;; - *) - #SMP case - #Default arguments for smp jobs, should be adjusted to suit your - #needs. - fluent_args="-t$NCPUS $fluent_args" - ;; - esac - #Default arguments for all jobs - fluent_args="-ssh -g -i $input $fluent_args" - - echo "---------- Going to start a fluent job with the following settings: - Input: $input - Case: $case - Output: $outfile - Fluent arguments: $fluent_args" - - #run the solver - /ansys_inc/v145/fluent/bin/fluent $fluent_args > $outfile -``` - -It runs the jobs out of the directory from which they are submitted (PBS_O_WORKDIR). - -1. Running Fluent in parralel - -Fluent could be run in parallel only under Academic Research license. To do so this ANSYS Academic Research license must be placed before ANSYS CFD license in user preferences. To make this change anslic_admin utility should be run - -```console -/ansys_inc/shared_les/licensing/lic_admin/anslic_admin -``` - -ANSLIC_ADMIN Utility will be run - - - - - - - -ANSYS Academic Research license should be moved up to the top of the list. - - diff --git a/docs.it4i/salomon/software/ansys/ansys-ls-dyna.md b/docs.it4i/salomon/software/ansys/ansys-ls-dyna.md deleted file mode 100644 index 8646c26665ea9f10d6d70405e961f1e2efe7fbb9..0000000000000000000000000000000000000000 --- a/docs.it4i/salomon/software/ansys/ansys-ls-dyna.md +++ /dev/null @@ -1,55 +0,0 @@ -# ANSYS LS-DYNA - -**[ANSYSLS-DYNA](http://www.ansys.com/products/structures/ansys-ls-dyna)** software provides convenient and easy-to-use access to the technology-rich, time-tested explicit solver without the need to contend with the complex input requirements of this sophisticated program. Introduced in 1996, ANSYS LS-DYNA capabilities have helped customers in numerous industries to resolve highly intricate design issues. ANSYS Mechanical users have been able take advantage of complex explicit solutions for a long time utilizing the traditional ANSYS Parametric Design Language (APDL) environment. These explicit capabilities are available to ANSYS Workbench users as well. The Workbench platform is a powerful, comprehensive, easy-to-use environment for engineering simulation. CAD import from all sources, geometry cleanup, automatic meshing, solution, parametric optimization, result visualization and comprehensive report generation are all available within a single fully interactive modern graphical user environment. - -To run ANSYS LS-DYNA in batch mode you can utilize/modify the default ansysdyna.pbs script and execute it via the qsub command. 
-
-```bash
-#!/bin/bash
-#PBS -l nodes=2:ppn=16
-#PBS -q qprod
-#PBS -N $USER-DYNA-Project
-#PBS -A XX-YY-ZZ
-
-#! Mail to user when job terminates or aborts
-#PBS -m ae
-
-#!change the working directory (default is home directory)
-#cd <working directory>
-WORK_DIR="/scratch/$USER/work"
-cd $WORK_DIR
-
-echo Running on host `hostname`
-echo Time is `date`
-echo Directory is `pwd`
-echo This job runs on the following processors:
-echo `cat $PBS_NODEFILE`
-
-#! Counts the number of processors
-NPROCS=`wc -l < $PBS_NODEFILE`
-
-echo This job has allocated $NPROCS processors
-
-module load ansys
-
-#### Set number of processors per host listing
-#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2)
-procs_per_host=1
-#### Create host list
-hl=""
-for host in `cat $PBS_NODEFILE`
-do
- if [ "$hl" = "" ]
- then hl="$host:$procs_per_host"
- else hl="${hl}:$host:$procs_per_host"
- fi
-done
-
-echo Machines: $hl
-
-/ansys_inc/v145/ansys/bin/ansys145 -dis -lsdynampp i=input.k -machines $hl
-```
-
-The header of the PBS file (above) is common; its description can be found on [this site](../../job-submission-and-execution/). [SVS FEM](http://www.svsfem.cz) recommends requesting resources with the keywords nodes and ppn. These keywords directly address the number of nodes (computers) and cores (ppn) to be utilized by the job. The rest of the code also assumes this structure of allocated resources.
-
-The working directory has to be created before the PBS job is sent into the queue. The input file should be in the working directory, or the full path to the input file has to be specified. The input has to be a common LS-DYNA .**k** file, which is attached to the ANSYS solver via the parameter i=
diff --git a/docs.it4i/salomon/software/ansys/ansys-mechanical-apdl.md b/docs.it4i/salomon/software/ansys/ansys-mechanical-apdl.md
deleted file mode 100644
index c1562c1c23ca09fe308536c45f1c903ab8384b3e..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/ansys/ansys-mechanical-apdl.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# ANSYS MAPDL
-
-**[ANSYS Multiphysics](http://www.ansys.com/products/multiphysics)**
-software offers a comprehensive product solution for both multiphysics and single-physics analysis. The product includes structural, thermal, fluid and both high- and low-frequency electromagnetic analysis. The product also contains solutions for both direct and sequentially coupled physics problems including direct coupled-field elements and the ANSYS multi-field solver.
-
-To run ANSYS MAPDL in batch mode you can utilize/modify the default mapdl.pbs script and execute it via the qsub command.
-
-```bash
-#!/bin/bash
-#PBS -l nodes=2:ppn=16
-#PBS -q qprod
-#PBS -N $USER-ANSYS-Project
-#PBS -A XX-YY-ZZ
-
-#! 
Mail to user when job terminates or aborts
-#PBS -m ae
-
-#!change the working directory (default is home directory)
-#cd <working directory> (working directory must exist)
-WORK_DIR="/scratch/$USER/work"
-cd $WORK_DIR
-
-echo Running on host `hostname`
-echo Time is `date`
-echo Directory is `pwd`
-echo This job runs on the following processors:
-echo `cat $PBS_NODEFILE`
-
-module load ansys
-
-#### Set number of processors per host listing
-#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2)
-procs_per_host=1
-#### Create host list
-hl=""
-for host in `cat $PBS_NODEFILE`
-do
- if [ "$hl" = "" ]
- then hl="$host:$procs_per_host"
- else hl="${hl}:$host:$procs_per_host"
- fi
-done
-
-echo Machines: $hl
-
-#-i input.dat includes the input of the analysis in APDL format
-#-o file.out is the output file from ansys to which all text output will be redirected
-#-p the name of license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial), aa_r_dy=Academic AUTODYN)
-/ansys_inc/v145/ansys/bin/ansys145 -b -dis -p aa_r -i input.dat -o file.out -machines $hl -dir $WORK_DIR
-```
-
-The header of the PBS file (above) is common; its description can be found on [this site](../../resources-allocation-policy/). [SVS FEM](http://www.svsfem.cz) recommends requesting resources with the keywords nodes and ppn. These keywords directly address the number of nodes (computers) and cores (ppn) to be utilized by the job. The rest of the code also assumes this structure of allocated resources.
-
-The working directory has to be created before the PBS job is sent into the queue. The input file should be in the working directory, or the full path to the input file has to be specified. The input has to be a common APDL file, which is attached to the ANSYS solver via the parameter -i
-
-**License** should be selected by the parameter -p. The licensed products are the following: aa_r (ANSYS **Academic** Research), ane3fl (ANSYS Multiphysics - **Commercial**), aa_r_dy (ANSYS **Academic** AUTODYN). [More about licensing here](licensing/)
diff --git a/docs.it4i/salomon/software/ansys/ansys.md b/docs.it4i/salomon/software/ansys/ansys.md
deleted file mode 100644
index d7e0f2e1444ddc77dd861a4cce4eef06b4c78a6c..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/ansys/ansys.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Overview of ANSYS Products
-
-**[SVS FEM](http://www.svsfem.cz/)**, the **[ANSYS Channel partner](http://www.ansys.com/)** for the Czech Republic, provided all ANSYS licenses for the ANSELM cluster and supports all ANSYS products (Multiphysics, Mechanical, MAPDL, CFX, Fluent, Maxwell, LS-DYNA...) for IT staff and ANSYS users. If you run into problems with ANSYS functionality, please contact [hotline@svsfem.cz](mailto:hotline@svsfem.cz?subject=Ostrava%20-%20ANSELM)
-
-Anselm provides both commercial and academic variants. Academic variants are distinguished by the word "**Academic...**" in the license name, or by the two-letter prefix "**aa\_**" in the license feature name. The license is changed on the command line or directly in the user's PBS file (see the individual products). [More about licensing here](licensing/)
-
-To load the latest version of any ANSYS product (Mechanical, Fluent, CFX, MAPDL,...) load the module:
-
-```console
-$ ml ansys
-```
-
-ANSYS supports an interactive regime, but since it is assumed that extremely demanding tasks will be solved, this is not recommended. 
-
-If the user needs to work in an interactive regime, we recommend configuring the RSM service on the client machine, which allows forwarding the solution to Anselm directly from the client's Workbench project (see ANSYS RSM service).
diff --git a/docs.it4i/salomon/software/chemistry/nwchem.md b/docs.it4i/salomon/software/chemistry/nwchem.md
deleted file mode 100644
index add429da99d2044e2ddaa64d29350e766c558bc2..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/chemistry/nwchem.md
+++ /dev/null
@@ -1,43 +0,0 @@
-# NWChem
-
-## Introduction
-
-NWChem aims to provide its users with computational chemistry tools that are scalable both in their ability to treat large scientific computational chemistry problems efficiently, and in their use of available parallel computing resources from high-performance parallel supercomputers to conventional workstation clusters.
-
-[Homepage](http://www.nwchem-sw.org/index.php/Main_Page)
-
-## Installed Versions
-
-The following versions are currently installed:
-
-* NWChem/6.3.revision2-2013-10-17-Python-2.7.8, current release. Compiled with Intel compilers, MKL and Intel MPI
-* NWChem/6.5.revision26243-intel-2015b-2014-09-10-Python-2.7.8
-
-For a current list of installed versions, execute:
-
-```console
-$ ml av NWChem
-```
-
-We recommend using version 6.5. Version 6.3 fails on Salomon nodes with accelerators, because it attempts to communicate over the scif0 interface. In 6.5 this is avoided by setting ARMCI_OPENIB_DEVICE=mlx4_0; this setting is included in the module.
-
-## Running
-
-NWChem is compiled for parallel MPI execution. The normal procedure for MPI jobs applies. Sample jobscript:
-
-```bash
- #PBS -A IT4I-0-0
- #PBS -q qprod
- #PBS -l select=1:ncpus=24:mpiprocs=24
-
- cd $PBS_O_WORKDIR
- module add NWChem/6.5.revision26243-intel-2015b-2014-09-10-Python-2.7.8
- mpirun nwchem h2o.nw
-```
-
-## Options
-
-Please refer to [the documentation](http://www.nwchem-sw.org/index.php/Release62:Top-level) and set the following directives in the input file:
-
-* MEMORY : controls the amount of memory NWChem will use
-* SCRATCH_DIR : set this to a directory in the [SCRATCH filesystem](../../storage/storage/) (or run the calculation completely in a scratch directory). For certain calculations, it might be advisable to reduce I/O by forcing "direct" mode, e.g. "scf direct"
diff --git a/docs.it4i/salomon/software/debuggers/valgrind.md b/docs.it4i/salomon/software/debuggers/valgrind.md
deleted file mode 100644
index 188f98502862effe90495934c6288aa64b042318..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/debuggers/valgrind.md
+++ /dev/null
@@ -1,265 +0,0 @@
-# Valgrind
-
-## About Valgrind
-
-Valgrind is an open-source tool, used mainly for debugging memory-related problems, such as memory leaks, use of uninitialized memory, etc. in C/C++ applications. The toolchain was, however, extended over time with more functionality, such as debugging of threaded applications or cache profiling, and is not limited only to C/C++.
-
-Valgrind is an extremely useful tool for debugging memory errors such as [off-by-one](http://en.wikipedia.org/wiki/Off-by-one_error). Valgrind uses a virtual machine and dynamic recompilation of binary code; because of that, you can expect that programs being debugged by Valgrind run 5-100 times slower.
-
-The main tools available in Valgrind are:
-
-* **Memcheck**, the original, most used, and default tool. 
Verifies memory access in your program and can detect use of uninitialized memory, out-of-bounds memory access, memory leaks, double free, etc.
-* **Massif**, a heap profiler.
-* **Helgrind** and **DRD** can detect race conditions in multi-threaded applications.
-* **Cachegrind**, a cache profiler.
-* **Callgrind**, a callgraph analyzer.
-* For a full list and detailed documentation, please refer to the [official Valgrind documentation](http://valgrind.org/docs/).
-
-## Installed Versions
-
-There are three versions of Valgrind available on the cluster.
-
-* Version 3.8.1, installed by the operating system vendor in /usr/bin/valgrind. This version is available by default, without the need to load any module. This version, however, does not provide additional MPI support. Also, it does not support AVX2 instructions; debugging of an AVX2-enabled executable with this version will fail
-* Version 3.11.0 built by ICC with support for Intel MPI, available in module Valgrind/3.11.0-intel-2015b. After loading the module, this version replaces the default valgrind.
-* Version 3.11.0 built by GCC with support for Open MPI, module Valgrind/3.11.0-foss-2015b
-
-## Usage
-
-Compile the application which you want to debug as usual. It is advisable to add the compilation flags -g (to add debugging information to the binary so that you will see original source code lines in the output) and -O0 (to disable compiler optimizations).
-
-For example, let's look at this C code, which has two problems:
-
-```cpp
- #include <stdlib.h>
-
- void f(void)
- {
-    int* x = malloc(10 * sizeof(int));
-    x[10] = 0; // problem 1: heap block overrun
- }             // problem 2: memory leak -- x not freed
-
- int main(void)
- {
-    f();
-    return 0;
- }
-```
-
-Now, compile it with the Intel compiler:
-
-```console
-$ module add intel
-$ icc -g valgrind-example.c -o valgrind-example
-```
-
-Now, let's run it with Valgrind. The syntax is:
-
-valgrind [valgrind options] < your program binary > [your program options]
-
-If no Valgrind options are specified, Valgrind defaults to running the Memcheck tool. Please refer to the Valgrind documentation for a full description of the command line options.
-
-```console
-$ valgrind ./valgrind-example
- ==12652== Memcheck, a memory error detector
- ==12652== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. 
- ==12652== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
- ==12652== Command: ./valgrind-example
- ==12652==
- ==12652== Invalid write of size 4
- ==12652== at 0x40053E: f (valgrind-example.c:6)
- ==12652== by 0x40054E: main (valgrind-example.c:11)
- ==12652== Address 0x5861068 is 0 bytes after a block of size 40 alloc'd
- ==12652== at 0x4C27AAA: malloc (vg_replace_malloc.c:291)
- ==12652== by 0x400528: f (valgrind-example.c:5)
- ==12652== by 0x40054E: main (valgrind-example.c:11)
- ==12652==
- ==12652==
- ==12652== HEAP SUMMARY:
- ==12652== in use at exit: 40 bytes in 1 blocks
- ==12652== total heap usage: 1 allocs, 0 frees, 40 bytes allocated
- ==12652==
- ==12652== LEAK SUMMARY:
- ==12652== definitely lost: 40 bytes in 1 blocks
- ==12652== indirectly lost: 0 bytes in 0 blocks
- ==12652== possibly lost: 0 bytes in 0 blocks
- ==12652== still reachable: 0 bytes in 0 blocks
- ==12652== suppressed: 0 bytes in 0 blocks
- ==12652== Rerun with --leak-check=full to see details of leaked memory
- ==12652==
- ==12652== For counts of detected and suppressed errors, rerun with: -v
- ==12652== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 6 from 6)
-```
-
-In the output we can see that Valgrind has detected both errors - the off-by-one memory access at line 6 and a memory leak of 40 bytes. If we want a detailed analysis of the memory leak, we need to run Valgrind with the --leak-check=full option:
-
-```console
-$ valgrind --leak-check=full ./valgrind-example
- ==23856== Memcheck, a memory error detector
- ==23856== Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al.
- ==23856== Using Valgrind-3.6.0 and LibVEX; rerun with -h for copyright info
- ==23856== Command: ./valgrind-example
- ==23856==
- ==23856== Invalid write of size 4
- ==23856== at 0x40067E: f (valgrind-example.c:6)
- ==23856== by 0x40068E: main (valgrind-example.c:11)
- ==23856== Address 0x66e7068 is 0 bytes after a block of size 40 alloc'd
- ==23856== at 0x4C26FDE: malloc (vg_replace_malloc.c:236)
- ==23856== by 0x400668: f (valgrind-example.c:5)
- ==23856== by 0x40068E: main (valgrind-example.c:11)
- ==23856==
- ==23856==
- ==23856== HEAP SUMMARY:
- ==23856== in use at exit: 40 bytes in 1 blocks
- ==23856== total heap usage: 1 allocs, 0 frees, 40 bytes allocated
- ==23856==
- ==23856== 40 bytes in 1 blocks are definitely lost in loss record 1 of 1
- ==23856== at 0x4C26FDE: malloc (vg_replace_malloc.c:236)
- ==23856== by 0x400668: f (valgrind-example.c:5)
- ==23856== by 0x40068E: main (valgrind-example.c:11)
- ==23856==
- ==23856== LEAK SUMMARY:
- ==23856== definitely lost: 40 bytes in 1 blocks
- ==23856== indirectly lost: 0 bytes in 0 blocks
- ==23856== possibly lost: 0 bytes in 0 blocks
- ==23856== still reachable: 0 bytes in 0 blocks
- ==23856== suppressed: 0 bytes in 0 blocks
- ==23856==
- ==23856== For counts of detected and suppressed errors, rerun with: -v
- ==23856== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 6 from 6)
-```
-
-Now we can see that the memory leak is due to the malloc() at line 5.
-
-## Usage With MPI
-
-Although Valgrind is not primarily a parallel debugger, it can be used to debug parallel applications as well. When launching your parallel applications, prepend the valgrind command. 
For example:
-
-```console
-$ mpirun -np 4 valgrind myapplication
-```
-
-The default version without MPI support will, however, report a large number of false errors in the MPI library, such as:
-
-```console
- ==30166== Conditional jump or move depends on uninitialised value(s)
- ==30166== at 0x4C287E8: strlen (mc_replace_strmem.c:282)
- ==30166== by 0x55443BD: I_MPI_Processor_model_number (init_interface.c:427)
- ==30166== by 0x55439E0: I_MPI_Processor_arch_code (init_interface.c:171)
- ==30166== by 0x558D5AE: MPID_nem_impi_init_shm_configuration (mpid_nem_impi_extensions.c:1091)
- ==30166== by 0x5598F4C: MPID_nem_init_ckpt (mpid_nem_init.c:566)
- ==30166== by 0x5598B65: MPID_nem_init (mpid_nem_init.c:489)
- ==30166== by 0x539BD75: MPIDI_CH3_Init (ch3_init.c:64)
- ==30166== by 0x5578743: MPID_Init (mpid_init.c:193)
- ==30166== by 0x554650A: MPIR_Init_thread (initthread.c:539)
- ==30166== by 0x553369F: PMPI_Init (init.c:195)
- ==30166== by 0x4008BD: main (valgrind-example-mpi.c:18)
-```
-
-so it is better to use the MPI-enabled valgrind from the module. The MPI version requires the library:
-
-$EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so
-
-which must be included in the LD_PRELOAD environment variable.
-
-Let's look at this MPI example:
-
-```cpp
- #include <stdlib.h>
- #include <mpi.h>
-
- int main(int argc, char *argv[])
- {
-    int *data = malloc(sizeof(int)*99);
-
-    MPI_Init(&argc, &argv);
-    MPI_Bcast(data, 100, MPI_INT, 0, MPI_COMM_WORLD);
-    MPI_Finalize();
-
-    return 0;
- }
-```
-
-There are two errors - use of uninitialized memory and an invalid buffer length. Let's debug it with valgrind:
-
-```console
-$ module add intel impi
-$ mpiicc -g valgrind-example-mpi.c -o valgrind-example-mpi
-$ module add Valgrind/3.11.0-intel-2015b
-$ mpirun -np 2 -env LD_PRELOAD $EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so valgrind ./valgrind-example-mpi
-```
-
-This prints the following output (note that output is printed for every launched MPI process):
-
-```console
- ==31318== Memcheck, a memory error detector
- ==31318== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
- ==31318== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
- ==31318== Command: ./valgrind-example-mpi
- ==31318==
- ==31319== Memcheck, a memory error detector
- ==31319== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. 
- ==31319== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
- ==31319== Command: ./valgrind-example-mpi
- ==31319==
- valgrind MPI wrappers 31319: Active for pid 31319
- valgrind MPI wrappers 31319: Try MPIWRAP_DEBUG=help for possible options
- valgrind MPI wrappers 31318: Active for pid 31318
- valgrind MPI wrappers 31318: Try MPIWRAP_DEBUG=help for possible options
- ==31319== Unaddressable byte(s) found during client check request
- ==31319== at 0x4E35974: check_mem_is_addressable_untyped (libmpiwrap.c:960)
- ==31319== by 0x4E5D0FE: PMPI_Bcast (libmpiwrap.c:908)
- ==31319== by 0x400911: main (valgrind-example-mpi.c:20)
- ==31319== Address 0x69291cc is 0 bytes after a block of size 396 alloc'd
- ==31319== at 0x4C27AAA: malloc (vg_replace_malloc.c:291)
- ==31319== by 0x4007BC: main (valgrind-example-mpi.c:8)
- ==31319==
- ==31318== Uninitialised byte(s) found during client check request
- ==31318== at 0x4E3591D: check_mem_is_defined_untyped (libmpiwrap.c:952)
- ==31318== by 0x4E5D06D: PMPI_Bcast (libmpiwrap.c:908)
- ==31318== by 0x400911: main (valgrind-example-mpi.c:20)
- ==31318== Address 0x6929040 is 0 bytes inside a block of size 396 alloc'd
- ==31318== at 0x4C27AAA: malloc (vg_replace_malloc.c:291)
- ==31318== by 0x4007BC: main (valgrind-example-mpi.c:8)
- ==31318==
- ==31318== Unaddressable byte(s) found during client check request
- ==31318== at 0x4E3591D: check_mem_is_defined_untyped (libmpiwrap.c:952)
- ==31318== by 0x4E5D06D: PMPI_Bcast (libmpiwrap.c:908)
- ==31318== by 0x400911: main (valgrind-example-mpi.c:20)
- ==31318== Address 0x69291cc is 0 bytes after a block of size 396 alloc'd
- ==31318== at 0x4C27AAA: malloc (vg_replace_malloc.c:291)
- ==31318== by 0x4007BC: main (valgrind-example-mpi.c:8)
- ==31318==
- ==31318==
- ==31318== HEAP SUMMARY:
- ==31318== in use at exit: 3,172 bytes in 67 blocks
- ==31318== total heap usage: 191 allocs, 124 frees, 81,203 bytes allocated
- ==31318==
- ==31319==
- ==31319== HEAP SUMMARY:
- ==31319== in use at exit: 3,172 bytes in 67 blocks
- ==31319== total heap usage: 175 allocs, 108 frees, 48,435 bytes allocated
- ==31319==
- ==31318== LEAK SUMMARY:
- ==31318== definitely lost: 408 bytes in 3 blocks
- ==31318== indirectly lost: 256 bytes in 1 blocks
- ==31318== possibly lost: 0 bytes in 0 blocks
- ==31318== still reachable: 2,508 bytes in 63 blocks
- ==31318== suppressed: 0 bytes in 0 blocks
- ==31318== Rerun with --leak-check=full to see details of leaked memory
- ==31318==
- ==31318== For counts of detected and suppressed errors, rerun with: -v
- ==31318== Use --track-origins=yes to see where uninitialised values come from
- ==31318== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 4 from 4)
- ==31319== LEAK SUMMARY:
- ==31319== definitely lost: 408 bytes in 3 blocks
- ==31319== indirectly lost: 256 bytes in 1 blocks
- ==31319== possibly lost: 0 bytes in 0 blocks
- ==31319== still reachable: 2,508 bytes in 63 blocks
- ==31319== suppressed: 0 bytes in 0 blocks
- ==31319== Rerun with --leak-check=full to see details of leaked memory
- ==31319==
- ==31319== For counts of detected and suppressed errors, rerun with: -v
- ==31319== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4)
-```
-
-We can see that Valgrind has reported use of uninitialised memory on the master process (which reads the array to be broadcast) and use of unaddressable memory on both processes.
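If such a report needs further detail, the Memcheck options shown earlier can be combined with the MPI wrapper in a single command; a sketch reusing the module and example binary from above:

```console
$ mpirun -np 2 -env LD_PRELOAD $EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so \
    valgrind --leak-check=full --track-origins=yes ./valgrind-example-mpi
```

Here --leak-check=full breaks down each leak record and --track-origins=yes traces every uninitialised value back to its allocation, both at the cost of an additional slowdown.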
diff --git a/docs.it4i/salomon/software/java.md b/docs.it4i/salomon/software/java.md
deleted file mode 100644
index 83c3738c0802e612ba84c25868771c44fa51a1ab..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/java.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Java
-
-Java is available on the cluster. Activate Java by loading the Java module:
-
-```console
-$ ml Java
-```
-
-Note that the Java module must be loaded on the compute nodes as well, in order to run Java on compute nodes.
-
-Check the Java version and path:
-
-```console
-$ java -version
-$ which java
-```
-
-With the module loaded, not only the runtime environment (JRE), but also the development environment (JDK) with the compiler is available.
-
-```console
-$ javac -version
-$ which javac
-```
-
-Java applications may use MPI for inter-process communication, in conjunction with Open MPI. Read more on <http://www.open-mpi.org/faq/?category=java>. This functionality is currently not supported on the Anselm cluster. In case you require the Java interface to MPI, please contact [cluster support](https://support.it4i.cz/rt/).
diff --git a/docs.it4i/salomon/software/mpi/Running_OpenMPI.md b/docs.it4i/salomon/software/mpi/Running_OpenMPI.md
deleted file mode 100644
index e2633236ac6624c7a41ed56496bacb9795158901..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/mpi/Running_OpenMPI.md
+++ /dev/null
@@ -1,203 +0,0 @@
-# Running OpenMPI
-
-## OpenMPI Program Execution
-
-OpenMPI programs may be executed only via the PBS workload manager, by entering an appropriate queue. The OpenMPI-based MPI implementation available on the cluster is **OpenMPI 1.8.6**.
-
-### Basic Usage
-
-Use mpiexec to run OpenMPI code.
-
-Example:
-
-```console
-$ qsub -q qexp -l select=4:ncpus=24 -I
-    qsub: waiting for job 15210.isrv5 to start
-    qsub: job 15210.isrv5 ready
-$ pwd
-    /home/username
-$ ml OpenMPI
-$ mpiexec -pernode ./helloworld_mpi.x
-    Hello world! from rank 0 of 4 on host r1i0n17
-    Hello world! from rank 1 of 4 on host r1i0n5
-    Hello world! from rank 2 of 4 on host r1i0n6
-    Hello world! from rank 3 of 4 on host r1i0n7
-```
-
-Please be aware that in this example the **-pernode** directive is used to run only **one task per node**, which is normally unwanted behaviour (unless you want to run hybrid code with just one MPI task and 24 OpenMP threads per node). In normal MPI programs, **omit the -pernode directive** to run up to 24 MPI tasks per node.
-
-In this example, we allocate 4 nodes via the express queue interactively. We set up the OpenMPI environment and interactively run the helloworld_mpi.x program.
-Note that the executable helloworld_mpi.x must be available within the same path on all nodes. This is automatically fulfilled on the /home and /scratch filesystems.
-
-You need to preload the executable if running on the local ramdisk /tmp filesystem:
-
-```console
-$ pwd
-    /tmp/pbs.15210.isrv5
-$ mpiexec -pernode --preload-binary ./helloworld_mpi.x
-    Hello world! from rank 0 of 4 on host r1i0n17
-    Hello world! from rank 1 of 4 on host r1i0n5
-    Hello world! from rank 2 of 4 on host r1i0n6
-    Hello world! from rank 3 of 4 on host r1i0n7
-```
-
-In this example, we assume the executable helloworld_mpi.x is present on compute node r1i0n17 on the ramdisk. We call mpiexec with the **--preload-binary** argument (valid for OpenMPI). The mpiexec will copy the executable from r1i0n17 to the /tmp/pbs.15210.isrv5 directory on r1i0n5, r1i0n6 and r1i0n7 and execute the program.
-
-MPI process mapping may be controlled by PBS parameters.
-
-The mpiprocs and ompthreads parameters allow for selection of the number of running MPI processes per node as well as the number of OpenMP threads per MPI process.
-
-### One MPI Process Per Node
-
-Follow this example to run one MPI process per node, 24 threads per process.
-
-```console
-$ qsub -q qexp -l select=4:ncpus=24:mpiprocs=1:ompthreads=24 -I
-$ ml OpenMPI
-$ mpiexec --bind-to-none ./helloworld_mpi.x
-```
-
-In this example, we demonstrate the recommended way to run an MPI application, using 1 MPI process and 24 threads per node, on 4 nodes.
-
-### Two MPI Processes Per Node
-
-Follow this example to run two MPI processes per node, 12 threads per process. Note the options to mpiexec.
-
-```console
-$ qsub -q qexp -l select=4:ncpus=24:mpiprocs=2:ompthreads=12 -I
-$ ml OpenMPI
-$ mpiexec -bysocket -bind-to-socket ./helloworld_mpi.x
-```
-
-In this example, we demonstrate the recommended way to run an MPI application, using 2 MPI processes per node and 12 threads per socket, with each process and its threads bound to a separate processor socket of the node, on 4 nodes.
-
-### 24 MPI Processes Per Node
-
-Follow this example to run 24 MPI processes per node, 1 thread per process. Note the options to mpiexec.
-
-```console
-$ qsub -q qexp -l select=4:ncpus=24:mpiprocs=24:ompthreads=1 -I
-$ ml OpenMPI
-$ mpiexec -bycore -bind-to-core ./helloworld_mpi.x
-```
-
-In this example, we demonstrate the recommended way to run an MPI application, using 24 MPI processes per node, single threaded. Each process is bound to a separate processor core, on 4 nodes.
-
-### OpenMP Thread Affinity
-
-!!! note
-    Important! Bind every OpenMP thread to a core!
-
-In the previous two examples with one or two MPI processes per node, the operating system might still migrate OpenMP threads between cores. You might want to avoid this by setting this environment variable for GCC OpenMP:
-
-```console
-$ export GOMP_CPU_AFFINITY="0-23"
-```
-
-or this one for Intel OpenMP:
-
-```console
-$ export KMP_AFFINITY=granularity=fine,compact,1,0
-```
-
-As of OpenMP 4.0 (supported by GCC 4.9 and later and Intel 14.0 and later) the following variables may be used for Intel or GCC:
-
-```console
-$ export OMP_PROC_BIND=true
-$ export OMP_PLACES=cores
-```
-
-## OpenMPI Process Mapping and Binding
-
-mpiexec allows for precise selection of how the MPI processes will be mapped to the computational nodes and how these processes will be bound to particular processor sockets and cores.
-
-MPI process mapping may be specified by a hostfile or rankfile input to the mpiexec program. Although all implementations of MPI provide means for process mapping and binding, the following examples are valid for OpenMPI only.
-
-### Hostfile
-
-Example hostfile:
-
-```console
-    r1i0n17.smc.salomon.it4i.cz
-    r1i0n5.smc.salomon.it4i.cz
-    r1i0n6.smc.salomon.it4i.cz
-    r1i0n7.smc.salomon.it4i.cz
-```
-
-Use the hostfile to control process placement:
-
-```console
-$ mpiexec -hostfile hostfile ./helloworld_mpi.x
-    Hello world! from rank 0 of 4 on host r1i0n17
-    Hello world! from rank 1 of 4 on host r1i0n5
-    Hello world! from rank 2 of 4 on host r1i0n6
-    Hello world! from rank 3 of 4 on host r1i0n7
-```
-
-In this example, we see that ranks have been mapped on nodes according to the order in which the nodes appear in the hostfile.
-
-### Rankfile
-
-Exact control of MPI process placement and resource binding is provided by specifying a rankfile.
-
-Appropriate binding may boost performance of your application. 
-
-Example rankfile:
-
-```console
-    rank 0=r1i0n7.smc.salomon.it4i.cz slot=1:0,1
-    rank 1=r1i0n6.smc.salomon.it4i.cz slot=0:*
-    rank 2=r1i0n5.smc.salomon.it4i.cz slot=1:1-2
-    rank 3=r1i0n17.smc.salomon slot=0:1,1:0-2
-    rank 4=r1i0n6.smc.salomon.it4i.cz slot=0:*,1:*
-```
-
-This rankfile assumes 5 ranks will be running on 4 nodes and provides exact mapping and binding of the processes to the processor sockets and cores.
-
-Explanation:
-rank 0 will be bound to r1i0n7, socket1 core0 and core1
-rank 1 will be bound to r1i0n6, socket0, all cores
-rank 2 will be bound to r1i0n5, socket1, core1 and core2
-rank 3 will be bound to r1i0n17, socket0 core1, socket1 core0, core1, core2
-rank 4 will be bound to r1i0n6, all cores on both sockets
-
-```console
- $ mpiexec -n 5 -rf rankfile --report-bindings ./helloworld_mpi.x
-    [r1i0n17:11180] MCW rank 3 bound to socket 0[core 1] socket 1[core 0-2]: [. B . . . . . . . . . .][B B B . . . . . . . . .] (slot list 0:1,1:0-2)
-    [r1i0n7:09928] MCW rank 0 bound to socket 1[core 0-1]: [. . . . . . . . . . . .][B B . . . . . . . . . .] (slot list 1:0,1)
-    [r1i0n6:10395] MCW rank 1 bound to socket 0[core 0-7]: [B B B B B B B B B B B B][. . . . . . . . . . . .] (slot list 0:*)
-    [r1i0n5:10406] MCW rank 2 bound to socket 1[core 1-2]: [. . . . . . . . . . . .][. B B . . . . . . . . .] (slot list 1:1-2)
-    [r1i0n6:10406] MCW rank 4 bound to socket 0[core 0-7] socket 1[core 0-7]: [B B B B B B B B B B B B][B B B B B B B B B B B B] (slot list 0:*,1:*)
-    Hello world! from rank 3 of 5 on host r1i0n17
-    Hello world! from rank 1 of 5 on host r1i0n6
-    Hello world! from rank 0 of 5 on host r1i0n7
-    Hello world! from rank 4 of 5 on host r1i0n6
-    Hello world! from rank 2 of 5 on host r1i0n5
-```
-
-In this example we run 5 MPI processes (5 ranks) on four nodes. The rankfile defines how the processes will be mapped on the nodes, sockets and cores. The **--report-bindings** option was used to print out the actual process location and bindings. Note that ranks 1 and 4 run on the same node and their core binding overlaps.
-
-It is the user's responsibility to provide the correct number of ranks, sockets and cores.
-
-### Bindings Verification
-
-In all cases, binding and threading may be verified by executing, for example:
-
-```console
-$ mpiexec -bysocket -bind-to-socket --report-bindings echo
-$ mpiexec -bysocket -bind-to-socket numactl --show
-$ mpiexec -bysocket -bind-to-socket echo $OMP_NUM_THREADS
-```
-
-## Changes in OpenMPI 1.8
-
-Some options have changed in OpenMPI version 1.8.
-
-| version 1.6.5 | version 1.8.1 |
-| ---------------- | ------------------- |
-| --bind-to-none | --bind-to none |
-| --bind-to-core | --bind-to core |
-| --bind-to-socket | --bind-to socket |
-| -bysocket | --map-by socket |
-| -bycore | --map-by core |
-| -pernode | --map-by ppr:1:node |
diff --git a/docs.it4i/salomon/software/mpi/mpi4py-mpi-for-python.md b/docs.it4i/salomon/software/mpi/mpi4py-mpi-for-python.md
deleted file mode 100644
index f957f5c1439272e3b65d069e68d055e52c4cc0b8..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/mpi/mpi4py-mpi-for-python.md
+++ /dev/null
@@ -1,109 +0,0 @@
-# MPI4Py (MPI for Python)
-
-OpenMPI interface to Python
-
-## Introduction
-
-MPI for Python provides bindings of the Message Passing Interface (MPI) standard for the Python programming language, allowing any Python program to exploit multiple processors. 
-
-This package is constructed on top of the MPI-1/2 specifications and provides an object oriented interface which closely follows the MPI-2 C++ bindings. It supports point-to-point (sends, receives) and collective (broadcasts, scatters, gathers) communications of any picklable Python object, as well as optimized communications of Python objects exposing the single-segment buffer interface (NumPy arrays, builtin bytes/string/array objects).
-
-On Salomon, MPI4Py is available in the standard Python modules.
-
-## Modules
-
-MPI4Py is built for OpenMPI. Before you start with MPI4Py, you need to load the Python and OpenMPI modules. You can use a toolchain that loads Python and OpenMPI at once.
-
-```console
-$ ml av Python/
---------------------------------------- /apps/modules/lang -------------------------
- Python/2.7.8-intel-2015b Python/2.7.11-intel-2016a Python/3.5.1-intel-2017.00
- Python/2.7.11-intel-2017a Python/2.7.9-foss-2015b Python/2.7.9-intel-2015b
- Python/2.7.11-foss-2016a Python/3.5.2-foss-2016a Python/3.5.1
- Python/2.7.9-foss-2015g Python/3.4.3-intel-2015b Python/2.7.9
- Python/2.7.11-intel-2015b Python/3.5.2
-
-$ ml av OpenMPI/
---------------------------------------- /apps/modules/mpi --------------------------
-OpenMPI/1.8.6-GCC-4.4.7-system OpenMPI/1.8.8-GNU-4.9.3-2.25 OpenMPI/1.10.1-GCC-4.9.3-2.25
-OpenMPI/1.8.6-GNU-5.1.0-2.25 OpenMPI/1.8.8-GNU-5.1.0-2.25 OpenMPI/1.10.1-GNU-4.9.3-2.25
- OpenMPI/1.8.8-iccifort-2015.3.187-GNU-4.9.3-2.25 OpenMPI/2.0.2-GCC-6.3.0-2.27
-```
-
-!!! Warning ""
-    * modules Python/x.x.x-intel... - intel MPI
-    * modules Python/x.x.x-foss... - OpenMPI
-    * modules Python/x.x.x - without MPI
-
-## Execution
-
-You need to import MPI in your Python program. Include the following line in the Python script:
-
-```python
-    from mpi4py import MPI
-```
-
-MPI4Py-enabled Python programs [execute as any other OpenMPI](Running_OpenMPI/) code. The simplest way is to run
-
-```console
-$ mpiexec python <script>.py
-```
-
-For example
-
-```console
-$ mpiexec python hello_world.py
-```
-
-## Examples
-
-### Hello World!
-
-```python
-    from mpi4py import MPI
-
-    comm = MPI.COMM_WORLD
-
-    print "Hello! I'm rank %d from %d running in total..." % (comm.rank, comm.size)
-
-    comm.Barrier()   # wait for everybody to synchronize
-```
-
-### Collective Communication With NumPy Arrays
-
-```python
-    from __future__ import division
-    from mpi4py import MPI
-    import numpy as np
-
-    comm = MPI.COMM_WORLD
-
-    print("-"*78)
-    print(" Running on %d cores" % comm.size)
-    print("-"*78)
-
-    comm.Barrier()
-
-    # Prepare a vector of N=5 elements to be broadcasted...
-    N = 5
-    if comm.rank == 0:
-        A = np.arange(N, dtype=np.float64)    # rank 0 has proper data
-    else:
-        A = np.empty(N, dtype=np.float64)     # all other just an empty array
-
-    # Broadcast A from rank 0 to everybody
-    comm.Bcast( [A, MPI.DOUBLE] )
-
-    # Everybody should now have the same...
-    print "[%02d] %s" % (comm.rank, A)
-```
-
-Execute the above code as:
-
-```console
-$ qsub -q qexp -l select=4:ncpus=24:mpiprocs=24:ompthreads=1 -I
-$ ml Python/2.7.9-foss-2015g
-$ mpiexec --map-by core --bind-to core python hello_world.py
-```
-
-In this example, we run MPI4Py-enabled code on 4 nodes, 24 cores per node (a total of 96 processes), with each Python process bound to a different core. More examples and documentation can be found on the [MPI for Python webpage](https://pypi.python.org/pypi/mpi4py). 
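The examples above cover collective communication; point-to-point messaging of picklable objects, which the package also supports, follows the same pattern. A minimal sketch (the file name send_recv.py is a hypothetical placeholder; run with at least two processes, e.g. `mpiexec -n 2 python send_recv.py`):

```python
    from mpi4py import MPI

    comm = MPI.COMM_WORLD

    if comm.rank == 0:
        # send any picklable Python object to rank 1
        comm.send({"answer": 42}, dest=1, tag=11)
    elif comm.rank == 1:
        # receive the object sent by rank 0
        data = comm.recv(source=0, tag=11)
        print "[%02d] received %s" % (comm.rank, data)
```

The lowercase send/recv methods pickle arbitrary objects; for large NumPy arrays the uppercase buffer-based variants (as in the Bcast example above) avoid the pickling overhead.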
diff --git a/docs.it4i/salomon/software/numerical-languages/introduction.md b/docs.it4i/salomon/software/numerical-languages/introduction.md
deleted file mode 100644
index 13ba67071a136612568b6772104f0c8c5430ba40..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/numerical-languages/introduction.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Numerical languages
-
-Interpreted languages for numerical computations and analysis
-
-## Introduction
-
-This section contains a collection of high-level interpreted languages, primarily intended for numerical computations.
-
-## Matlab
-
-MATLAB® is a high-level language and interactive environment for numerical computation, visualization, and programming.
-
-```console
-$ ml MATLAB
-$ matlab
-```
-
-Read more at the [Matlab page](matlab/).
-
-## Octave
-
-GNU Octave is a high-level interpreted language, primarily intended for numerical computations. The Octave language is quite similar to Matlab, so that most programs are easily portable.
-
-```console
-$ ml Octave
-$ octave
-```
-
-Read more at the [Octave page](octave/).
-
-## R
-
-R is an interpreted language and environment for statistical computing and graphics.
-
-```console
-$ ml R
-$ R
-```
-
-Read more at the [R page](r/).
diff --git a/docs.it4i/salomon/software/numerical-languages/octave.md b/docs.it4i/salomon/software/numerical-languages/octave.md
deleted file mode 100644
index 5c679dd1b87e587965d802f2845997b755254fa2..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/numerical-languages/octave.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Octave
-
-GNU Octave is a high-level interpreted language, primarily intended for numerical computations. It provides capabilities for the numerical solution of linear and nonlinear problems, and for performing other numerical experiments. It also provides extensive graphics capabilities for data visualization and manipulation. Octave is normally used through its interactive command line interface, but it can also be used to write non-interactive programs. The Octave language is quite similar to Matlab, so that most programs are easily portable. Read more on <http://www.gnu.org/software/octave/>
-
-The following version of Octave is available on the cluster, via module:
-
-| Status | Version | module |
-| ---------- | ------------ | ------ |
-| **Stable** | Octave 3.8.2 | Octave |
-
-```console
-$ ml Octave
-```
-
-Octave on the cluster is linked with the highly optimized MKL mathematical library. This provides threaded parallelization to many Octave kernels, notably the linear algebra subroutines. Octave runs these heavy calculation kernels without any penalty. By default, Octave would parallelize to 24 threads. You may control the number of threads by setting the OMP_NUM_THREADS environment variable.
-
-To run Octave interactively, log in with the ssh -X parameter for X11 forwarding. Run Octave:
-
-```console
-$ octave
-```
-
-To run Octave in batch mode, write an Octave script, then write a bash jobscript and execute it via the qsub command. By default, Octave will use 24 threads when running MKL kernels.
-
-```bash
-    #!/bin/bash
-
-    # change to local scratch directory
-    mkdir -p /scratch/work/user/$USER/$PBS_JOBID
-    cd /scratch/work/user/$USER/$PBS_JOBID || exit
-
-    # copy input file to scratch
-    cp $PBS_O_WORKDIR/octcode.m .
-
-    # load octave module
-    module load Octave
-
-    # execute the calculation
-    octave -q --eval octcode > output.out
-
-    # copy output file to home
-    cp output.out $PBS_O_WORKDIR/. 
-
-    #exit
-    exit
-```
-
-This script may be submitted directly to the PBS workload manager via the qsub command. The inputs are in the octcode.m file, the outputs in the output.out file. See the single node jobscript example in the [Job execution section](../../).
-
-The Octave C compiler mkoctfile calls GNU gcc 4.8.1 for compiling native C code. This is very useful for running native C subroutines in the Octave environment.
-
-```console
-$ mkoctfile -v
-```
-
-Octave may use MPI for interprocess communication. This functionality is currently not supported on the cluster. In case you require the Octave interface to MPI, please contact our [cluster support](https://support.it4i.cz/rt/).
diff --git a/docs.it4i/salomon/software/numerical-libraries/.gitkeep b/docs.it4i/salomon/software/numerical-libraries/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/docs.it4i/salomon/software/numerical-libraries/Clp.md b/docs.it4i/salomon/software/numerical-libraries/Clp.md
new file mode 100644
index 0000000000000000000000000000000000000000..d74af1dc2cf4b0170bafad91951fdc44e6ccc14c
--- /dev/null
+++ b/docs.it4i/salomon/software/numerical-libraries/Clp.md
@@ -0,0 +1,61 @@
+
+# Clp
+
+## Introduction
+
+Clp (Coin-or linear programming) is an open-source linear programming solver written in C++. It is primarily meant to be used as a callable library, but a basic, stand-alone executable version is also available.
+
+Clp ([projects.coin-or.org/Clp](https://projects.coin-or.org/Clp)) is a part of the COIN-OR (The Computational Infrastructure for Operations Research) project ([projects.coin-or.org/](https://projects.coin-or.org/)).
+
+## Modules
+
+Clp, version 1.16.10, is available on Salomon via the module Clp:
+
+```console
+$ ml Clp
+```
+
+The module sets up the environment variables required for linking and running applications using Clp. This particular command loads the default module Clp/1.16.10-intel-2017a, the Intel module intel/2017a, and other related modules.
+
+## Compiling and Linking
+
+!!! note
+    Link with -lClp
+
+Load the Clp module. Link using the -lClp switch to link your code against Clp.
+
+```console
+$ ml Clp
+$ icc myprog.c -o myprog.x -Wl,-rpath=$LIBRARY_PATH -lClp
+```
+
+## Example
+
+An example of a Clp-enabled application follows. In this example, the library solves a linear programming problem loaded from a file.
+
+```cpp
+#include "coin/ClpSimplex.hpp"
+
+int main (int argc, const char *argv[])
+{
+    ClpSimplex model;
+    int status;
+    if (argc<2)
+        status=model.readMps("/apps/all/Clp/1.16.10-intel-2017a/lib/p0033.mps");
+    else
+        status=model.readMps(argv[1]);
+    if (!status) {
+        model.primal();
+    }
+    return 0;
+}
+```
+
+### Load Modules and Compile:
+
+```console
+ml Clp
+icc lp.c -o lp.x -Wl,-rpath=$LIBRARY_PATH -lClp
+```
+
+In this example, the lp.c code is compiled using the Intel compiler and linked with Clp. To run the code, the Intel module has to be loaded. 
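A run could then look like the following sketch. Without an argument, the binary solves the bundled p0033.mps sample referenced in the code above; my_problem.mps is an illustrative placeholder for a user-supplied MPS file:

```console
$ ml Clp
$ ./lp.x                  # solves the bundled p0033.mps sample
$ ./lp.x my_problem.mps   # solves a user-supplied MPS file
```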
diff --git a/docs.it4i/salomon/software/operating-system.md b/docs.it4i/salomon/software/operating-system.md
deleted file mode 100644
index f68a9a97aac216dd727e0973d3ac56754726b90a..0000000000000000000000000000000000000000
--- a/docs.it4i/salomon/software/operating-system.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Operating System
-
-The operating system on Salomon is Linux - [**CentOS 6.x**](https://en.wikipedia.org/wiki/CentOS)
-
-The CentOS Linux distribution is a stable, predictable, manageable and reproducible platform derived from the sources of Red Hat Enterprise Linux (RHEL).
diff --git a/docs.it4i/salomon/software/phys/LMGC90.md b/docs.it4i/salomon/software/phys/LMGC90.md
new file mode 100644
index 0000000000000000000000000000000000000000..aa796c19d4ff17efbf20aba5eec622a50542c1bc
--- /dev/null
+++ b/docs.it4i/salomon/software/phys/LMGC90.md
@@ -0,0 +1,76 @@
+# LMGC90
+
+## Introduction
+
+LMGC90 is free and open-source software dedicated to multi-physics simulation of discrete materials and structures.
+More details on the capabilities of LMGC90 are available [here][Welcome].
+
+## Modules
+
+LMGC90, version 2017.rc1, is available on Salomon via the module `LMGC90`:
+
+```sh
+$ ml LMGC90
+```
+
+The module sets up environment variables and loads some other modules required for running LMGC90 Python scripts. This particular command loads the default module, which is `LMGC90/2017.rc1-GCC-6.3.0-2.27`, and the modules:
+
+```console
+GCCcore/6.3.0
+binutils/2.27-GCCcore-6.3.0
+GCC/6.3.0-2.27
+bzip2/1.0.6
+zlib/1.2.8
+ncurses/6.0
+libreadline/6.3
+Tcl/8.6.3
+SQLite/3.8.8.1
+Python/2.7.9
+```
+
+## Running Generic Example
+
+The main API of the LMGC90 software is a Python module. It comes with a pre-processor written in Python. There are several examples that you can copy from the `examples` directory, which is in the `/apps/all/LMGC90/2017.rc1-GCC-6.3.0-2.27` folder. Follow the next steps to run one of them.
+
+First choose an example and open a terminal in the directory of the copied example.
+
+### Generation
+
+For more information on the pre-processor, open the file [docs/pre_lmgc/index.html][pre_lmgc] in a web browser.
+
+To run an example, if there is no `DATBOX` directory or it is empty, run the Python generation script, usually called `gen_sample.py`, with the command:
+
+```console
+$ python gen_sample.py
+```
+
+You should now have a `DATBOX` directory containing all needed `.DAT` and `.INI` files.
+
+### Computation
+
+Now run the command script, usually called `command.py`:
+
+```console
+$ python command.py
+```
+
+For more information on the structure of command scripts, read the documentation by opening the file [docs/chipy/index.html][chipy] in a web browser.
+Once the computation is done, you should get the directory `OUTBOX` containing ASCII output files, and a `DISPLAY` directory with output files readable by ParaView.
+
+### Postprocessing and Visualization
+
+The ASCII files in the `POSTPRO` directory result from the commands in the `DATBOX/POSTPRO.DAT` file. For more information on how to use these features, read the document [manuals/LMGC90_Postpro.pdf][LMGC90_Postpro.pdf].
+The files inside the `DISPLAY` directory can be visualized with ParaView. It is advised to read the `.pvd` files, which ensure time consistency. 
The different output files are:
+
+- tacts: contactors of rigid objects
+- rigids: center of mass of rigid objects
+- inter: interactions
+- mecafe: mechanical mesh
+- therfe: thermal mesh
+- porofe: porous mechanical mesh
+- multife: multi-phasic fluid in porous media mesh
+
+[Welcome]: <http://www.lmgc.univ-montp2.fr/~dubois/LMGC90/Web/Welcome_!.html>
+[pre_lmgc]: <http://www.lmgc.univ-montp2.fr/%7Edubois/LMGC90/UserDoc/pre/index.html>
+[chipy]: <http://www.lmgc.univ-montp2.fr/%7Edubois/LMGC90/UserDoc/chipy/index.html>
+[LMGC90_Postpro.pdf]: <https://git-xen.lmgc.univ-montp2.fr/lmgc90/lmgc90_user/blob/2017.rc1/manuals/LMGC90_Postpro.pdf>
diff --git a/docs.it4i/salomon/software/phys/PragTic.md b/docs.it4i/salomon/software/phys/PragTic.md
new file mode 100644
index 0000000000000000000000000000000000000000..7a4f5ccfbf6c548fa23fef746b75ea173d8da683
--- /dev/null
+++ b/docs.it4i/salomon/software/phys/PragTic.md
@@ -0,0 +1,54 @@
+# PragTic
+
+## Introduction
+
+PragTic is a freeware tool for automated fatigue damage calculation based on an FE solution, or done at an isolated point with no relation to FEA. More details on PragTic are available on its [website][Welcome]. The version available here is the HPC version, which was parallelized using the MPI library.
+
+## Modules
+
+PragTic, version 0.1, is available on Salomon via the module PragTic:
+
+```sh
+$ ml PragTic
+```
+
+The module sets up environment variables and loads some other modules required for running PragTic simulations. This particular command loads the default module, which is PragTic/0.1-GCC-5.3.0-2.25, and the modules:
+
+```console
+GCCcore/5.3.0
+binutils/2.25-GCCcore-5.3.0
+GCC/5.3.0-2.25
+MPICH/3.2-GCC-5.3.0-2.25
+```
+
+## Running
+
+To run a simulation sequentially, use:
+
+```sh
+pragtic DATABASE ANSET SET_OF_POINTS LOAD_REGIME METHOD RESULT_FILE
+```
+
+and to run it in parallel:
+
+```sh
+mpirun -np NP pragtic DATABASE ANSET SET_OF_POINTS LOAD_REGIME METHOD RESULT_FILE
+```
+
+where
+
+| Parameter | Description |
+| --------------- | -------------------------------------------- |
+| *NP* | number of processes |
+| *DATABASE* | .fdb database specially prepared for PragTic |
+| *ANSET* | analysis setup name |
+| *SET_OF_POINTS* | set of points name |
+| *LOAD_REGIME* | load regime name |
+| *METHOD* | method name |
+| *RESULT_FILE* | file where results will be saved |
+
+## Results
+
+After the computation, the newly created result file *RESULT_FILE* in the current directory should contain the results. More detailed result information can then be found in the file *res.txt*, which is in every randomly named folder created by PragTic in the very same current directory.
+
+[Welcome]: <http://www.pragtic.com/>
diff --git a/docs.it4i/salomon/storage.md b/docs.it4i/salomon/storage.md
index 23a7b6bcfd29b45a5f3b56f383133dd2c0036811..61adc35e7ab317a7dcc29d92e853a9ac01b6f6a4 100644
--- a/docs.it4i/salomon/storage.md
+++ b/docs.it4i/salomon/storage.md
@@ -9,9 +9,9 @@ All login and compute nodes may access same data on shared file systems. Compute
 ## Policy (In a Nutshell)
 
 !!! note
-    \* Use [HOME](#home) for your most valuable data and programs.
-    \* Use [WORK](#work) for your large project files.
-    \* Use [TEMP](#temp) for large scratch data.
+    \* Use [HOME](#home) for your most valuable data and programs.
+    \* Use [WORK](#work) for your large project files.
+    \* Use [TEMP](#temp) for large scratch data.
 
 !!! warning
     Do not use for [archiving](#archiving)! 
@@ -154,8 +154,8 @@ Entries: 6 # based on Robinhood ``` -In this example, we view current size limits and space occupied on the /home and /scratch filesystem, for a particular user executing the command. -Note that limits are imposed also on number of objects (files, directories, links, etc...) that are allowed to create. +In this example, we view current size limits and space occupied on the /home and /scratch filesystem, for a particular user executing the command. +Note that limits are imposed also on number of objects (files, directories, links, etc...) that are allowed to create. To have a better understanding of where the space is exactly used, you can use following command to find out. @@ -222,7 +222,7 @@ other::--- Default ACL mechanism can be used to replace setuid/setgid permissions on directories. Setting a default ACL on a directory (-d flag to setfacl) will cause the ACL permissions to be inherited by any newly created file or subdirectory within the directory. Refer to this page for more information on Linux ACL: -[http://www.vanemery.com/Linux/ACL/POSIX_ACL_on_Linux.html](http://www.vanemery.com/Linux/ACL/POSIX_ACL_on_Linux.html) +[redhat guide](https://access.redhat.com/documentation/en-US/Red_Hat_Storage/2.0/html/Administration_Guide/ch09s05.html) ## Shared Workspaces @@ -281,7 +281,7 @@ The TEMP workspace resides on SCRATCH file system. The TEMP workspace accesspoin Users are advised to save the necessary data from the TEMP workspace to HOME or WORK after the calculations and clean up the scratch files. -!!! warning +!!! warning Files on the TEMP file system that are **not accessed for more than 90 days** will be automatically **deleted**. The TEMP workspace is hosted on SCRATCH file system. The SCRATCH is realized as Lustre parallel file system and is available from all login and computational nodes. Default stripe size is 1 MB, stripe count is 1. There are 54 OSTs dedicated for the SCRATCH file system. @@ -302,34 +302,93 @@ The TEMP workspace is hosted on SCRATCH file system. The SCRATCH is realized as ## RAM Disk -Every computational node is equipped with file system realized in memory, so called RAM disk. +### Local RAM Disk -!!! note - Use RAM disk in case you need really fast access to your data of limited size during your calculation. Be very careful, use of RAM disk file system is at the expense of operational memory. +Every computational node is equipped with file system realized in memory, so called RAM disk. The local RAM disk is mounted as /ramdisk and is accessible to user at /ramdisk/$PBS_JOBID directory. -The local RAM disk file system is intended for temporary scratch data generated during the calculation as well as for high performance access to input and output files. Size of RAM disk file system is limited. Be very careful, use of RAM disk file system is at the expense of operational memory. It is not recommended to allocate large amount of memory and use large amount of data in RAM disk file system at the same time. +The RAM disk is private to a job and local to node, created when the job starts and deleted at the job end. !!! note The local RAM disk directory /ramdisk/$PBS_JOBID will be deleted immediately after the calculation end. Users should take care to save the output data from within the jobscript. -| RAM disk | | +The local RAM disk file system is intended for temporary scratch data generated during the calculation as well as +for high-performance access to input and output files. Size of RAM disk file system is limited. 
+It is not recommended to allocate large amounts of memory and use large amounts of data in the RAM disk file system at the same time.
+
+!!! warning
+    Be very careful, use of RAM disk file system is at the expense of operational memory.
+
+| Local RAM disk | |
 | ----------- | ------------------------------------------------------------------------------------------------------- |
 | Mountpoint | /ramdisk |
 | Accesspoint | /ramdisk/$PBS_JOBID |
-| Capacity | 120 GB |
+| Capacity | 110 GB |
 | Throughput | over 1.5 GB/s write, over 5 GB/s read, single thread, over 10 GB/s write, over 50 GB/s read, 16 threads |
 | User quota | none |
 
+### Global RAM Disk
+
+The Global RAM disk spans the local RAM disks of all the nodes within a single job.
+
+
+
+The Global RAM disk deploys
+the BeeGFS On Demand parallel filesystem, using local RAM disks as a storage backend.
+
+The Global RAM disk is mounted at /mnt/global_ramdisk.
+
+!!! note
+    The global RAM disk is on-demand. It has to be activated by **global_ramdisk=true** in the qsub command.
+
+```console
+$ qsub -q qprod -l select=4,global_ramdisk=true ./jobscript
+```
+
+This command would submit a 4-node job to the qprod queue; once it is running, a 440GB RAM disk shared across the 4 nodes will be created.
+The RAM disk will be accessible at /mnt/global_ramdisk; files written to this RAM disk will be visible on all 4 nodes.
+
+The file system is private to a job and shared among the nodes, created when the job starts and deleted at the job end.
+
+!!! note
+    The Global RAM disk will be deleted immediately after the calculation end. Users should take care to save the output data from within the jobscript.
+
+The files on the Global RAM disk will be equally striped across all the nodes, using a 512k stripe size.
+Check the Global RAM disk status:
+
+```console
+$ beegfs-df -p /mnt/global_ramdisk
+$ beegfs-ctl --mount=/mnt/global_ramdisk --getentryinfo /mnt/global_ramdisk
+```
+
+Use the Global RAM disk in case you need very large RAM disk space. The Global RAM disk allows for high-performance sharing of data among compute nodes
+within a job.
+
+!!! warning
+    Be very careful, use of Global RAM disk file system is at the expense of operational memory.
+
+| Global RAM disk | |
+| ------------------ | --------------------------------------------------------------------------|
+| Mountpoint | /mnt/global_ramdisk |
+| Accesspoint | /mnt/global_ramdisk |
+| Capacity | N*110 GB |
+| Throughput | 3*(N+1) GB/s, 2GB/s single POSIX thread |
+| User quota | none |
+
+N = number of compute nodes in the job.
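Because the Global RAM disk is purged when the job ends, a jobscript will typically stage data in and copy the results out before finishing; a minimal sketch (input.dat, output.dat and my_solver are illustrative placeholders):

```bash
#!/bin/bash
# stage input data onto the job-shared RAM disk
cp $PBS_O_WORKDIR/input.dat /mnt/global_ramdisk/

# run the computation against the RAM disk copy; all nodes see the same files
cd /mnt/global_ramdisk
$PBS_O_WORKDIR/my_solver input.dat

# save results to persistent storage before the job (and the RAM disk) ends
cp /mnt/global_ramdisk/output.dat $PBS_O_WORKDIR/
```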
+ ## Summary -| Mountpoint | Usage | Protocol | Net | Capacity | Throughput | Limitations | Access | -| ------------- | ------------------------------ | ----------- | ------- | -------- | ------------ | ----------------------- | --------------------------- | -| /home | home directory | NFS, 2-Tier | 0.5 PB | 6 GB/s | Quota 250GB | Compute and login nodes | backed up | -| /scratch/work | large project files | Lustre | 1.69 PB | 30 GB/s | Quota | Compute and login nodes | none | -| /scratch/temp | job temporary data | Lustre | 1.69 PB | 30 GB/s | Quota 100 TB | Compute and login nodes | files older 90 days removed | -| /ramdisk | job temporary data, node local | local | 120GB | 90 GB/s | none | Compute nodes | purged after job ends | +| Mountpoint | Usage | Protocol | Net Capacity| Throughput | Limitations | Access | Service | +| ------------------- | ------------------------------ | ----------- | ------------| -------------- | ------------ | --------------------------- | --------------------------- | +| /home | home directory | NFS, 2-Tier | 0.5 PB | 6 GB/s | Quota 250GB | Compute and login nodes | backed up | +| /scratch/work | large project files | Lustre | 1.69 PB | 30 GB/s | Quota | Compute and login nodes | none | +| /scratch/temp | job temporary data | Lustre | 1.69 PB | 30 GB/s | Quota 100 TB | Compute and login nodes | files older 90 days removed | +| /ramdisk | job temporary data, node local | tmpfs | 110GB | 90 GB/s | none | Compute nodes, node local | purged after job ends | +| /mnt/global_ramdisk | job temporary data | BeeGFS | N*110GB | 3*(N+1) GB/s | none | Compute nodes, job shared | purged after job ends | + +N = number of compute nodes in the job. ## CESNET Data Storage diff --git a/docs.it4i/snippets/modules_matrix_search.md b/docs.it4i/snippets/modules_matrix_search.md index b936364e15723838e1609be9cfb57df526496e85..4d91e6caf59329ca9d772f50226f78ff32038913 100644 --- a/docs.it4i/snippets/modules_matrix_search.md +++ b/docs.it4i/snippets/modules_matrix_search.md @@ -36,6 +36,6 @@ $("#searchInput").keyup(function () { "color": "black" }); }).css({ - "color": "#C0C0C0" + "color": "#C0C0C0" }); </script> diff --git a/docs.it4i/snippets/resource_accounting.md b/docs.it4i/snippets/resource_accounting.md new file mode 100644 index 0000000000000000000000000000000000000000..ac6dd7ca6b764b9d29079dd5d704cbffea2642d6 --- /dev/null +++ b/docs.it4i/snippets/resource_accounting.md @@ -0,0 +1,67 @@ +# Resource Accounting Policy + +## Wall-Clock Core-Hours WCH + +The wall-clock core-hours (WCH) are the basic metric of computer utilization time. +1 wall-clock core-hour is defined as 1 processor core allocated for 1 hour of wall-clock time. Allocating a full node (16 cores Anselm, 24 cores Salomon) +for 1 hour amounts to 16 wall-clock core-hours (Anselm) or 24 wall-clock core-hours (Salomon). + +## Normalized Core-Hours NCH + +The resources subject to accounting are the normalized core-hours (NCH). +The normalized core-hours are obtained from WCH by applying a normalization factor: + +$$ +NCH = F*WCH +$$ + +All jobs are accounted in normalized core-hours, using factor F valid at the time of the execution: + +| System | F | Validity | +| ------------------------------- | - | -------- | +| Salomon | 1.00 | 2017-09-11 to 2018-06-01 | +| Anselm | 0.65 | 2017-09-11 to 2018-06-01 | + +The accounting runs whenever the computational cores are allocated via the PBS Pro workload manager (the qsub command), regardless of whether +the cores are actually used for any calculation. + +!!! 
+    **The allocations are requested/granted in normalized core-hours NCH.**
+
+!!! warning
+    Whenever the term core-hour is used in this documentation, we mean the normalized core-hour, NCH.
+
+The normalized core-hours were introduced to treat systems of different age on an equal footing.
+The normalized core-hour is an accounting tool to discount the legacy systems. The past (before 2017-09-11) F factors are all 1.0.
+In the future, the factors F will be updated as new systems are installed. The factors F are expected to only decrease in time.
+
+See examples in the [Job submission and execution](job-submission-and-execution/) section.
+
+## Consumed Resources
+
+Check how many core-hours have been consumed. The it4ifree command is available on the cluster login nodes.
+
+```console
+$ it4ifree
+
+Projects I am participating in
+==============================
+PID        Days left   Total    Used WCHs   Used NCHs   WCHs by me   NCHs by me   Free
+---------- ----------- -------- ----------- ----------- ------------ ------------ -------
+OPEN-XX-XX 323         0        5169947     5169947     50001        50001        1292555
+
+
+Projects I am Primarily Investigating
+=====================================
+PID        Login      Used WCHs   Used NCHs
+---------- ---------- ----------- -----------
+OPEN-XX-XX user1      376670      376670
+           user2      4793277     4793277
+
+Legend
+======
+WCH = Wall-clock Core Hour
+NCH = Normalized Core Hour
+```
+
+The **it4ifree** command is part of the it4i.portal.clients package, located here: <https://pypi.python.org/pypi/it4i.portal.clients>
diff --git a/docs.it4i/software/bioinformatics.md b/docs.it4i/software/bio/bioinformatics.md
similarity index 100%
rename from docs.it4i/software/bioinformatics.md
rename to docs.it4i/software/bio/bioinformatics.md
diff --git a/docs.it4i/anselm/software/omics-master/diagnostic-component-team.md b/docs.it4i/software/bio/omics-master/diagnostic-component-team.md
similarity index 96%
rename from docs.it4i/anselm/software/omics-master/diagnostic-component-team.md
rename to docs.it4i/software/bio/omics-master/diagnostic-component-team.md
index d8d0c4fc4e26a25550cb96b6dbe16a7a587fecf5..ab091fd1e1615e820d0b660424b8fab39a36642f 100644
--- a/docs.it4i/anselm/software/omics-master/diagnostic-component-team.md
+++ b/docs.it4i/software/bio/omics-master/diagnostic-component-team.md
@@ -1,4 +1,4 @@
-# Diagnostic component (TEAM)
+# Diagnostic Component (TEAM)
 
 ## Access
 
@@ -13,6 +13,6 @@ VCF files are scanned by this diagnostic tool for known diagnostic disease-assoc
 
 TEAM (27) is an intuitive and easy-to-use web tool that fills the gap between the predicted mutations and the final diagnostic in targeted enrichment sequencing analysis. The tool searches for known diagnostic mutations, corresponding to a disease panel, among the predicted patient’s variants. Diagnostic variants for the disease are taken from four databases of disease-related variants (HGMD-public, HUMSAVAR, ClinVar and COSMIC). If no primary diagnostic variant is found, then a list of secondary findings that can help to establish a diagnostic is produced. TEAM also provides an interface for the definition and customization of panels, by means of which genes and mutations can be added or discarded to adjust panel definitions.
 
- 
+
 
 **Figure 5.** Interface of the application. Panels for defining targeted regions of interest can be set up by just dragging and dropping known disease genes or disease definitions from the lists. Thus, virtual panels can be interactively improved as the knowledge of the disease increases.
diff --git a/docs.it4i/anselm/software/omics-master/overview.md b/docs.it4i/software/bio/omics-master/overview.md
similarity index 98%
rename from docs.it4i/anselm/software/omics-master/overview.md
rename to docs.it4i/software/bio/omics-master/overview.md
index d09a0030cf06246720287c6d0ffad4bfd11825a6..e29f1daec829dd7af8a93409314a2caef755625d 100644
--- a/docs.it4i/anselm/software/omics-master/overview.md
+++ b/docs.it4i/software/bio/omics-master/overview.md
@@ -9,7 +9,7 @@ The scope of this OMICS MASTER solution is restricted to human genomics research
 
 The pipeline inputs the raw data produced by the sequencing machines and undergoes a processing procedure that consists of a quality control, the mapping and variant calling steps that result in a file containing the set of variants in the sample. From this point, the prioritization component or the diagnostic component can be launched.
 
-![OMICS MASTER solution overview. Data is produced in the external labs and comes to IT4I (represented by the blue dashed line). The data pre-processor converts raw data into a list of variants and annotations for each sequenced patient. These list files together with primary and secondary (alignment) data files are stored in IT4I sequence DB and uploaded to the discovery (candidate prioritization) or diagnostic component where they can be analyzed directly by the user that produced
+them, depending on the experimental design carried out.](../../img/fig1.png)
 
 Figure 1. OMICS MASTER solution overview. Data is produced in the external labs and comes to IT4I (represented by the blue dashed line). The data pre-processor converts raw data into a list of variants and annotations for each sequenced patient. These list files together with primary and secondary (alignment) data files are stored in IT4I sequence DB and uploaded to the discovery (candidate prioritization) or diagnostic component where they can be analyzed directly by the user that produced them, depending on the experimental design carried out.
 
@@ -41,7 +41,7 @@ Output: FASTQ file plus an HTML file containing statistics on the data.
 
 FASTQ format
 It represents the nucleotide sequence and its corresponding quality scores.
 
- 
+
 
 Figure 2. FASTQ file.
 
 #### Mapping
 
@@ -81,7 +81,7 @@ corresponding information is unavailable.
 
 The standard CIGAR description of pairwise alignment defines three operations: 'M' for match/mismatch, 'I' for insertion compared with the reference and 'D' for deletion. The extended CIGAR proposed in SAM added four more operations: 'N' for skipped bases on the reference, 'S' for soft clipping, 'H' for hard clipping and 'P' for padding. These support splicing, clipping, multi-part and padded alignments. Figure 3 shows examples of CIGAR strings for different types of alignments.
 
- 
+
 
 Figure 3. SAM format file. The '@SQ' line in the header section gives the order of reference sequences. Notably, r001 is the name of a read pair. According to FLAG 163 (=1+2+32+128), the read mapped to position 7 is the second read in the pair (128) and regarded as properly paired (1 + 2); its mate is mapped to 37 on the reverse strand (32). Read r002 has three soft-clipped (unaligned) bases. The coordinate shown in SAM is the position of the first aligned base. The CIGAR string for this alignment contains a P (padding) operation which correctly aligns the inserted sequences. Padding operations can be absent when an aligner does not support multiple sequence alignment. The last six bases of read r003 map to position 9, and the first five to position 29 on the reverse strand. The hard clipping operation H indicates that the clipped sequence is not present in the sequence field. The NM tag gives the number of mismatches. Read r004 is aligned across an intron, indicated by the N operation.
 
@@ -125,7 +125,7 @@ A VCF file consists of a header section and a data section.
The header contains
-
+two bases by another base (SAMPLE2); the second line shows a SNP and an insertion; the third a SNP; the fourth a large structural variant described by the annotation in the INFO column, the coordinate is that of the base before the variant. (b–f ) Alignments and VCF representations of different sequence variants: SNP, insertion, deletion, replacement, and a large deletion. The REF column shows the reference bases replaced by the haplotype in the ALT column. The coordinate refers to the first reference base. (g) Users are advised to use the simplest representation possible and the lowest coordinate in cases where the position is ambiguous.](../../img/fig4.png)
 
 Figure 4. (a) Example of valid VCF. The header lines ##fileformat and #CHROM are mandatory, the rest is optional but strongly recommended. Each line of the body describes variants present in the sampled population at one genomic position or region. All alternate alleles are listed in the ALT column and referenced from the genotype fields as 1-based indexes to this list; the reference haplotype is designated as 0. For multiploid data, the separator indicates whether the data are phased (|) or unphased (/). Thus, the two alleles C and G at the positions 2 and 5 in this figure occur on the same chromosome in SAMPLE1. The first data line shows an example of a deletion (present in SAMPLE1) and a replacement of two bases by another base (SAMPLE2); the second line shows a SNP and an insertion; the third a SNP; the fourth a large structural variant described by the annotation in the INFO column, the coordinate is that of the base before the variant. (b–f ) Alignments and VCF representations of different sequence variants: SNP, insertion, deletion, replacement, and a large deletion. The REF column shows the reference bases replaced by the haplotype in the ALT column. The coordinate refers to the first reference base. (g) Users are advised to use the simplest representation possible and the lowest coordinate in cases where the position is ambiguous.
 
@@ -231,7 +231,7 @@ second one.
 
    --project. Project ID of your supercomputer allocation.
 
-   --queue. [Queue](../../resources-allocation-policy/) to run the jobs in.
+   --queue. [Queue](../../salomon/resources-allocation-policy/) to run the jobs in.
 ```
 
 Input, output and ped arguments are mandatory. If the output folder does not exist, the pipeline will create it.
 
@@ -264,7 +264,7 @@ The ped file ( file.ped) contains the following info:
     FAM sample_B 0 0 2 2
 ```
 
-Now, lets load the NGSPipeline module and copy the sample data to a [scratch directory](../../storage/storage/):
+Now, let's load the NGSPipeline module and copy the sample data to a [scratch directory](../../salomon/storage/):
 
 ```console
 $ ml ngsPipeline
@@ -278,7 +278,7 @@ Now, we can launch the pipeline (replace OPEN-0-0 with your Project ID):
 $ ngsPipeline -i /scratch/$USER/omics/sample_data/data -o /scratch/$USER/omics/results -p /scratch/$USER/omics/sample_data/data/file.ped --project OPEN-0-0 --queue qprod
 ```
 
-This command submits the processing [jobs to the queue](../../job-submission-and-execution/).
+This command submits the processing [jobs to the queue](../../salomon/job-submission-and-execution/).
 
 If we want to re-launch the pipeline from stage 4 until stage 20, we should use the following command:
 
@@ -336,25 +336,25 @@ This listing show which tools are used in each step of the pipeline
 
 The output folder contains all the subfolders with the intermediate data. This folder contains the final VCF with all the variants.
This file can be uploaded into [TEAM](diagnostic-component-team/) by using the VCF file button. It is important to note here that the entire management of the VCF file is local: no patient’s sequence data is sent over the Internet, thus avoiding any problem of data privacy or confidentiality.
 
-![TEAM upload panel. Once the file has been uploaded, a panel must be chosen from the Panel list. Then, pressing the Run button the diagnostic process starts.]\((../../../img/fig7.png)
+![TEAM upload panel. Once the file has been uploaded, a panel must be chosen from the Panel list. Then, pressing the Run button the diagnostic process starts.](../../img/fig7.png)
 
 Figure 7. _TEAM upload panel._ _Once the file has been uploaded, a panel must be chosen from the Panel_ list. Then, pressing the Run button starts the diagnostic process.
 
 Once the file has been uploaded, a panel must be chosen from the Panel list. Then, pressing the Run button starts the diagnostic process. TEAM searches first for known diagnostic mutation(s) taken from four databases: HGMD-public (20), [HUMSAVAR](http://www.uniprot.org/docs/humsavar), ClinVar (29) and COSMIC (23).
 
- 
+
 
 Figure 7. The panel manager. The elements used to define a panel are (A) disease terms, (B) diagnostic mutations and (C) genes. Arrows represent actions that can be taken in the panel manager. Panels can be defined by using the known mutations and genes of a particular disease. This can be done by dragging them to the Primary Diagnostic box (action D). This action, in addition to defining the diseases in the Primary Diagnostic box, automatically adds the corresponding genes to the Genes box. The panels can be customized by adding new genes (action F) or removing undesired genes (action G). New disease mutations can be added independently or associated to an already existing disease term (action E). Disease terms can be removed by simply dragging them back (action H).
 
 For variant discovering/filtering we should upload the VCF file into BierApp by using the following form:
 
-\\ 
+
 
 Figure 8. BierApp VCF upload panel. It is recommended to choose a name for the job as well as a description.
 
 Each prioritization ('job') has three associated screens that facilitate the filtering steps. The first one, the 'Summary' tab, displays a statistic of the data set analyzed, containing the samples analyzed, the number and types of variants found and its distribution according to consequence types. The second screen, in the 'Variants and effect' tab, is the actual filtering tool, and the third one, the 'Genome view' tab, offers a representation of the selected variants within the genomic context provided by an embedded version of the Genome Maps Tool (30).
 
- 
+
 
 Figure 9. This picture shows all the information associated to the variants. If a variant has an associated phenotype, we can see it in the last column. In this case, the variant 7:132481242 CT is associated with the phenotype: large intestine tumor.
diff --git a/docs.it4i/anselm/software/omics-master/priorization-component-bierapp.md b/docs.it4i/software/bio/omics-master/priorization-component-bierapp.md
similarity index 95%
rename from docs.it4i/anselm/software/omics-master/priorization-component-bierapp.md
rename to docs.it4i/software/bio/omics-master/priorization-component-bierapp.md
index 6f88fecc7b92de4bd29a6e022902cb06dbbf1300..df31fced4ebb8d58580821800ed71989094cd11d 100644
--- a/docs.it4i/anselm/software/omics-master/priorization-component-bierapp.md
+++ b/docs.it4i/software/bio/omics-master/priorization-component-bierapp.md
@@ -1,4 +1,4 @@
-# Prioritization component (BiERapp)
+# Prioritization Component (BiERapp)
 
 ## Access
 
@@ -13,7 +13,7 @@ BiERapp is available at the [following address](http://omics.it4i.cz/bierapp/)
 
 BiERapp (28) efficiently helps in the identification of causative variants in family and sporadic genetic diseases. The program reads lists of predicted variants (nucleotide substitutions and indels) in affected individuals or tumor samples and controls. In family studies, different modes of inheritance can easily be defined to filter out variants that do not segregate with the disease along the family. Moreover, BiERapp integrates additional information such as allelic frequencies in the general population and the most popular damaging scores to further narrow down the number of putative variants in successive filtering steps. BiERapp provides an interactive and user-friendly interface that implements the filtering strategy used in the context of a large-scale genomic project carried out by the Spanish Network for Research in Rare Diseases (CIBERER) and the Medical Genome Project, in which more than 800 exomes have been analyzed.
 
- 
+
 
 **Figure 6.** Web interface to the prioritization tool. This figure shows the interface of the web tool for candidate gene prioritization with the filters available. The tool includes a genomic viewer (Genome Maps 30) that enables the representation of the variants in the corresponding genomic coordinates.
 
diff --git a/docs.it4i/salomon/software/comsol/comsol-multiphysics.md b/docs.it4i/software/cae/comsol/comsol-multiphysics.md
similarity index 83%
rename from docs.it4i/salomon/software/comsol/comsol-multiphysics.md
rename to docs.it4i/software/cae/comsol/comsol-multiphysics.md
index 431294469311b408c9e023c17347cae239037622..4827d2d4a97ec3f141b0cbf3a18e25ee85ead9b6 100644
--- a/docs.it4i/salomon/software/comsol/comsol-multiphysics.md
+++ b/docs.it4i/software/cae/comsol/comsol-multiphysics.md
@@ -16,23 +16,25 @@ COMSOL also allows an interface support for equation-based modelling of partial
 
 On the clusters COMSOL is available in the latest stable version. There are two variants of the release:
 
-* **Non commercial** or so called >**EDU variant**>, which can be used for research and educational purposes.
+* **Non commercial** or so called **EDU variant**, which can be used for research and educational purposes.
 
-* **Commercial** or so called **COM variant**, which can used also for commercial activities. **COM variant** has only subset of features compared to the **EDU variant** available. More about licensing will be posted here soon.
+* **Commercial** or so called **COM variant**, which can also be used for commercial activities. The **COM variant** has only a subset of the features available in the **EDU variant**. More about licensing [here](licensing-and-available-versions/).
 To load COMSOL, load the module
 
 ```console
-$ ml COMSOL/51-EDU
+$ ml COMSOL
 ```
 
 By default the **EDU variant** will be loaded. If the user needs another version or variant, load that particular version. To obtain the list of available versions, use
 
 ```console
-$ ml av COMSOL
+$ ml av COMSOL
 ```
 
-If user needs to prepare COMSOL jobs in the interactive mode it is recommend to use COMSOL on the compute nodes via PBS Pro scheduler. In order run the COMSOL Desktop GUI on Windows is recommended to use the [Virtual Network Computing (VNC)](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/).
+If the user needs to prepare COMSOL jobs in the interactive mode, it is recommended to use COMSOL on the compute nodes via the PBS Pro scheduler. In order to run the COMSOL Desktop GUI on Windows, it is recommended to use [Virtual Network Computing (VNC)](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/).
+
+Example for Salomon:
 
 ```console
 $ xhost +
@@ -50,7 +52,7 @@ To run COMSOL in batch mode, without the COMSOL Desktop GUI environment, user ca
 #PBS -N JOB_NAME
 #PBS -A PROJECT_ID
 
-cd /scratch/work/user/$USER/ || exit
+cd /scratch/work/user/$USER/ || exit # on Anselm use: /scratch/$USER
 
 echo Time is `date`
 echo Directory is `pwd`
@@ -65,7 +67,7 @@ module load COMSOL
 
 ntask=$(wc -l $PBS_NODEFILE)
 
-comsol -nn ${ntask} batch -configuration /tmp –mpiarg –rmk –mpiarg pbs -tmpdir /scratch/$USER/ -inputfile name_input_f.mph -outputfile name_output_f.mph -batchlog name_log_f.log
+comsol -nn ${ntask} batch -configuration /tmp -mpiarg -rmk -mpiarg pbs -tmpdir /scratch/.../$USER/ -inputfile name_input_f.mph -outputfile name_output_f.mph -batchlog name_log_f.log
 ```
 
 The working directory has to be created before sending the (comsol.pbs) job script into the queue. The input file (name_input_f.mph) has to be in the working directory, or the full path to the input file has to be specified. The appropriate path to the temp directory of the job has to be set by the command option (-tmpdir).
 
@@ -74,7 +76,7 @@ Working directory has to be created before sending the (comsol.pbs) job script i
 
 COMSOL is the software package for the numerical solution of the partial differential equations. LiveLink for MATLAB allows connection to the COMSOL API (Application Programming Interface) with the benefits of the programming language and computing environment of MATLAB.
 
-LiveLink for MATLAB is available in both **EDU** and **COM** **variant** of the COMSOL release. On the clusters 1 commercial (**COM**) license and the 5 educational (**EDU**) licenses of LiveLink for MATLAB (please see the [ISV Licenses](../../../anselm/software/isv_licenses/)) are available. Following example shows how to start COMSOL model from MATLAB via LiveLink in the interactive mode.
+LiveLink for MATLAB is available in both the **EDU** and **COM** **variants** of the COMSOL release. On the clusters, 1 commercial (**COM**) license and 5 educational (**EDU**) licenses of LiveLink for MATLAB are available (please see the [ISV Licenses](../isv_licenses/)). The following example shows how to start a COMSOL model from MATLAB via LiveLink in the interactive mode (on Anselm use 16 threads).
 ```console
 $ xhost +
@@ -95,7 +97,7 @@ To run LiveLink for MATLAB in batch mode with (comsol_matlab.pbs) job script you
 #PBS -N JOB_NAME
 #PBS -A PROJECT_ID
 
-cd /scratch/work/user/$USER || exit
+cd /scratch/work/user/$USER || exit # on Anselm use: /scratch/$USER
 
 echo Time is `date`
 echo Directory is `pwd`
@@ -115,4 +117,4 @@ cd /apps/cae/COMSOL/51/mli
 matlab -nodesktop -nosplash -r "mphstart; addpath /scratch/work/user/$USER/work; test_job"
 ```
 
-This example shows how to run Livelink for MATLAB with following configuration: 3 nodes and 16 cores per node. Working directory has to be created before submitting (comsol_matlab.pbs) job script into the queue. Input file (test_job.m) has to be in working directory or full path to input file has to be specified. The Matlab command option (-r ”mphstart”) created a connection with a COMSOL server using the default port number.
+This example shows how to run LiveLink for MATLAB with the following configuration: 3 nodes and 24 cores per node. The working directory has to be created before submitting the (comsol_matlab.pbs) job script into the queue. The input file (test_job.m) has to be in the working directory, or the full path to the input file has to be specified. The MATLAB command option (-r "mphstart") creates a connection with a COMSOL server using the default port number.
diff --git a/docs.it4i/salomon/software/comsol/licensing-and-available-versions.md b/docs.it4i/software/cae/comsol/licensing-and-available-versions.md
similarity index 100%
rename from docs.it4i/salomon/software/comsol/licensing-and-available-versions.md
rename to docs.it4i/software/cae/comsol/licensing-and-available-versions.md
diff --git a/docs.it4i/salomon/software/chemistry/INCAR b/docs.it4i/software/chemistry/INCAR
similarity index 100%
rename from docs.it4i/salomon/software/chemistry/INCAR
rename to docs.it4i/software/chemistry/INCAR
diff --git a/docs.it4i/salomon/software/chemistry/KPOINTS b/docs.it4i/software/chemistry/KPOINTS
similarity index 100%
rename from docs.it4i/salomon/software/chemistry/KPOINTS
rename to docs.it4i/software/chemistry/KPOINTS
diff --git a/docs.it4i/salomon/software/chemistry/POSCAR b/docs.it4i/software/chemistry/POSCAR
similarity index 100%
rename from docs.it4i/salomon/software/chemistry/POSCAR
rename to docs.it4i/software/chemistry/POSCAR
diff --git a/docs.it4i/salomon/software/chemistry/POTCAR b/docs.it4i/software/chemistry/POTCAR
similarity index 100%
rename from docs.it4i/salomon/software/chemistry/POTCAR
rename to docs.it4i/software/chemistry/POTCAR
diff --git a/docs.it4i/salomon/software/chemistry/gofree-cond1.sh b/docs.it4i/software/chemistry/gofree-cond1.sh
similarity index 100%
rename from docs.it4i/salomon/software/chemistry/gofree-cond1.sh
rename to docs.it4i/software/chemistry/gofree-cond1.sh
diff --git a/docs.it4i/salomon/software/chemistry/molpro.md b/docs.it4i/software/chemistry/molpro.md
similarity index 75%
rename from docs.it4i/salomon/software/chemistry/molpro.md
rename to docs.it4i/software/chemistry/molpro.md
index ab53760cda8c5efa186e93d7ab9d4b4032979f53..2fb61643afd70154ca9870375bae76ff27188805 100644
--- a/docs.it4i/salomon/software/chemistry/molpro.md
+++ b/docs.it4i/software/chemistry/molpro.md
@@ -35,29 +35,29 @@ Molpro is compiled for parallel execution using MPI and OpenMP. By default, Molp
 
 !!! note
     The OpenMP parallelization in Molpro is limited and has been observed to produce limited scaling. We therefore recommend using MPI parallelization only. This can be achieved by passing the option mpiprocs=16:ompthreads=1 to PBS.
-You are advised to use the -d option to point to a directory in [SCRATCH filesystem](../../storage/storage/). Molpro can produce a large amount of temporary data during its run, and it is important that these are placed in the fast scratch filesystem.
+You are advised to use the -d option to point to a directory in the [SCRATCH file system - Salomon](../../salomon/storage/). Molpro can produce a large amount of temporary data during its run, and it is important that these are placed in the fast scratch file system.
 
 ### Example jobscript
 
 ```bash
-    #PBS -A IT4I-0-0
-    #PBS -q qprod
-    #PBS -l select=1:ncpus=16:mpiprocs=16:ompthreads=1
+#PBS -A IT4I-0-0
+#PBS -q qprod
+#PBS -l select=1:ncpus=16:mpiprocs=16:ompthreads=1
 
-    cd $PBS_O_WORKDIR
+cd $PBS_O_WORKDIR
 
-    # load Molpro module
-    module add molpro
+# load Molpro module
+module add molpro
 
-    # create a directory in the SCRATCH filesystem
-    mkdir -p /scratch/$USER/$PBS_JOBID
+# create a directory in the SCRATCH filesystem
+mkdir -p /scratch/$USER/$PBS_JOBID
 
-    # copy an example input
-    cp /apps/chem/molpro/2010.1/molprop_2010_1_Linux_x86_64_i8/examples/caffeine_opt_diis.com .
+# copy an example input
+cp /apps/chem/molpro/2010.1/molprop_2010_1_Linux_x86_64_i8/examples/caffeine_opt_diis.com .
 
-    # run Molpro with default options
-    molpro -d /scratch/$USER/$PBS_JOBID caffeine_opt_diis.com
+# run Molpro with default options
+molpro -d /scratch/$USER/$PBS_JOBID caffeine_opt_diis.com
 
-    # delete scratch directory
-    rm -rf /scratch/$USER/$PBS_JOBID
+# delete scratch directory
+rm -rf /scratch/$USER/$PBS_JOBID
 ```
diff --git a/docs.it4i/software/chemistry/nwchem.md b/docs.it4i/software/chemistry/nwchem.md
new file mode 100644
index 0000000000000000000000000000000000000000..41c2006e414243c979e987bfcbcfb85e932df72c
--- /dev/null
+++ b/docs.it4i/software/chemistry/nwchem.md
@@ -0,0 +1,36 @@
+# NWChem
+
+## Introduction
+
+NWChem aims to provide its users with computational chemistry tools that are scalable both in their ability to treat large scientific computational chemistry problems efficiently, and in their use of available parallel computing resources from high-performance parallel supercomputers to conventional workstation clusters.
+
+[Homepage](http://www.nwchem-sw.org/index.php/Main_Page)
+
+## Installed Versions
+
+For a current list of installed versions, execute:
+
+```console
+$ ml av NWChem
+```
+
+## Running
+
+NWChem is compiled for parallel MPI execution. The normal procedure for MPI jobs applies. Sample jobscript (for Salomon, 24 MPI processes):
+
+```bash
+#PBS -A IT4I-0-0
+#PBS -q qprod
+#PBS -l select=1:ncpus=24:mpiprocs=24
+
+cd $PBS_O_WORKDIR
+module add NWChem
+mpirun nwchem h2o.nw
+```
+
+## Options
+
+Please refer to [the documentation](http://www.nwchem-sw.org/index.php/Release62:Top-level) and set the following directives in the input file:
+
+* MEMORY: controls the amount of memory NWChem will use
+* SCRATCH_DIR: set this to a directory in the [SCRATCH file system - Salomon](../../salomon/storage/) (or run the calculation completely in a scratch directory). For certain calculations, it might be advisable to reduce I/O by forcing "direct" mode, e.g. "scf direct"
"scf direct" diff --git a/docs.it4i/software/chemistry/orca.md b/docs.it4i/software/chemistry/orca.md new file mode 100644 index 0000000000000000000000000000000000000000..5712a6d891cc40578ecca371442669b68557faf4 --- /dev/null +++ b/docs.it4i/software/chemistry/orca.md @@ -0,0 +1,190 @@ +# ORCA + +ORCA is a flexible, efficient and easy-to-use general purpose tool for quantum chemistry with specific emphasis on spectroscopic properties of open-shell molecules. It features a wide variety of standard quantum chemical methods ranging from semiempirical methods to DFT to single- and multireference correlated ab initio methods. It can also treat environmental and relativistic effects. + +## ORCA Available on the Cluster + +Many versions of ORCA is available on our clusters. You can list all versions by `ml av` command. + +```console +$ ml av orca +-------------------------- /apps/modules/chem -------------------------- + ORCA/4_0_0-linux_x86-64 ORCA/4_0_1-linux_x86-64 ORCA/4.0.1.2 (D) + + Where: + D: Default Module +``` + +## Serial Computation With ORCA + +You can test serial computation with this simple input file. Create a file called orca_serial.inp and fill it with following ORCA commands. + +```bash + # Taken from the Orca manual + # https://orcaforum.cec.mpg.de/OrcaManual.pdf + ! HF SVP + * xyz 0 1 + C 0 0 0 + O 0 0 1.13 + * +``` + +Next, create a PBS submission file (interactive job can be used too). + +```bash +#!/bin/bash +#PBS -S /bin/bash +#PBS -N ORCA_SERIAL +#PBS -l select=1 +#PBS -q qexp + +ml ORCA/4.0.1.2 +${EBROOTORCA}orca orca_serial.inp +``` + +Submit the job to the queue and wait before it ends. Then you can find an output log in your working directory. + +```console +$ qsub submit_serial.pbs +1417552.dm2 + +$ ll ORCA_SERIAL.* +-rw------- 1 hra0031 hra0031 0 Aug 21 12:24 ORCA_SERIAL.e1417552 +-rw------- 1 hra0031 hra0031 20715 Aug 21 12:25 ORCA_SERIAL.o1417552 + +$ cat ORCA_SERIAL.o1417552 + + ***************** + * O R C A * + ***************** + + --- An Ab Initio, DFT and Semiempirical electronic structure package --- + + ####################################################### + # -***- # + # Department of molecular theory and spectroscopy # + # Directorship: Frank Neese # + # Max Planck Institute for Chemical Energy Conversion # + # D-45470 Muelheim/Ruhr # + # Germany # + # # + # All rights reserved # + # -***- # + ####################################################### + + + Program Version 4.0.1.2 - RELEASE - + +... + + ****ORCA TERMINATED NORMALLY**** +TOTAL RUN TIME: 0 days 0 hours 0 minutes 1 seconds 47 msec +``` + +## Running ORCA in Parallel + +Your serial computation can be converted to parallel in a very simple way. You simply have to specify the number of parallel processes by directive **%pal**. In this example, 4 nodes, 16 cores each are used. + +!!! warning + Don't use **! PAL** directive as only PAL2 to PAL8 is recognized! + +```bash + # Taken from the Orca manual + # https://orcaforum.cec.mpg.de/OrcaManual.pdf + ! HF SVP + %pal + nprocs 64 # 4 nodes, 16 cores each + end + * xyz 0 1 + C 0 0 0 + O 0 0 1.13 + * +``` + +You also need to edit the previously used PBS submission file. You have to specify number of nodes, cores and MPI-processes to run. + +```bash +#!/bin/bash +#PBS -S /bin/bash +#PBS -N ORCA_PARALLEL +#PBS -l select=4:ncpus=16:mpiprocs=16 +#PBS -q qexp + +ml ORCA/4.0.1.2 +${EBROOTORCA}/orca orca_parallel.inp +``` + +!!! note + When running ORCA in parallel, ORCA should **NOT** be started with mpirun: e.g. mpirun -np 4 orca etc. 
+
+Submit this job to the queue and see the output file.
+
+```console
+$ qsub submit_parallel.pbs
+1417598.dm2
+
+$ ll ORCA_PARALLEL.*
+-rw------- 1 hra0031 hra0031     0 Aug 21 13:12 ORCA_PARALLEL.e1417598
+-rw------- 1 hra0031 hra0031 23561 Aug 21 13:13 ORCA_PARALLEL.o1417598
+
+$ cat ORCA_PARALLEL.o1417598
+
+                                 *****************
+                                 * O   R   C   A *
+                                 *****************
+
+       --- An Ab Initio, DFT and Semiempirical electronic structure package ---
+
+                  #######################################################
+                  #                        -***-                        #
+                  #  Department of molecular theory and spectroscopy    #
+                  #              Directorship: Frank Neese              #
+                  # Max Planck Institute for Chemical Energy Conversion #
+                  #                 D-45470 Muelheim/Ruhr               #
+                  #                      Germany                        #
+                  #                                                     #
+                  #                 All rights reserved                 #
+                  #                        -***-                        #
+                  #######################################################
+
+
+                         Program Version 4.0.1.2 -  RELEASE  -
+...
+
+           ************************************************************
+           *       Program running with 64 parallel MPI-processes     *
+           *             working on a common directory                *
+           ************************************************************
+
+...
+                             ****ORCA TERMINATED NORMALLY****
+TOTAL RUN TIME: 0 days 0 hours 0 minutes 11 seconds 859 msec
+```
+
+You can see that the program ran with 64 parallel MPI processes. In version 4.0.1.2, only the following modules are parallelized:
+
+* ANOINT
+* CASSCF / NEVPT2
+* CIPSI
+* CIS/TDDFT
+* CPSCF
+* EPRNMR
+* GTOINT
+* MDCI (Canonical-, PNO-, DLPNO-Methods)
+* MP2 and RI-MP2 (including Gradient and Hessian)
+* MRCI
+* PC
+* ROCIS
+* SCF
+* SCFGRAD
+* SCFHESS
+* SOC
+* Numerical Gradients and Frequencies
+
+## Register as a User
+
+You are encouraged to register as a user of ORCA [here](https://orcaforum.cec.mpg.de/) in order to take advantage of updates, announcements, and the users' forum.
+
+## Documentation
+
+A comprehensive [PDF](https://orcaforum.cec.mpg.de/OrcaManual.pdf) manual is available online.
diff --git a/docs.it4i/salomon/software/chemistry/phono3py.md b/docs.it4i/software/chemistry/phono3py.md
similarity index 97%
rename from docs.it4i/salomon/software/chemistry/phono3py.md
rename to docs.it4i/software/chemistry/phono3py.md
index 5f366baa1e6acb0cb948cd473a9acb65243691c8..884f25cbf89a4b43afa3afc9a02fdbec171c9383 100644
--- a/docs.it4i/salomon/software/chemistry/phono3py.md
+++ b/docs.it4i/software/chemistry/phono3py.md
@@ -18,7 +18,7 @@ $ ml phono3py
 
 ### Calculating Force Constants
 
-One needs to calculate second order and third order force constants using the diamond structure of silicon stored in [POSCAR](poscar-si) (the same form as in VASP) using single displacement calculations within supercell.
+One needs to calculate second order and third order force constants using the diamond structure of silicon stored in [POSCAR](POSCAR) (the same form as in VASP) using single displacement calculations within a supercell.
 ```console
 $ cat POSCAR
diff --git a/docs.it4i/salomon/software/chemistry/prepare.sh b/docs.it4i/software/chemistry/prepare.sh
similarity index 100%
rename from docs.it4i/salomon/software/chemistry/prepare.sh
rename to docs.it4i/software/chemistry/prepare.sh
diff --git a/docs.it4i/salomon/software/chemistry/run.sh b/docs.it4i/software/chemistry/run.sh
similarity index 100%
rename from docs.it4i/salomon/software/chemistry/run.sh
rename to docs.it4i/software/chemistry/run.sh
diff --git a/docs.it4i/salomon/software/chemistry/submit.sh b/docs.it4i/software/chemistry/submit.sh
similarity index 100%
rename from docs.it4i/salomon/software/chemistry/submit.sh
rename to docs.it4i/software/chemistry/submit.sh
diff --git a/docs.it4i/salomon/software/compilers.md b/docs.it4i/software/compilers.md
similarity index 92%
rename from docs.it4i/salomon/software/compilers.md
rename to docs.it4i/software/compilers.md
index a49aa8eb4dfa2d832572e8c225b6ceccdd84bc82..e264a458cb482a9bf0cd2ad684e227a01ef83987 100644
--- a/docs.it4i/salomon/software/compilers.md
+++ b/docs.it4i/software/compilers.md
@@ -1,6 +1,6 @@
 # Compilers
 
-Available compilers, including GNU, INTEL and UPC compilers
+## Available Compilers
 
 There are several compilers for different programming languages available on the cluster:
 
@@ -8,6 +8,7 @@ There are several compilers for different programming languages available on the
 * Fortran 77/90/95/HPF
 * Unified Parallel C
 * Java
+* NVIDIA CUDA (only on Anselm)
 
 The C/C++ and Fortran compilers are provided by:
 
@@ -23,11 +24,11 @@ Commercial licenses:
 
 ## Intel Compilers
 
-For information about the usage of Intel Compilers and other Intel products, please read the [Intel Parallel studio](intel-suite/) page.
+For information about the usage of Intel Compilers and other Intel products, please read the [Intel Parallel studio](intel-suite/intel-compilers/) page.
 
-## PGI Compilers
+## PGI Compilers (Only on Salomon)
 
-The Portland Group Cluster Development Kit (PGI CDK) is available.
+The Portland Group Cluster Development Kit (PGI CDK) is available on Salomon.
 
 ```console
 $ module load PGI
@@ -132,7 +133,7 @@ For more information see the man pages.
 
 To use the Berkley UPC compiler and runtime environment to run the binaries, use the module bupc
 
 ```console
-$ module add BerkeleyUPC/2.16.2-gompi-2015b
+$ module add BerkeleyUPC/2.16.2-gompi-2015b # on Anselm: ml bupc
 $ upcc -version
 ```
 
@@ -174,7 +175,7 @@ To run the example with 5 threads issue
 $ upcrun -n 5 ./hello.upc.x
 ```
 
-To run the example on two compute nodes using all 48 cores, with 48 threads, issue
+To run the example on two compute nodes using all 48 cores, with 48 threads, issue (on Anselm, compute on 32 cores)
 
 ```console
 $ qsub -I -q qprod -A PROJECT_ID -l select=2:ncpus=24
@@ -190,4 +191,4 @@ For information how to use Java (runtime and/or compiler), please read the [Java
 
 ## NVIDIA CUDA
 
-For information how to work with NVIDIA CUDA, please read the [NVIDIA CUDA page](../../anselm/software/nvidia-cuda/).
+For information on how to work with NVIDIA CUDA, please read the [NVIDIA CUDA page](../anselm/software/nvidia-cuda/).
diff --git a/docs.it4i/salomon/software/debuggers/Introduction.md b/docs.it4i/software/debuggers/Introduction.md similarity index 98% rename from docs.it4i/salomon/software/debuggers/Introduction.md rename to docs.it4i/software/debuggers/Introduction.md index 4ce2fc77b013659f5b128408e4ec5f0e78c9c686..d5541e2f81ce812a8278cc54e35a3880be9e2cb1 100644 --- a/docs.it4i/salomon/software/debuggers/Introduction.md +++ b/docs.it4i/software/debuggers/Introduction.md @@ -1,4 +1,4 @@ -# Debuggers and profilers summary +# Debuggers and Profilers Summary ## Introduction diff --git a/docs.it4i/salomon/software/debuggers/aislinn.md b/docs.it4i/software/debuggers/aislinn.md similarity index 99% rename from docs.it4i/salomon/software/debuggers/aislinn.md rename to docs.it4i/software/debuggers/aislinn.md index 89cf7538016c004b1ba9058bcf148bbf0761eb50..2a945a04e8c74218b312c2ad9d6bdd08f1f54a5b 100644 --- a/docs.it4i/salomon/software/debuggers/aislinn.md +++ b/docs.it4i/software/debuggers/aislinn.md @@ -79,7 +79,7 @@ $ firefox report.html At the beginning of the report there are some basic summaries of the verification. In the second part (depicted in the following picture), the error is described. - + It shows us: diff --git a/docs.it4i/salomon/software/debuggers/allinea-ddt.md b/docs.it4i/software/debuggers/allinea-ddt.md similarity index 99% rename from docs.it4i/salomon/software/debuggers/allinea-ddt.md rename to docs.it4i/software/debuggers/allinea-ddt.md index 6e1f046f10fd2d521343a995cb59580440080a73..67bfdff184ed1244a154848de728d58f4c678c94 100644 --- a/docs.it4i/salomon/software/debuggers/allinea-ddt.md +++ b/docs.it4i/software/debuggers/allinea-ddt.md @@ -75,7 +75,7 @@ $ ddt test_debug A submission window that appears have a prefilled path to the executable to debug. You can select the number of MPI processors and/or OpenMP threads on which to run and press run. Command line arguments to a program can be entered to the "Arguments " box. - + To start the debugging directly without the submission window, user can specify the debugging and execution parameters from the command line. For example the number of MPI processes is set by option "-np 4". Skipping the dialog is done by "-start" option. To see the list of the "ddt" command line parameters, run "ddt --help". diff --git a/docs.it4i/salomon/software/debuggers/allinea-performance-reports.md b/docs.it4i/software/debuggers/allinea-performance-reports.md similarity index 96% rename from docs.it4i/salomon/software/debuggers/allinea-performance-reports.md rename to docs.it4i/software/debuggers/allinea-performance-reports.md index ead91a093c83ba9503f2be7ba702e698d7bca0df..14451cdb0a25a5704fcafb61b9b0a558b6df882c 100644 --- a/docs.it4i/salomon/software/debuggers/allinea-performance-reports.md +++ b/docs.it4i/software/debuggers/allinea-performance-reports.md @@ -28,7 +28,7 @@ Instead of [running your MPI program the usual way](../mpi/mpi/), use the the pe $ perf-report mpirun ./mympiprog.x ``` -The mpi program will run as usual. The perf-report creates two additional files, in \*.txt and \*.html format, containing the performance report. Note that demanding MPI codes should be run within [the queue system](../../job-submission-and-execution/). +The mpi program will run as usual. The perf-report creates two additional files, in \*.txt and \*.html format, containing the performance report. Note that demanding MPI codes should be run within [the queue system](../../anselm/job-submission-and-execution/). 
## Example @@ -45,7 +45,7 @@ $ qsub -q qexp -l select=2:ppn=24:mpiprocs=24:ompthreads=1 -I Then we load the modules and run the program the usual way: ```console -$ ml intel +$ ml intel $ ml PerfReports/6.0 $ mpirun ./mympiprog.x ``` diff --git a/docs.it4i/anselm/software/debuggers/cube.md b/docs.it4i/software/debuggers/cube.md similarity index 95% rename from docs.it4i/anselm/software/debuggers/cube.md rename to docs.it4i/software/debuggers/cube.md index a7f88955e78159f5800a37e603f91fa09e3ccdbe..4edf6ea02a445e633315ae4448c72e2c74a72fae 100644 --- a/docs.it4i/anselm/software/debuggers/cube.md +++ b/docs.it4i/software/debuggers/cube.md @@ -10,7 +10,7 @@ CUBE is a graphical performance report explorer for displaying data from Score-P Each dimension is organized in a tree, for example the time performance metric is divided into Execution time and Overhead time, call path dimension is organized by files and routines in your source code etc. - + \*Figure 1. Screenshot of CUBE displaying data from Scalasca.\* @@ -18,7 +18,7 @@ Each node in the tree is colored by severity (the color scheme is displayed at t ## Installed Versions -Currently, there are two versions of CUBE 4.2.3 available as [modules](../../environment-and-modules/): +Currently, there are two versions of CUBE 4.2.3 available as [modules](../../modules-matrix/): * cube/4.2.3-gcc, compiled with GCC * cube/4.2.3-icc, compiled with Intel compiler diff --git a/docs.it4i/anselm/software/debuggers/intel-performance-counter-monitor.md b/docs.it4i/software/debuggers/intel-performance-counter-monitor.md similarity index 99% rename from docs.it4i/anselm/software/debuggers/intel-performance-counter-monitor.md rename to docs.it4i/software/debuggers/intel-performance-counter-monitor.md index b46b472b68577a3f0764199439de310a967a4bde..3373cc4eeb9c92ab49f1ee9e72005d70911d1f46 100644 --- a/docs.it4i/anselm/software/debuggers/intel-performance-counter-monitor.md +++ b/docs.it4i/software/debuggers/intel-performance-counter-monitor.md @@ -6,7 +6,7 @@ Intel PCM (Performance Counter Monitor) is a tool to monitor performance hardwar ## Installed Version -Currently installed version 2.6. To load the [module](../../environment-and-modules/), issue: +Currently installed version 2.6. To load the [module](../../modules-matrix/) issue: ```console $ ml intelpcm diff --git a/docs.it4i/salomon/software/debuggers/intel-vtune-amplifier.md b/docs.it4i/software/debuggers/intel-vtune-amplifier.md similarity index 94% rename from docs.it4i/salomon/software/debuggers/intel-vtune-amplifier.md rename to docs.it4i/software/debuggers/intel-vtune-amplifier.md index 192aece7e250dfb9b2938daebe83606a1f002b06..8003916b32bb87ef712b2b412349388389e74d3e 100644 --- a/docs.it4i/salomon/software/debuggers/intel-vtune-amplifier.md +++ b/docs.it4i/software/debuggers/intel-vtune-amplifier.md @@ -9,7 +9,7 @@ Intel *®* VTune™ Amplifier, part of Intel Parallel studio, is a GUI profiling * Low level specific counters, such as branch analysis and memory bandwidth * Power usage analysis - frequency and sleep states. - + ## Usage @@ -19,6 +19,9 @@ To profile an application with VTune Amplifier, special kernel modules need to b $ qsub -q qexp -A OPEN-0-0 -I -l select=1,vtune=2016_update1 ``` +!!! 
warning + Temporarily disabled - bug in the kernel ([intel-vtune-amplifier-impact-of-recent-os-security-updates](https://software.intel.com/en-us/articles/intel-vtune-amplifier-impact-of-recent-os-security-updates)) + After that, you can verify the modules sep\*, pax and vtsspp are present in the kernel : ```console @@ -89,6 +92,5 @@ You can obtain this command line by pressing the "Command line..." button on Ana ## References -1. [Performance Tuning for Intel® Xeon Phi™ Coprocessors](https://www.rcac.purdue.edu/tutorials/phi/PerformanceTuningXeonPhi-Tullos.pdf) 1. [Intel® VTune™ Amplifier Support](https://software.intel.com/en-us/intel-vtune-amplifier-xe-support/documentation) 1. [https://software.intel.com/en-us/amplifier_help_linux](https://software.intel.com/en-us/amplifier_help_linux) diff --git a/docs.it4i/salomon/software/debuggers/mympiprog_32p_2014-10-15_16-56.html b/docs.it4i/software/debuggers/mympiprog_32p_2014-10-15_16-56.html similarity index 100% rename from docs.it4i/salomon/software/debuggers/mympiprog_32p_2014-10-15_16-56.html rename to docs.it4i/software/debuggers/mympiprog_32p_2014-10-15_16-56.html diff --git a/docs.it4i/salomon/software/debuggers/mympiprog_32p_2014-10-15_16-56.txt b/docs.it4i/software/debuggers/mympiprog_32p_2014-10-15_16-56.txt similarity index 100% rename from docs.it4i/salomon/software/debuggers/mympiprog_32p_2014-10-15_16-56.txt rename to docs.it4i/software/debuggers/mympiprog_32p_2014-10-15_16-56.txt diff --git a/docs.it4i/anselm/software/debuggers/papi.md b/docs.it4i/software/debuggers/papi.md similarity index 99% rename from docs.it4i/anselm/software/debuggers/papi.md rename to docs.it4i/software/debuggers/papi.md index d03dd8354769895e3b7f8454f5a0dd613a626bc3..15b03837e4bb18d2b8b6357d60bb29574b30fdae 100644 --- a/docs.it4i/anselm/software/debuggers/papi.md +++ b/docs.it4i/software/debuggers/papi.md @@ -10,7 +10,7 @@ PAPI can be used with parallel as well as serial programs. ## Usage -To use PAPI, load [module](../../environment-and-modules/) papi: +To use PAPI, load [module](../../modules-matrix/) papi: ```console $ ml papi diff --git a/docs.it4i/anselm/software/debuggers/scalasca.md b/docs.it4i/software/debuggers/scalasca.md similarity index 96% rename from docs.it4i/anselm/software/debuggers/scalasca.md rename to docs.it4i/software/debuggers/scalasca.md index a7cd44b1d5236eb3e257a24f5a3cfbdb96e6b0f5..f8f1db9d0249bf7f25b448f3e017aadddff08181 100644 --- a/docs.it4i/anselm/software/debuggers/scalasca.md +++ b/docs.it4i/software/debuggers/scalasca.md @@ -8,7 +8,7 @@ Scalasca supports profiling of MPI, OpenMP and hybrid MPI+OpenMP applications. ## Installed Versions -There are currently two versions of Scalasca 2.0 [modules](../../environment-and-modules/) installed on Anselm: +There are currently two versions of Scalasca 2.0 [modules](../../modules-matrix/) installed on Anselm: * scalasca2/2.0-gcc-openmpi, for usage with [GNU Compiler](../compilers/) and [OpenMPI](../mpi/Running_OpenMPI/), * scalasca2/2.0-icc-impi, for usage with [Intel Compiler](../compilers/) and [Intel MPI](../mpi/running-mpich2/). @@ -43,7 +43,7 @@ Some notable Scalasca options are: * **-e <directory> Specify a directory to save the collected data to. By default, Scalasca saves the data to a directory with prefix scorep\_, followed by name of the executable and launch configuration.** !!! note - Scalasca can generate a huge amount of data, especially if tracing is enabled. Please consider saving the data to a [scratch directory](../../storage/storage/). 
+ Scalasca can generate a huge amount of data, especially if tracing is enabled. Please consider saving the data to a [scratch directory](../../salomon/storage/). ### Analysis of Reports diff --git a/docs.it4i/anselm/software/debuggers/score-p.md b/docs.it4i/software/debuggers/score-p.md similarity index 69% rename from docs.it4i/anselm/software/debuggers/score-p.md rename to docs.it4i/software/debuggers/score-p.md index 3295933c45e6c7f8b7275a5bede4cef5064bd49f..186762617bc0ae39f624df391b0816044fc5f327 100644 --- a/docs.it4i/anselm/software/debuggers/score-p.md +++ b/docs.it4i/software/debuggers/score-p.md @@ -8,7 +8,7 @@ Score-P can be used as an instrumentation tool for [Scalasca](scalasca/). ## Installed Versions -There are currently two versions of Score-P version 1.2.6 [modules](../../environment-and-modules/) installed on Anselm : +There are currently two versions of Score-P version 1.2.6 [modules](../../modules-matrix/) installed on Anselm : * scorep/1.2.3-gcc-openmpi, for usage with [GNU Compiler](../compilers/) and [OpenMPI](../mpi/Running_OpenMPI/) * scorep/1.2.3-icc-impi, for usage with [Intel Compiler](../compilers/)> and [Intel MPI](../mpi/running-mpich2/)>. @@ -50,28 +50,28 @@ To use this kind of instrumentation, use scorep with switch --user. You will the An example in C/C++ : ```cpp - #include <scorep/SCOREP_User.h> - void foo() - { - SCOREP_USER_REGION_DEFINE( my_region_handle ) - // more declarations - SCOREP_USER_REGION_BEGIN( my_region_handle, "foo", SCOREP_USER_REGION_TYPE_COMMON ) - // do something - SCOREP_USER_REGION_END( my_region_handle ) - } +#include <scorep/SCOREP_User.h> +void foo() +{ + SCOREP_USER_REGION_DEFINE( my_region_handle ) + // more declarations + SCOREP_USER_REGION_BEGIN( my_region_handle, "foo", SCOREP_USER_REGION_TYPE_COMMON ) + // do something + SCOREP_USER_REGION_END( my_region_handle ) +} ``` and Fortran : -```cpp - #include "scorep/SCOREP_User.inc" - subroutine foo - SCOREP_USER_REGION_DEFINE( my_region_handle ) - ! more declarations - SCOREP_USER_REGION_BEGIN( my_region_handle, "foo", SCOREP_USER_REGION_TYPE_COMMON ) - ! do something - SCOREP_USER_REGION_END( my_region_handle ) - end subroutine foo +```fortran +#include "scorep/SCOREP_User.inc" +subroutine foo + SCOREP_USER_REGION_DEFINE( my_region_handle ) + ! more declarations + SCOREP_USER_REGION_BEGIN( my_region_handle, "foo", SCOREP_USER_REGION_TYPE_COMMON ) + ! do something + SCOREP_USER_REGION_END( my_region_handle ) +end subroutine foo ``` Please refer to the [documentation for description of the API](https://silc.zih.tu-dresden.de/scorep-current/pdf/scorep.pdf). @@ -83,35 +83,35 @@ This method uses POMP2 directives to mark regions to be instrumented. To use thi Example directives in C/C++ : ```cpp - void foo(...) +void foo(...) +{ + /* declarations */ + #pragma pomp inst begin(foo) + ... + if (<condition>) { - /* declarations */ - #pragma pomp inst begin(foo) - ... - if (<condition>) - { - #pragma pomp inst altend(foo) - return; - } - ... - #pragma pomp inst end(foo) + #pragma pomp inst altend(foo) + return; } + ... + #pragma pomp inst end(foo) +} ``` and in Fortran : -```cpp - subroutine foo(...) - !declarations - !POMP$ INST BEGIN(foo) - ... - if (<condition>) then - !POMP$ INST ALTEND(foo) - return - end if - ... - !POMP$ INST END(foo) - end subroutine foo +```fortran +subroutine foo(...) + !declarations + !POMP$ INST BEGIN(foo) + ... + if (<condition>) then + !POMP$ INST ALTEND(foo) + return + end if + ... 
+ !POMP$ INST END(foo) +end subroutine foo ``` The directives are ignored if the program is compiled without Score-P. Again, please refer to the [documentation](https://silc.zih.tu-dresden.de/scorep-current/pdf/scorep.pdf) for a more elaborate description. diff --git a/docs.it4i/salomon/software/debuggers/total-view.md b/docs.it4i/software/debuggers/total-view.md similarity index 70% rename from docs.it4i/salomon/software/debuggers/total-view.md rename to docs.it4i/software/debuggers/total-view.md index 0235c845d012f4c0f5245e7ae2c5f8d96b6efe3c..aebe91a523f00cc82fe566fed7f2102af9762509 100644 --- a/docs.it4i/salomon/software/debuggers/total-view.md +++ b/docs.it4i/software/debuggers/total-view.md @@ -13,7 +13,19 @@ On the cluster users can debug OpenMP or MPI code that runs up to 64 parallel pr Debugging of GPU accelerated codes is also supported. -You can check the status of the licenses [here](https://extranet.it4i.cz/rsweb/anselm/license/totalview). +You can check the status of the licenses [here (Salomon)](https://extranet.it4i.cz/rsweb/anselm/license/Totalview) or type (Anselm): + +```console +$ cat /apps/user/licenses/totalview_features_state.txt + + # totalview + # ------------------------------------------------- + # FEATURE TOTAL USED AVAIL + # ------------------------------------------------- + TotalView_Team 64 0 64 + Replay 64 0 64 + CUDA 64 0 64 +``` ## Compiling Code to Run With TotalView @@ -28,7 +40,8 @@ Load all necessary modules to compile the code. For example: Load the TotalView module: ```console - ml TotalView/8.15.4-6-linux-x86-64 + ml TotalView + ml totalview ``` Compile the code: @@ -57,7 +70,7 @@ ssh -X username@salomon.it4i.cz Other options is to access login node using VNC. Please see the detailed information on how to use graphic user interface on Anselm. -From the login node an interactive session with X windows forwarding (-X option) can be started by following command: +From the login node an interactive session with X windows forwarding (-X option) can be started by following command (for Anselm use 16 threads): ```console $ qsub -I -X -A NONE-0-0 -q qexp -lselect=1:ncpus=24:mpiprocs=24,walltime=01:00:00 @@ -81,36 +94,40 @@ To debug a parallel code compiled with **OpenMPI** you need to setup your TotalV To be able to run parallel debugging procedure from the command line without stopping the debugger in the mpiexec source code you have to add the following function to your **~/.tvdrc** file. ```console - proc mpi_auto_run_starter {loaded_id} { - set starter_programs {mpirun mpiexec orterun} - set executable_name [TV::symbol get $loaded_id full_pathname] - set file_component [file tail $executable_name] - - if {[lsearch -exact $starter_programs $file_component] != -1} { - puts "*************************************" - puts "Automatically starting $file_component" - puts "*************************************" - dgo - } +proc mpi_auto_run_starter {loaded_id} { + set starter_programs {mpirun mpiexec orterun} + set executable_name [TV::symbol get $loaded_id full_pathname] + set file_component [file tail $executable_name] + + if {[lsearch -exact $starter_programs $file_component] != -1} { + puts "*************************************" + puts "Automatically starting $file_component" + puts "*************************************" + dgo } +} - # Append this function to TotalView's image load callbacks so that - # TotalView run this program automatically. +# Append this function to TotalView's image load callbacks so that +# TotalView run this program automatically. 
-    dlappend TV::image_load_callbacks mpi_auto_run_starter
+dlappend TV::image_load_callbacks mpi_auto_run_starter
 ```
 
 The source code of this function can also be found in
 
 ```console
-$ /apps/all/OpenMPI/1.10.1-GNU-4.9.3-2.25/etc/openmpi-totalview.tcl
+$ /apps/all/OpenMPI/1.10.1-GNU-4.9.3-2.25/etc/openmpi-totalview.tcl #Salomon
+
+$ /apps/mpi/openmpi/intel/1.6.5/etc/openmpi-totalview.tcl #Anselm
 ```
 
 You can also add only the following line to your ~/.tvdrc file instead of the entire function:
 
 ```console
-$ source /apps/all/OpenMPI/1.10.1-GNU-4.9.3-2.25/etc/openmpi-totalview.tcl
+$ source /apps/all/OpenMPI/1.10.1-GNU-4.9.3-2.25/etc/openmpi-totalview.tcl #Salomon
+
+$ source /apps/mpi/openmpi/intel/1.6.5/etc/openmpi-totalview.tcl #Anselm
 ```
 
 You need to do this step only once. See also [OpenMPI FAQ entry](https://www.open-mpi.org/faq/?category=running#run-with-tv)
 
@@ -123,11 +140,11 @@ $ mpirun -tv -n 5 ./test_debug
 
 When the following dialog appears, click on "Yes"
 
- 
+
 
 At this point the main TotalView GUI window will appear and you can insert the breakpoints and start debugging:
 
- 
+
 
 ### Debugging a Parallel Code - Option 2
 
diff --git a/docs.it4i/anselm/software/debuggers/valgrind.md b/docs.it4i/software/debuggers/valgrind.md
similarity index 88%
rename from docs.it4i/anselm/software/debuggers/valgrind.md
rename to docs.it4i/software/debuggers/valgrind.md
index 0e381e945c86c1a53af181b8cb62194171535bee..b91eeb7a0642fb603a43029f577fb3310acee827 100644
--- a/docs.it4i/anselm/software/debuggers/valgrind.md
+++ b/docs.it4i/software/debuggers/valgrind.md
@@ -22,7 +22,13 @@ The main tools available in Valgrind are :
 
 There are two versions of Valgrind available on Anselm.
 
 * Version 3.6.0, installed by operating system vendor in /usr/bin/valgrind. This version is available by default, without the need to load any module. This version however does not provide additional MPI support.
-* Version 3.9.0 with support for Intel MPI, available in [module](../../environment-and-modules/) valgrind/3.9.0-impi. After loading the module, this version replaces the default valgrind.
+* Version 3.9.0 with support for Intel MPI, available in [module](../../modules-matrix/) valgrind/3.9.0-impi. After loading the module, this version replaces the default valgrind.
+
+There are three versions of Valgrind available on Salomon.
+
+* Version 3.8.1, installed by the operating system vendor in /usr/bin/valgrind. This version is available by default, without the need to load any module. This version however does not provide additional MPI support. Also, it does not support AVX2 instructions; debugging of an AVX2-enabled executable with this version will fail.
+* Version 3.11.0 built by ICC with support for Intel MPI, available in module Valgrind/3.11.0-intel-2015b. After loading the module, this version replaces the default valgrind.
+* Version 3.11.0 built by GCC with support for Open MPI, module Valgrind/3.11.0-foss-2015b
It is advisable to add For example, lets look at this C code, which has two problems : ```cpp - #include <stdlib.h> - - void f(void) - { - int* x = malloc(10 * sizeof(int)); - x[10] = 0; // problem 1: heap block overrun - } // problem 2: memory leak -- x not freed - - int main(void) - { - f(); - return 0; - } +#include <stdlib.h> + +void f(void) +{ + int* x = malloc(10 * sizeof(int)); + x[10] = 0; // problem 1: heap block overrun +} // problem 2: memory leak -- x not freed + +int main(void) +{ + f(); + return 0; +} ``` Now, compile it with Intel compiler : @@ -156,24 +162,29 @@ The default version without MPI support will however report a large number of fa ==30166== by 0x4008BD: main (valgrind-example-mpi.c:18) ``` -so it is better to use the MPI-enabled valgrind from module. The MPI version requires library /apps/tools/valgrind/3.9.0/impi/lib/valgrind/libmpiwrap-amd64-linux.so, which must be included in the LD_PRELOAD environment variable. +so it is better to use the MPI-enabled valgrind from module. The MPI version requires library: + +* Anselm: /apps/tools/valgrind/3.9.0/impi/lib/valgrind/libmpiwrap-amd64-linux.so +* Salomon: $EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so + +which must be included in the LD_PRELOAD environment variable. Lets look at this MPI example : ```cpp - #include <stdlib.h> - #include <mpi.h> +#include <stdlib.h> +#include <mpi.h> - int main(int argc, char *argv[]) - { - int *data = malloc(sizeof(int)*99); +int main(int argc, char *argv[]) +{ + int *data = malloc(sizeof(int)*99); - MPI_Init(&argc, &argv); - MPI_Bcast(data, 100, MPI_INT, 0, MPI_COMM_WORLD); - MPI_Finalize(); + MPI_Init(&argc, &argv); + MPI_Bcast(data, 100, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Finalize(); - return 0; - } + return 0; +} ``` There are two errors - use of uninitialized memory and invalid length of the buffer. Lets debug it with valgrind : diff --git a/docs.it4i/salomon/software/debuggers/vampir.md b/docs.it4i/software/debuggers/vampir.md similarity index 90% rename from docs.it4i/salomon/software/debuggers/vampir.md rename to docs.it4i/software/debuggers/vampir.md index 852374d229d2c4f4a2e4c612c85d25b1c121faf0..93d11f0ca571bd8dadd401b7f6173a3403683476 100644 --- a/docs.it4i/salomon/software/debuggers/vampir.md +++ b/docs.it4i/software/debuggers/vampir.md @@ -2,7 +2,7 @@ Vampir is a commercial trace analysis and visualisation tool. It can work with traces in OTF and OTF2 formats. It does not have the functionality to collect traces, you need to use a trace collection tool (such as [Score-P](score-p/)) first to collect the traces. 
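 
+For illustration, a sketch of such an MPI-enabled Valgrind run on Salomon follows; the executable name (valgrind-example-mpi) and the use of mpirun's -genv option to set LD_PRELOAD are assumptions used for this example:
+
+```console
+$ ml Valgrind/3.11.0-intel-2015b
+$ mpirun -np 2 -genv LD_PRELOAD $EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so valgrind ./valgrind-example-mpi
+```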
-
+

## Installed Versions
diff --git a/docs.it4i/salomon/software/intel-suite/intel-advisor.md b/docs.it4i/software/intel/intel-suite/intel-advisor.md
similarity index 100%
rename from docs.it4i/salomon/software/intel-suite/intel-advisor.md
rename to docs.it4i/software/intel/intel-suite/intel-advisor.md
diff --git a/docs.it4i/salomon/software/intel-suite/intel-compilers.md b/docs.it4i/software/intel/intel-suite/intel-compilers.md
similarity index 100%
rename from docs.it4i/salomon/software/intel-suite/intel-compilers.md
rename to docs.it4i/software/intel/intel-suite/intel-compilers.md
diff --git a/docs.it4i/salomon/software/intel-suite/intel-debugger.md b/docs.it4i/software/intel/intel-suite/intel-debugger.md
similarity index 79%
rename from docs.it4i/salomon/software/intel-suite/intel-debugger.md
rename to docs.it4i/software/intel/intel-suite/intel-debugger.md
index 15788c798785390777016856b8ffcc111227c1d2..ac7cec6ad56acbc3705fcdc478531e2cade64c47 100644
--- a/docs.it4i/salomon/software/intel-suite/intel-debugger.md
+++ b/docs.it4i/software/intel/intel-suite/intel-debugger.md
@@ -4,10 +4,10 @@ IDB is no longer available since Intel Parallel Studio 2015

## Debugging Serial Applications

-The intel debugger version 13.0 is available, via module intel. The debugger works for applications compiled with C and C++ compiler and the ifort fortran 77/90/95 compiler. The debugger provides java GUI environment. Use [X display](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/) for running the GUI.
+The Intel debugger is available via the module intel/13.5.192. The debugger works for applications compiled with the C and C++ compilers and the ifort Fortran 77/90/95 compiler. The debugger provides a Java GUI environment. Use [X display](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/) for running the GUI.

```console
-$ ml intel
+$ ml intel/13.5.192
$ ml Java
$ idb
```

The debugger may run in text mode. To debug in text mode, use

```console
$ idbc
```

-To debug on the compute nodes, module intel must be loaded. The GUI on compute nodes may be accessed using the same way as in [the GUI section](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/)
+To debug on the compute nodes, module intel must be loaded. The GUI on compute nodes may be accessed the same way as in [the GUI section](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/)

Example:

```console
-$ qsub -q qexp -l select=1:ncpus=24 -X -I
+$ qsub -q qexp -l select=1:ncpus=24 -X -I # use 16 threads for Anselm
qsub: waiting for job 19654.srv11 to start
qsub: job 19654.srv11 ready
$ ml intel
$ idb
```

@@ -40,7 +40,7 @@ In this example, we allocate 1 full compute node, compile program myprog.c with

### Small Number of MPI Ranks

-For debugging small number of MPI ranks, you may execute and debug each rank in separate xterm terminal (do not forget the [X display](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/)). Using Intel MPI, this may be done in following way:
+For debugging a small number of MPI ranks, you may execute and debug each rank in a separate xterm terminal (do not forget the [X display](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/)).
Using Intel MPI, this may be done in following way: ```console $ qsub -q qexp -l select=2:ncpus=24 -X -I diff --git a/docs.it4i/salomon/software/intel-suite/intel-inspector.md b/docs.it4i/software/intel/intel-suite/intel-inspector.md similarity index 100% rename from docs.it4i/salomon/software/intel-suite/intel-inspector.md rename to docs.it4i/software/intel/intel-suite/intel-inspector.md diff --git a/docs.it4i/salomon/software/intel-suite/intel-integrated-performance-primitives.md b/docs.it4i/software/intel/intel-suite/intel-integrated-performance-primitives.md similarity index 53% rename from docs.it4i/salomon/software/intel-suite/intel-integrated-performance-primitives.md rename to docs.it4i/software/intel/intel-suite/intel-integrated-performance-primitives.md index 60628eed0744d4305f79f4b77ff2f4de8e11c10d..a47233367e4130177be4db677197a07ec26f9fb2 100644 --- a/docs.it4i/salomon/software/intel-suite/intel-integrated-performance-primitives.md +++ b/docs.it4i/software/intel/intel-suite/intel-integrated-performance-primitives.md @@ -15,44 +15,44 @@ The module sets up environment variables, required for linking and running ipp e ## IPP Example ```cpp - #include "ipp.h" - #include <stdio.h> - int main(int argc, char* argv[]) - { - const IppLibraryVersion *lib; - Ipp64u fm; - IppStatus status; - - status= ippInit(); //IPP initialization with the best optimization layer - if( status != ippStsNoErr ) { - printf("IppInit() Error:n"); - printf("%sn", ippGetStatusString(status) ); - return -1; - } - - //Get version info - lib = ippiGetLibVersion(); - printf("%s %sn", lib->Name, lib->Version); - - //Get CPU features enabled with selected library level - fm=ippGetEnabledCpuFeatures(); - printf("SSE :%cn",(fm>1)&1?'Y':'N'); - printf("SSE2 :%cn",(fm>2)&1?'Y':'N'); - printf("SSE3 :%cn",(fm>3)&1?'Y':'N'); - printf("SSSE3 :%cn",(fm>4)&1?'Y':'N'); - printf("SSE41 :%cn",(fm>6)&1?'Y':'N'); - printf("SSE42 :%cn",(fm>7)&1?'Y':'N'); - printf("AVX :%cn",(fm>8)&1 ?'Y':'N'); - printf("AVX2 :%cn", (fm>15)&1 ?'Y':'N' ); - printf("----------n"); - printf("OS Enabled AVX :%cn", (fm>9)&1 ?'Y':'N'); - printf("AES :%cn", (fm>10)&1?'Y':'N'); - printf("CLMUL :%cn", (fm>11)&1?'Y':'N'); - printf("RDRAND :%cn", (fm>13)&1?'Y':'N'); - printf("F16C :%cn", (fm>14)&1?'Y':'N'); - - return 0; +#include "ipp.h" +#include <stdio.h> +int main(int argc, char* argv[]) +{ + const IppLibraryVersion *lib; + Ipp64u fm; + IppStatus status; + + status= ippInit(); //IPP initialization with the best optimization layer + if( status != ippStsNoErr ) { + printf("IppInit() Error:n"); + printf("%sn", ippGetStatusString(status) ); + return -1; } + + //Get version info + lib = ippiGetLibVersion(); + printf("%s %sn", lib->Name, lib->Version); + + //Get CPU features enabled with selected library level + fm=ippGetEnabledCpuFeatures(); + printf("SSE :%cn",(fm>1)&1?'Y':'N'); + printf("SSE2 :%cn",(fm>2)&1?'Y':'N'); + printf("SSE3 :%cn",(fm>3)&1?'Y':'N'); + printf("SSSE3 :%cn",(fm>4)&1?'Y':'N'); + printf("SSE41 :%cn",(fm>6)&1?'Y':'N'); + printf("SSE42 :%cn",(fm>7)&1?'Y':'N'); + printf("AVX :%cn",(fm>8)&1 ?'Y':'N'); + printf("AVX2 :%cn", (fm>15)&1 ?'Y':'N' ); + printf("----------n"); + printf("OS Enabled AVX :%cn", (fm>9)&1 ?'Y':'N'); + printf("AES :%cn", (fm>10)&1?'Y':'N'); + printf("CLMUL :%cn", (fm>11)&1?'Y':'N'); + printf("RDRAND :%cn", (fm>13)&1?'Y':'N'); + printf("F16C :%cn", (fm>14)&1?'Y':'N'); + + return 0; +} ``` Compile above example, using any compiler and the ipp module. 
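+For instance, with the Intel compiler (a minimal sketch; the source file name testipp.c is illustrative, and the listed libraries cover the IPP image, signal, and core domains used in the example):
+
+```console
+$ ml intel ipp
+$ icc testipp.c -o testipp.x -lippi -lipps -lippcore
+```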
diff --git a/docs.it4i/salomon/software/intel-suite/intel-mkl.md b/docs.it4i/software/intel/intel-suite/intel-mkl.md
similarity index 98%
rename from docs.it4i/salomon/software/intel-suite/intel-mkl.md
rename to docs.it4i/software/intel/intel-suite/intel-mkl.md
index 6b54e0890202f817dd42c04eabf886489bd695d0..2053e958b2673acb4fc79e4e552bea5cf016d85e 100644
--- a/docs.it4i/salomon/software/intel-suite/intel-mkl.md
+++ b/docs.it4i/software/intel/intel-suite/intel-mkl.md
@@ -15,9 +15,10 @@ Intel Math Kernel Library (Intel MKL) is a library of math kernel subroutines, e

For details see the [Intel MKL Reference Manual](http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mklman/index.htm).

-Intel MKL version 11.2.3.187 is available on the cluster
+Intel MKL is available on the cluster

```console
+$ ml av imkl
$ ml imkl
```

@@ -51,7 +52,7 @@ Advantage in using Intel MKL library is that it brings threaded parallelization

For this to work, the application must link the threaded MKL library (default). The number and behaviour of MKL threads may be controlled via the OpenMP environment variables, such as OMP_NUM_THREADS and KMP_AFFINITY. MKL_NUM_THREADS takes precedence over OMP_NUM_THREADS.

```console
-$ export OMP_NUM_THREADS=24
+$ export OMP_NUM_THREADS=24 # 16 for Anselm
$ export KMP_AFFINITY=granularity=fine,compact,1,0
```

diff --git a/docs.it4i/salomon/software/intel-suite/intel-parallel-studio-introduction.md b/docs.it4i/software/intel/intel-suite/intel-parallel-studio-introduction.md
similarity index 92%
rename from docs.it4i/salomon/software/intel-suite/intel-parallel-studio-introduction.md
rename to docs.it4i/software/intel/intel-suite/intel-parallel-studio-introduction.md
index b22274a0e0a4c32942b15ba90244621eba21aa54..7b6ba956b932b63d535dc0e3aeb7667385fdccf8 100644
--- a/docs.it4i/salomon/software/intel-suite/intel-parallel-studio-introduction.md
+++ b/docs.it4i/software/intel/intel-suite/intel-parallel-studio-introduction.md
@@ -15,7 +15,7 @@ Intel Parallel Studio XE

## Intel Compilers

-The Intel compilers version 131.3 are available, via module iccifort/2013.5.192-GCC-4.8.3. The compilers include the icc C and C++ compiler and the ifort fortran 77/90/95 compiler.
+The Intel compilers are available via the module intel. The compilers include the icc C and C++ compiler and the ifort Fortran 77/90/95 compiler.

```console
$ ml intel
diff --git a/docs.it4i/salomon/software/intel-suite/intel-tbb.md b/docs.it4i/software/intel/intel-suite/intel-tbb.md
similarity index 100%
rename from docs.it4i/salomon/software/intel-suite/intel-tbb.md
rename to docs.it4i/software/intel/intel-suite/intel-tbb.md
diff --git a/docs.it4i/salomon/software/intel-suite/intel-trace-analyzer-and-collector.md b/docs.it4i/software/intel/intel-suite/intel-trace-analyzer-and-collector.md
similarity index 90%
rename from docs.it4i/salomon/software/intel-suite/intel-trace-analyzer-and-collector.md
rename to docs.it4i/software/intel/intel-suite/intel-trace-analyzer-and-collector.md
index 9cae361ca43dccb382bd5b09f5c5a9d270e0414c..b7bf6c92d3a03112392a86078037aeff28e8623f 100644
--- a/docs.it4i/salomon/software/intel-suite/intel-trace-analyzer-and-collector.md
+++ b/docs.it4i/software/intel/intel-suite/intel-trace-analyzer-and-collector.md
@@ -21,7 +21,7 @@ The trace will be saved in file myapp.stf in the current directory.
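+For reference, such a trace is typically produced by running the application under Intel MPI with the -trace option (a sketch; the binary name myapp and the process count are illustrative):
+
+```console
+$ ml itac/9.1.2.024
+$ mpirun -trace -n 24 ./myapp
+```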
## Viewing Traces -To view and analyze the trace, open ITAC GUI in a [graphical environment](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/): +To view and analyze the trace, open ITAC GUI in a [graphical environment](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/): ```console $ ml itac/9.1.2.024 @@ -30,7 +30,7 @@ $ traceanalyzer The GUI will launch and you can open the produced `*`.stf file. - + Please refer to Intel documenation about usage of the GUI tool. diff --git a/docs.it4i/anselm/software/intel-xeon-phi.md b/docs.it4i/software/intel/intel-xeon-phi-anselm.md similarity index 77% rename from docs.it4i/anselm/software/intel-xeon-phi.md rename to docs.it4i/software/intel/intel-xeon-phi-anselm.md index d879361135e715e4af6862ed6636adb45a895fb1..b1e86256d093b4bd34fe799e48f64d38f48d0e83 100644 --- a/docs.it4i/anselm/software/intel-xeon-phi.md +++ b/docs.it4i/software/intel/intel-xeon-phi-anselm.md @@ -27,65 +27,65 @@ $ /usr/bin/micinfo The output of the "micinfo" utility executed on one of the Anselm node is as follows. (note: to get PCIe related details the command has to be run with root privileges) ```console - MicInfo Utility Log - - Created Mon Jul 22 00:23:50 2013 - - System Info - HOST OS : Linux - OS Version : 2.6.32-279.5.2.bl6.Bull.33.x86_64 - Driver Version : 6720-15 - MPSS Version : 2.1.6720-15 - Host Physical Memory : 98843 MB - - Device No: 0, Device Name: mic0 - - Version - Flash Version : 2.1.03.0386 - SMC Firmware Version : 1.15.4830 - SMC Boot Loader Version : 1.8.4326 - uOS Version : 2.6.38.8-g2593b11 - Device Serial Number : ADKC30102482 - - Board - Vendor ID : 0x8086 - Device ID : 0x2250 - Subsystem ID : 0x2500 - Coprocessor Stepping ID : 3 - PCIe Width : x16 - PCIe Speed : 5 GT/s - PCIe Max payload size : 256 bytes - PCIe Max read req size : 512 bytes - Coprocessor Model : 0x01 - Coprocessor Model Ext : 0x00 - Coprocessor Type : 0x00 - Coprocessor Family : 0x0b - Coprocessor Family Ext : 0x00 - Coprocessor Stepping : B1 - Board SKU : B1PRQ-5110P/5120D - ECC Mode : Enabled - SMC HW Revision : Product 225W Passive CS - - Cores - Total No of Active Cores : 60 - Voltage : 1032000 uV - Frequency : 1052631 kHz - - Thermal - Fan Speed Control : N/A - Fan RPM : N/A - Fan PWM : N/A - Die Temp : 49 C - - GDDR - GDDR Vendor : Elpida - GDDR Version : 0x1 - GDDR Density : 2048 Mb - GDDR Size : 7936 MB - GDDR Technology : GDDR5 - GDDR Speed : 5.000000 GT/s - GDDR Frequency : 2500000 kHz - GDDR Voltage : 1501000 uV +MicInfo Utility Log +Created Wed Sep 13 13:44:14 2017 + + + System Info + HOST OS : Linux + OS Version : 2.6.32-696.3.2.el6.Bull.120.x86_64 + Driver Version : 3.4.9-1 + MPSS Version : 3.4.9 + Host Physical Memory : 98836 MB + +Device No: 0, Device Name: mic0 + + Version + Flash Version : 2.1.02.0391 + SMC Firmware Version : 1.17.6900 + SMC Boot Loader Version : 1.8.4326 + uOS Version : 2.6.38.8+mpss3.4.9 + Device Serial Number : ADKC30102489 + + Board + Vendor ID : 0x8086 + Device ID : 0x2250 + Subsystem ID : 0x2500 + Coprocessor Stepping ID : 3 + PCIe Width : x16 + PCIe Speed : 5 GT/s + PCIe Max payload size : 256 bytes + PCIe Max read req size : 512 bytes + Coprocessor Model : 0x01 + Coprocessor Model Ext : 0x00 + Coprocessor Type : 0x00 + Coprocessor Family : 0x0b + Coprocessor Family Ext : 0x00 + Coprocessor Stepping : B1 + Board SKU : B1PRQ-5110P/5120D + ECC Mode : Enabled + SMC HW Revision : Product 225W Passive CS + + Cores + Total No of Active Cores : 60 + Voltage : 1009000 uV + Frequency : 
1052631 kHz + + Thermal + Fan Speed Control : N/A + Fan RPM : N/A + Fan PWM : N/A + Die Temp : 53 C + + GDDR + GDDR Vendor : Elpida + GDDR Version : 0x1 + GDDR Density : 2048 Mb + GDDR Size : 7936 MB + GDDR Technology : GDDR5 + GDDR Speed : 5.000000 GT/s + GDDR Frequency : 2500000 kHz + GDDR Voltage : 1501000 uV ``` ## Offload Mode @@ -108,24 +108,24 @@ A very basic example of code that employs offload programming technique is shown !!! note This code is sequential and utilizes only single core of the accelerator. -```console +```cpp $ vim source-offload.cpp - #include <iostream> +#include <iostream> - int main(int argc, char* argv[]) - { - const int niter = 100000; - double result = 0; +int main(int argc, char* argv[]) +{ + const int niter = 100000; + double result = 0; - #pragma offload target(mic) - for (int i = 0; i < niter; ++i) { - const double t = (i + 0.5) / niter; - result += 4.0 / (t * t + 1.0); - } - result /= niter; - std::cout << "Pi ~ " << result << 'n'; + #pragma offload target(mic) + for (int i = 0; i < niter; ++i) { + const double t = (i + 0.5) / niter; + result += 4.0 / (t * t + 1.0); } + result /= niter; + std::cout << "Pi ~ " << result << 'n'; +} ``` To compile a code using Intel compiler run @@ -144,82 +144,82 @@ $ ./bin-offload One way of paralelization a code for Xeon Phi is using OpenMP directives. The following example shows code for parallel vector addition. -```console +```cpp $ vim ./vect-add - #include <stdio.h> - - typedef int T; - - #define SIZE 1000 - - #pragma offload_attribute(push, target(mic)) - T in1[SIZE]; - T in2[SIZE]; - T res[SIZE]; - #pragma offload_attribute(pop) - - // MIC function to add two vectors - __attribute__((target(mic))) add_mic(T *a, T *b, T *c, int size) { - int i = 0; - #pragma omp parallel for - for (i = 0; i < size; i++) - c[i] = a[i] + b[i]; +#include <stdio.h> + +typedef int T; + +#define SIZE 1000 + +#pragma offload_attribute(push, target(mic)) +T in1[SIZE]; +T in2[SIZE]; +T res[SIZE]; +#pragma offload_attribute(pop) + +// MIC function to add two vectors +__attribute__((target(mic))) add_mic(T *a, T *b, T *c, int size) { + int i = 0; + #pragma omp parallel for + for (i = 0; i < size; i++) + c[i] = a[i] + b[i]; +} + +// CPU function to add two vectors +void add_cpu (T *a, T *b, T *c, int size) { + int i; + for (i = 0; i < size; i++) + c[i] = a[i] + b[i]; +} + +// CPU function to generate a vector of random numbers +void random_T (T *a, int size) { + int i; + for (i = 0; i < size; i++) + a[i] = rand() % 10000; // random number between 0 and 9999 +} + +// CPU function to compare two vectors +int compare(T *a, T *b, T size ){ + int pass = 0; + int i; + for (i = 0; i < size; i++){ + if (a[i] != b[i]) { + printf("Value mismatch at location %d, values %d and %dn",i, a[i], b[i]); + pass = 1; } + } + if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn"); + return pass; +} - // CPU function to add two vectors - void add_cpu (T *a, T *b, T *c, int size) { - int i; - for (i = 0; i < size; i++) - c[i] = a[i] + b[i]; - } +int main() +{ + int i; + random_T(in1, SIZE); + random_T(in2, SIZE); - // CPU function to generate a vector of random numbers - void random_T (T *a, int size) { - int i; - for (i = 0; i < size; i++) - a[i] = rand() % 10000; // random number between 0 and 9999 - } + #pragma offload target(mic) in(in1,in2) inout(res) + { - // CPU function to compare two vectors - int compare(T *a, T *b, T size ){ - int pass = 0; - int i; - for (i = 0; i < size; i++){ - if (a[i] != b[i]) { - printf("Value mismatch at location %d, 
values %d and %dn",i, a[i], b[i]); - pass = 1; - } - } - if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn"); - return pass; - } + // Parallel loop from main function + #pragma omp parallel for + for (i=0; i<SIZE; i++) + res[i] = in1[i] + in2[i]; - int main() - { - int i; - random_T(in1, SIZE); - random_T(in2, SIZE); + // or parallel loop is called inside the function + add_mic(in1, in2, res, SIZE); - #pragma offload target(mic) in(in1,in2) inout(res) - { + } - // Parallel loop from main function - #pragma omp parallel for - for (i=0; i<SIZE; i++) - res[i] = in1[i] + in2[i]; + //Check the results with CPU implementation + T res_cpu[SIZE]; + add_cpu(in1, in2, res_cpu, SIZE); + compare(res, res_cpu, SIZE); - // or parallel loop is called inside the function - add_mic(in1, in2, res, SIZE); - - } - - //Check the results with CPU implementation - T res_cpu[SIZE]; - add_cpu(in1, in2, res_cpu, SIZE); - compare(res, res_cpu, SIZE); - - } +} ``` During the compilation Intel compiler shows which loops have been vectorized in both host and accelerator. This can be enabled with compiler option "-vec-report2". To compile and execute the code run @@ -271,61 +271,61 @@ $ module load intel Following example show how to automatically offload an SGEMM (single precision - general matrix multiply) function to MIC coprocessor. The code can be copied to a file and compiled without any necessary modification. -```console +```cpp $ vim sgemm-ao-short.c - #include <stdio.h> - #include <stdlib.h> - #include <malloc.h> - #include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <malloc.h> +#include <stdint.h> - #include "mkl.h" +#include "mkl.h" - int main(int argc, char **argv) - { - float *A, *B, *C; /* Matrices */ +int main(int argc, char **argv) +{ + float *A, *B, *C; /* Matrices */ - MKL_INT N = 2560; /* Matrix dimensions */ - MKL_INT LD = N; /* Leading dimension */ - int matrix_bytes; /* Matrix size in bytes */ - int matrix_elements; /* Matrix size in elements */ + MKL_INT N = 2560; /* Matrix dimensions */ + MKL_INT LD = N; /* Leading dimension */ + int matrix_bytes; /* Matrix size in bytes */ + int matrix_elements; /* Matrix size in elements */ - float alpha = 1.0, beta = 1.0; /* Scaling factors */ - char transa = 'N', transb = 'N'; /* Transposition options */ + float alpha = 1.0, beta = 1.0; /* Scaling factors */ + char transa = 'N', transb = 'N'; /* Transposition options */ - int i, j; /* Counters */ + int i, j; /* Counters */ - matrix_elements = N * N; - matrix_bytes = sizeof(float) * matrix_elements; + matrix_elements = N * N; + matrix_bytes = sizeof(float) * matrix_elements; - /* Allocate the matrices */ - A = malloc(matrix_bytes); B = malloc(matrix_bytes); C = malloc(matrix_bytes); + /* Allocate the matrices */ + A = malloc(matrix_bytes); B = malloc(matrix_bytes); C = malloc(matrix_bytes); - /* Initialize the matrices */ - for (i = 0; i < matrix_elements; i++) { - A[i] = 1.0; B[i] = 2.0; C[i] = 0.0; - } + /* Initialize the matrices */ + for (i = 0; i < matrix_elements; i++) { + A[i] = 1.0; B[i] = 2.0; C[i] = 0.0; + } - printf("Computing SGEMM on the hostn"); - sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); + printf("Computing SGEMM on the hostn"); + sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); - printf("Enabling Automatic Offloadn"); - /* Alternatively, set environment variable MKL_MIC_ENABLE=1 */ - mkl_mic_enable(); + printf("Enabling Automatic Offloadn"); + /* Alternatively, set environment variable MKL_MIC_ENABLE=1 */ 
+ mkl_mic_enable(); - int ndevices = mkl_mic_get_device_count(); /* Number of MIC devices */ - printf("Automatic Offload enabled: %d MIC devices presentn", ndevices); + int ndevices = mkl_mic_get_device_count(); /* Number of MIC devices */ + printf("Automatic Offload enabled: %d MIC devices presentn", ndevices); - printf("Computing SGEMM with automatic workdivisionn"); - sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); + printf("Computing SGEMM with automatic workdivisionn"); + sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); - /* Free the matrix memory */ - free(A); free(B); free(C); + /* Free the matrix memory */ + free(A); free(B); free(C); - printf("Donen"); + printf("Donen"); - return 0; - } + return 0; +} ``` !!! note @@ -573,28 +573,28 @@ $ mpiicc -mmic -o mpi-test-mic mpi-test.c An example of basic MPI version of "hello-world" example in C language, that can be executed on both host and Xeon Phi is (can be directly copy and pasted to a .c file) ```cpp - #include <stdio.h> - #include <mpi.h> +#include <stdio.h> +#include <mpi.h> - int main (argc, argv) - int argc; - char *argv[]; - { - int rank, size; +int main (argc, argv) + int argc; + char *argv[]; +{ + int rank, size; - int len; - char node[MPI_MAX_PROCESSOR_NAME]; + int len; + char node[MPI_MAX_PROCESSOR_NAME]; - MPI_Init (&argc, &argv); /* starts MPI */ - MPI_Comm_rank (MPI_COMM_WORLD, &rank); /* get current process id */ - MPI_Comm_size (MPI_COMM_WORLD, &size); /* get number of processes */ + MPI_Init (&argc, &argv); /* starts MPI */ + MPI_Comm_rank (MPI_COMM_WORLD, &rank); /* get current process id */ + MPI_Comm_size (MPI_COMM_WORLD, &size); /* get number of processes */ - MPI_Get_processor_name(node,&len); + MPI_Get_processor_name(node,&len); - printf( "Hello world from process %d of %d on host %s n", rank, size, node ); - MPI_Finalize(); - return 0; - } + printf( "Hello world from process %d of %d on host %s n", rank, size, node ); + MPI_Finalize(); + return 0; +} ``` ### MPI Programming Models diff --git a/docs.it4i/salomon/software/intel-xeon-phi.md b/docs.it4i/software/intel/intel-xeon-phi-salomon.md similarity index 75% rename from docs.it4i/salomon/software/intel-xeon-phi.md rename to docs.it4i/software/intel/intel-xeon-phi-salomon.md index 6d161439b7871e097ae095a4103a1f37ab490a0e..8d6c9c61903b9aaf6d93073a6d338efd2a7c013a 100644 --- a/docs.it4i/salomon/software/intel-xeon-phi.md +++ b/docs.it4i/software/intel/intel-xeon-phi-salomon.md @@ -28,113 +28,113 @@ The output of the "micinfo" utility executed on one of the cluster node is as fo ```console MicInfo Utility Log -Created Mon Aug 17 13:55:59 2015 +Created Wed Sep 13 13:39:28 2017 - System Info - HOST OS : Linux - OS Version : 2.6.32-504.16.2.el6.x86_64 - Driver Version : 3.4.1-1 - MPSS Version : 3.4.1 - Host Physical Memory : 131930 MB + System Info + HOST OS : Linux + OS Version : 2.6.32-696.3.2.el6.x86_64 + Driver Version : 3.8.2-1 + MPSS Version : 3.8.2 + Host Physical Memory : 128838 MB Device No: 0, Device Name: mic0 - Version - Flash Version : 2.1.02.0390 - SMC Firmware Version : 1.16.5078 - SMC Boot Loader Version : 1.8.4326 - uOS Version : 2.6.38.8+mpss3.4.1 - Device Serial Number : ADKC44601414 - - Board - Vendor ID : 0x8086 - Device ID : 0x225c - Subsystem ID : 0x7d95 - Coprocessor Stepping ID : 2 - PCIe Width : x16 - PCIe Speed : 5 GT/s - PCIe Max payload size : 256 bytes - PCIe Max read req size : 512 bytes - Coprocessor Model : 0x01 - Coprocessor Model Ext : 0x00 - Coprocessor Type : 0x00 - Coprocessor 
Family : 0x0b - Coprocessor Family Ext : 0x00 - Coprocessor Stepping : C0 - Board SKU : C0PRQ-7120 P/A/X/D - ECC Mode : Enabled - SMC HW Revision : Product 300W Passive CS - - Cores - Total No of Active Cores : 61 - Voltage : 1007000 uV - Frequency : 1238095 kHz - - Thermal - Fan Speed Control : N/A - Fan RPM : N/A - Fan PWM : N/A - Die Temp : 60 C - - GDDR - GDDR Vendor : Samsung - GDDR Version : 0x6 - GDDR Density : 4096 Mb - GDDR Size : 15872 MB - GDDR Technology : GDDR5 - GDDR Speed : 5.500000 GT/s - GDDR Frequency : 2750000 kHz - GDDR Voltage : 1501000 uV + Version + Flash Version : 2.1.02.0391 + SMC Firmware Version : 1.17.6900 + SMC Boot Loader Version : 1.8.4326 + Coprocessor OS Version : 2.6.38.8+mpss3.8.2 + Device Serial Number : ADKC44601725 + + Board + Vendor ID : 0x8086 + Device ID : 0x225c + Subsystem ID : 0x7d95 + Coprocessor Stepping ID : 2 + PCIe Width : x16 + PCIe Speed : 5 GT/s + PCIe Max payload size : 256 bytes + PCIe Max read req size : 512 bytes + Coprocessor Model : 0x01 + Coprocessor Model Ext : 0x00 + Coprocessor Type : 0x00 + Coprocessor Family : 0x0b + Coprocessor Family Ext : 0x00 + Coprocessor Stepping : C0 + Board SKU : C0PRQ-7120 P/A/X/D + ECC Mode : Enabled + SMC HW Revision : Product 300W Passive CS + + Cores + Total No of Active Cores : 61 + Voltage : 1041000 uV + Frequency : 1238095 kHz + + Thermal + Fan Speed Control : N/A + Fan RPM : N/A + Fan PWM : N/A + Die Temp : 50 C + + GDDR + GDDR Vendor : Samsung + GDDR Version : 0x6 + GDDR Density : 4096 Mb + GDDR Size : 15872 MB + GDDR Technology : GDDR5 + GDDR Speed : 5.500000 GT/s + GDDR Frequency : 2750000 kHz + GDDR Voltage : 1501000 uV Device No: 1, Device Name: mic1 - Version - Flash Version : 2.1.02.0390 - SMC Firmware Version : 1.16.5078 - SMC Boot Loader Version : 1.8.4326 - uOS Version : 2.6.38.8+mpss3.4.1 - Device Serial Number : ADKC44500454 - - Board - Vendor ID : 0x8086 - Device ID : 0x225c - Subsystem ID : 0x7d95 - Coprocessor Stepping ID : 2 - PCIe Width : x16 - PCIe Speed : 5 GT/s - PCIe Max payload size : 256 bytes - PCIe Max read req size : 512 bytes - Coprocessor Model : 0x01 - Coprocessor Model Ext : 0x00 - Coprocessor Type : 0x00 - Coprocessor Family : 0x0b - Coprocessor Family Ext : 0x00 - Coprocessor Stepping : C0 - Board SKU : C0PRQ-7120 P/A/X/D - ECC Mode : Enabled - SMC HW Revision : Product 300W Passive CS - - Cores - Total No of Active Cores : 61 - Voltage : 998000 uV - Frequency : 1238095 kHz - - Thermal - Fan Speed Control : N/A - Fan RPM : N/A - Fan PWM : N/A - Die Temp : 59 C - - GDDR - GDDR Vendor : Samsung - GDDR Version : 0x6 - GDDR Density : 4096 Mb - GDDR Size : 15872 MB - GDDR Technology : GDDR5 - GDDR Speed : 5.500000 GT/s - GDDR Frequency : 2750000 kHz - GDDR Voltage : 1501000 uV + Version + Flash Version : 2.1.02.0391 + SMC Firmware Version : 1.17.6900 + SMC Boot Loader Version : 1.8.4326 + Coprocessor OS Version : 2.6.38.8+mpss3.8.2 + Device Serial Number : ADKC44601893 + + Board + Vendor ID : 0x8086 + Device ID : 0x225c + Subsystem ID : 0x7d95 + Coprocessor Stepping ID : 2 + PCIe Width : x16 + PCIe Speed : 5 GT/s + PCIe Max payload size : 256 bytes + PCIe Max read req size : 512 bytes + Coprocessor Model : 0x01 + Coprocessor Model Ext : 0x00 + Coprocessor Type : 0x00 + Coprocessor Family : 0x0b + Coprocessor Family Ext : 0x00 + Coprocessor Stepping : C0 + Board SKU : C0PRQ-7120 P/A/X/D + ECC Mode : Enabled + SMC HW Revision : Product 300W Passive CS + + Cores + Total No of Active Cores : 61 + Voltage : 1053000 uV + Frequency : 1238095 kHz + + Thermal + Fan Speed 
Control : N/A + Fan RPM : N/A + Fan PWM : N/A + Die Temp : 48 C + + GDDR + GDDR Vendor : Samsung + GDDR Version : 0x6 + GDDR Density : 4096 Mb + GDDR Size : 15872 MB + GDDR Technology : GDDR5 + GDDR Speed : 5.500000 GT/s + GDDR Frequency : 2750000 kHz + GDDR Voltage : 1501000 uV ``` ## Offload Mode @@ -154,7 +154,7 @@ export OFFLOAD_REPORT=3 A very basic example of code that employs offload programming technique is shown in the next listing. Please note that this code is sequential and utilizes only single core of the accelerator. -```console +```cpp $ cat source-offload.cpp #include <iostream> @@ -190,82 +190,82 @@ $ ./bin-offload One way of paralelization a code for Xeon Phi is using OpenMP directives. The following example shows code for parallel vector addition. -```console +```cpp $ cat ./vect-add - #include <stdio.h> +#include <stdio.h> - typedef int T; +typedef int T; - #define SIZE 1000 +#define SIZE 1000 - #pragma offload_attribute(push, target(mic)) - T in1[SIZE]; - T in2[SIZE]; - T res[SIZE]; - #pragma offload_attribute(pop) +#pragma offload_attribute(push, target(mic)) +T in1[SIZE]; +T in2[SIZE]; +T res[SIZE]; +#pragma offload_attribute(pop) - // MIC function to add two vectors - __attribute__((target(mic))) add_mic(T *a, T *b, T *c, int size) { - int i = 0; - #pragma omp parallel for - for (i = 0; i < size; i++) - c[i] = a[i] + b[i]; - } +// MIC function to add two vectors +__attribute__((target(mic))) add_mic(T *a, T *b, T *c, int size) { + int i = 0; + #pragma omp parallel for + for (i = 0; i < size; i++) + c[i] = a[i] + b[i]; +} - // CPU function to add two vectors - void add_cpu (T *a, T *b, T *c, int size) { - int i; - for (i = 0; i < size; i++) - c[i] = a[i] + b[i]; - } +// CPU function to add two vectors +void add_cpu (T *a, T *b, T *c, int size) { + int i; + for (i = 0; i < size; i++) + c[i] = a[i] + b[i]; +} - // CPU function to generate a vector of random numbers - void random_T (T *a, int size) { - int i; - for (i = 0; i < size; i++) - a[i] = rand() % 10000; // random number between 0 and 9999 - } +// CPU function to generate a vector of random numbers +void random_T (T *a, int size) { + int i; + for (i = 0; i < size; i++) + a[i] = rand() % 10000; // random number between 0 and 9999 +} - // CPU function to compare two vectors - int compare(T *a, T *b, T size ){ - int pass = 0; - int i; - for (i = 0; i < size; i++){ - if (a[i] != b[i]) { - printf("Value mismatch at location %d, values %d and %dn",i, a[i], b[i]); - pass = 1; - } - } - if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn"); - return pass; +// CPU function to compare two vectors +int compare(T *a, T *b, T size ){ + int pass = 0; + int i; + for (i = 0; i < size; i++){ + if (a[i] != b[i]) { + printf("Value mismatch at location %d, values %d and %dn",i, a[i], b[i]); + pass = 1; } + } + if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn"); + return pass; +} - int main() - { - int i; - random_T(in1, SIZE); - random_T(in2, SIZE); +int main() +{ + int i; + random_T(in1, SIZE); + random_T(in2, SIZE); - #pragma offload target(mic) in(in1,in2) inout(res) - { + #pragma offload target(mic) in(in1,in2) inout(res) + { - // Parallel loop from main function - #pragma omp parallel for - for (i=0; i<SIZE; i++) - res[i] = in1[i] + in2[i]; + // Parallel loop from main function + #pragma omp parallel for + for (i=0; i<SIZE; i++) + res[i] = in1[i] + in2[i]; - // or parallel loop is called inside the function - add_mic(in1, in2, res, SIZE); + // or parallel loop is called inside the function + 
add_mic(in1, in2, res, SIZE); - } + } - //Check the results with CPU implementation - T res_cpu[SIZE]; - add_cpu(in1, in2, res_cpu, SIZE); - compare(res, res_cpu, SIZE); + //Check the results with CPU implementation + T res_cpu[SIZE]; + add_cpu(in1, in2, res_cpu, SIZE); + compare(res, res_cpu, SIZE); - } +} ``` During the compilation Intel compiler shows which loops have been vectorized in both host and accelerator. This can be enabled with compiler option "-vec-report2". To compile and execute the code run @@ -315,9 +315,9 @@ $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:acceler $ ml intel ``` -The code can be copied to a file and compiled without any necessary modification. +The code can be copied to a file and compiled without any necessary modification. -```console +```cpp $ vim sgemm-ao-short.c #include <stdio.h> @@ -329,46 +329,46 @@ $ vim sgemm-ao-short.c int main(int argc, char **argv) { - float *A, *B, *C; /* Matrices */ + float *A, *B, *C; /* Matrices */ - MKL_INT N = 2560; /* Matrix dimensions */ - MKL_INT LD = N; /* Leading dimension */ - int matrix_bytes; /* Matrix size in bytes */ - int matrix_elements; /* Matrix size in elements */ + MKL_INT N = 2560; /* Matrix dimensions */ + MKL_INT LD = N; /* Leading dimension */ + int matrix_bytes; /* Matrix size in bytes */ + int matrix_elements; /* Matrix size in elements */ - float alpha = 1.0, beta = 1.0; /* Scaling factors */ - char transa = 'N', transb = 'N'; /* Transposition options */ + float alpha = 1.0, beta = 1.0; /* Scaling factors */ + char transa = 'N', transb = 'N'; /* Transposition options */ - int i, j; /* Counters */ + int i, j; /* Counters */ - matrix_elements = N * N; - matrix_bytes = sizeof(float) * matrix_elements; + matrix_elements = N * N; + matrix_bytes = sizeof(float) * matrix_elements; - /* Allocate the matrices */ - A = malloc(matrix_bytes); B = malloc(matrix_bytes); C = malloc(matrix_bytes); + /* Allocate the matrices */ + A = malloc(matrix_bytes); B = malloc(matrix_bytes); C = malloc(matrix_bytes); - /* Initialize the matrices */ - for (i = 0; i < matrix_elements; i++) { - A[i] = 1.0; B[i] = 2.0; C[i] = 0.0; - } + /* Initialize the matrices */ + for (i = 0; i < matrix_elements; i++) { + A[i] = 1.0; B[i] = 2.0; C[i] = 0.0; + } - printf("Computing SGEMM on the host\n"); - sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); + printf("Computing SGEMM on the host\n"); + sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); - printf("Enabling Automatic Offload\n"); - /* Alternatively, set environment variable MKL_MIC_ENABLE=1 */ - mkl_mic_enable(); - - int ndevices = mkl_mic_get_device_count(); /* Number of MIC devices */ - printf("Automatic Offload enabled: %d MIC devices present\n", ndevices); + printf("Enabling Automatic Offload\n"); + /* Alternatively, set environment variable MKL_MIC_ENABLE=1 */ + mkl_mic_enable(); - printf("Computing SGEMM with automatic workdivision\n"); - sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); + int ndevices = mkl_mic_get_device_count(); /* Number of MIC devices */ + printf("Automatic Offload enabled: %d MIC devices present\n", ndevices); - /* Free the matrix memory */ - free(A); free(B); free(C); + printf("Computing SGEMM with automatic workdivision\n"); + sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); - printf("Done\n"); + /* Free the matrix memory */ + free(A); free(B); free(C); + + printf("Done\n"); return 0; } @@ -392,7 +392,7 @@ $ export OFFLOAD_REPORT=2 The 
output of a code should look similar to following listing, where lines starting with [MKL] are generated by offload reporting: ```console -[user@r31u03n799 ~]$ ./sgemm +[user@r31u03n799 ~]$ ./sgemm Computing SGEMM on the host Enabling Automatic Offload Automatic Offload enabled: 2 MIC devices present @@ -413,7 +413,7 @@ Done !!! note "" Behavioral of automatic offload mode is controlled by functions called within the program or by environmental variables. Complete list of controls is listed [here](http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_userguide_lnx/GUID-3DC4FC7D-A1E4-423D-9C0C-06AB265FFA86.htm). -### Automatic offload example #2 +### Automatic Offload Example #2 In this example, we will demonstrate automatic offload control via an environment vatiable MKL_MIC_ENABLE. The function DGEMM will be offloaded. @@ -517,10 +517,10 @@ For your information the list of libraries and their location required for execu !!! note /apps/all/icc/2015.3.187-GNU-5.1.0-2.25/composer_xe_2015.3.187/compiler/lib/mic - libiomp5.so - libimf.so - libsvml.so - libirng.so + libiomp5.so + libimf.so + libsvml.so + libirng.so libintlc.so.5 Finally, to run the compiled code use: @@ -551,12 +551,6 @@ First example "CapsBasic" detects OpenCL compatible hardware, here CPU and MIC, To compile and run the example copy it to your home directory, get a PBS interactive session on of the nodes with MIC and run make for compilation. Make files are very basic and shows how the OpenCL code can be compiled on Salomon. -```console -$ cp /apps/intel/opencl-examples/CapsBasic/* . -$ qsub -I -q qmic -A NONE-0-0 -$ make -``` - The compilation command for this example is: ```console @@ -566,69 +560,54 @@ $ g++ capsbasic.cpp -lOpenCL -o capsbasic -I/apps/intel/opencl/include/ After executing the complied binary file, following output should be displayed. ```console - ./capsbasic +./capsbasic - Number of available platforms: 1 - Platform names: - [0] Intel(R) OpenCL [Selected] - Number of devices available for each type: - CL_DEVICE_TYPE_CPU: 1 - CL_DEVICE_TYPE_GPU: 0 - CL_DEVICE_TYPE_ACCELERATOR: 1 +Number of available platforms: 1 +Platform names: + [0] Intel(R) OpenCL [Selected] +Number of devices available for each type: + CL_DEVICE_TYPE_CPU: 1 + CL_DEVICE_TYPE_GPU: 0 + CL_DEVICE_TYPE_ACCELERATOR: 1 - ** Detailed information for each device *** +** Detailed information for each device *** - CL_DEVICE_TYPE_CPU[0] - CL_DEVICE_NAME: Intel(R) Xeon(R) CPU E5-2470 0 @ 2.30GHz - CL_DEVICE_AVAILABLE: 1 +CL_DEVICE_TYPE_CPU[0] + CL_DEVICE_NAME: Intel(R) Xeon(R) CPU E5-2470 0 @ 2.30GHz + CL_DEVICE_AVAILABLE: 1 - ... +... - CL_DEVICE_TYPE_ACCELERATOR[0] - CL_DEVICE_NAME: Intel(R) Many Integrated Core Acceleration Card - CL_DEVICE_AVAILABLE: 1 +CL_DEVICE_TYPE_ACCELERATOR[0] + CL_DEVICE_NAME: Intel(R) Many Integrated Core Acceleration Card + CL_DEVICE_AVAILABLE: 1 - ... +... ``` !!! note More information about this example can be found on Intel website: <http://software.intel.com/en-us/vcsource/samples/caps-basic/> -The second example that can be found in "/apps/intel/opencl-examples" directory is General Matrix Multiply. You can follow the the same procedure to download the example to your directory and compile it. - -```console -$ cp -r /apps/intel/opencl-examples/* . 
-$ qsub -I -q qmic -A NONE-0-0
-$ cd GEMM
-$ make
-```
-
-The compilation command for this example is:
-
-```console
-$ g++ cmdoptions.cpp gemm.cpp ../common/basic.cpp ../common/cmdparser.cpp ../common/oclobject.cpp -I../common -lOpenCL -o gemm -I/apps/intel/opencl/include/
-```
-
To see the performance of Intel Xeon Phi performing the DGEMM, run the example as follows:

```console
- ./gemm -d 1
- Platforms (1):
-  [0] Intel(R) OpenCL [Selected]
- Devices (2):
-  [0] Intel(R) Xeon(R) CPU E5-2470 0 @ 2.30GHz
-  [1] Intel(R) Many Integrated Core Acceleration Card [Selected]
- Build program options: "-DT=float -DTILE_SIZE_M=1 -DTILE_GROUP_M=16 -DTILE_SIZE_N=128 -DTILE_GROUP_N=1 -DTILE_SIZE_K=8"
- Running gemm_nn kernel with matrix size: 3968x3968
- Memory row stride to ensure necessary alignment: 15872 bytes
- Size of memory region for one matrix: 62980096 bytes
- Using alpha = 0.57599 and beta = 0.872412
- ...
- Host time: 0.292953 sec.
- Host perf: 426.635 GFLOPS
- Host time: 0.293334 sec.
- Host perf: 426.081 GFLOPS
- ...
+./gemm -d 1
+Platforms (1):
+  [0] Intel(R) OpenCL [Selected]
+Devices (2):
+  [0] Intel(R) Xeon(R) CPU E5-2470 0 @ 2.30GHz
+  [1] Intel(R) Many Integrated Core Acceleration Card [Selected]
+Build program options: "-DT=float -DTILE_SIZE_M=1 -DTILE_GROUP_M=16 -DTILE_SIZE_N=128 -DTILE_GROUP_N=1 -DTILE_SIZE_K=8"
+Running gemm_nn kernel with matrix size: 3968x3968
+Memory row stride to ensure necessary alignment: 15872 bytes
+Size of memory region for one matrix: 62980096 bytes
+Using alpha = 0.57599 and beta = 0.872412
+...
+Host time: 0.292953 sec.
+Host perf: 426.635 GFLOPS
+Host time: 0.293334 sec.
+Host perf: 426.081 GFLOPS
+...
```

!!! hint

@@ -648,7 +627,7 @@ $ export I_MPI_DAPL_PROVIDER_LIST=ofa-v2-mlx4_0-1u,ofa-v2-scif0,ofa-v2-mcm-1

This ensures that MPI inside a node will use SHMEM communication; between the host and Phi the IB SCIF will be used; and between different nodes, or Phis on different nodes, a CCL-Direct proxy will be used.

!!! note
-    Other FABRICS like tcp,ofa may be used (even combined with shm) but there's severe loss of performance (by order of magnitude).
+    Other FABRICS like tcp,ofa may be used (even combined with shm) but there's severe loss of performance (by order of magnitude).
    Usage of a single DAPL provider (e.g. I_MPI_DAPL_PROVIDER=ofa-v2-mlx4_0-1u) will cause failure of Host<->Phi and/or Phi<->Phi communication.
    Usage of the I_MPI_DAPL_PROVIDER_LIST on a non-accelerated node will cause failure of any MPI communication, since those nodes don't have a SCIF device and there's no CCL-Direct proxy running.
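+As a concrete sketch of a symmetric host+coprocessor launch with these settings (the node name, binary names, and process counts are illustrative):
+
+```console
+$ export I_MPI_FABRICS=shm:dapl
+$ export I_MPI_DAPL_PROVIDER_LIST=ofa-v2-mlx4_0-1u,ofa-v2-scif0,ofa-v2-mcm-1
+$ mpirun -n 24 -host r25u26n711 ~/mpi-test : -n 2 -host r25u26n711-mic0 ~/mpi-test-mic
+```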
@@ -685,28 +664,28 @@ $ mpiifort -mmic -o mpi-test-mic mpi-test.f90 An example of basic MPI version of "hello-world" example in C language, that can be executed on both host and Xeon Phi is (can be directly copy and pasted to a .c file) ```cpp - #include <stdio.h> - #include <mpi.h> +#include <stdio.h> +#include <mpi.h> - int main (argc, argv) - int argc; - char *argv[]; - { - int rank, size; +int main (argc, argv) + int argc; + char *argv[]; +{ + int rank, size; - int len; - char node[MPI_MAX_PROCESSOR_NAME]; + int len; + char node[MPI_MAX_PROCESSOR_NAME]; - MPI_Init (&argc, &argv); /* starts MPI */ - MPI_Comm_rank (MPI_COMM_WORLD, &rank); /* get current process id */ - MPI_Comm_size (MPI_COMM_WORLD, &size); /* get number of processes */ + MPI_Init (&argc, &argv); /* starts MPI */ + MPI_Comm_rank (MPI_COMM_WORLD, &rank); /* get current process id */ + MPI_Comm_size (MPI_COMM_WORLD, &size); /* get number of processes */ - MPI_Get_processor_name(node,&len); + MPI_Get_processor_name(node,&len); - printf( "Hello world from process %d of %d on host %s n", rank, size, node ); - MPI_Finalize(); - return 0; - } + printf( "Hello world from process %d of %d on host %s n", rank, size, node ); + MPI_Finalize(); + return 0; +} ``` ### MPI Programming Models @@ -922,7 +901,7 @@ $ mpirun -genv LD_LIBRARY_PATH $MIC_LD_LIBRARY_PATH \ : -host r25u26n711 -n 6 ~/mpi-test ``` -### Symmetric model +### Symmetric Model In a symmetric mode MPI programs are executed on both host computer(s) and MIC accelerator(s). Since MIC has a different architecture and requires different binary file produced by the Intel compiler two different files has to be compiled before MPI program is executed. diff --git a/docs.it4i/anselm/software/isv_licenses.md b/docs.it4i/software/isv_licenses.md similarity index 100% rename from docs.it4i/anselm/software/isv_licenses.md rename to docs.it4i/software/isv_licenses.md diff --git a/docs.it4i/software/lang/conda.md b/docs.it4i/software/lang/conda.md new file mode 100644 index 0000000000000000000000000000000000000000..d9dfacd1194b47f40890ae4594e1fb92d12dea89 --- /dev/null +++ b/docs.it4i/software/lang/conda.md @@ -0,0 +1,144 @@ +# Conda (Anaconda) + +Conda is an open source package management system and environment management system that runs on Windows, macOS and Linux. Conda quickly installs, runs and updates packages and their dependencies. Conda easily creates, saves, loads and switches between environments on your local computer. It was created for Python programs, but it can package and distribute software for any language. + +Conda as a package manager helps you find and install packages. If you need a package that requires a different version of Python, you do not need to switch to a different environment manager, because conda is also an environment manager. With just a few commands, you can set up a totally separate environment to run that different version of Python, while continuing to run your usual version of Python in your normal environment. + +Conda treats Python the same as any other package, so it is easy to manage and update multiple installations. +Anaconda supports Python 2.7, 3.4, 3.5 and 3.6. The default is Python 2.7 or 3.6, depending on which installer you used: + +* For the installers “Anaconda” and “Miniconda,” the default is 2.7. +* For the installers “Anaconda3” or “Miniconda3,” the default is 3.6. + +## Conda on the IT4Innovations Clusters + +On the clusters we have the Anaconda2 and Anaconda3 software installed. How to use these modules is shown below. 
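+Once one of the Anaconda modules below is loaded, a separate environment with a different Python version can be created and activated in two commands (a minimal sketch; the environment name and version are illustrative):
+
+```console
+$ conda create -n py35 python=3.5
+$ source activate py35
+(py35) ~]$
+```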
+ +!!! note + Use the command `ml av conda` to get up-to-date versions of the modules. + +```console +$ ml av conda + +------------- /apps/modules/lang --------------------------------- +Anaconda2/4.4.0 Anaconda3/4.4.0 +``` + +## Anaconda2 + +The default is Python 2.7 + +### First Usage Module Anaconda2 + +```console +$ ml Anaconda2/4.4.0 +$ python --version +Python 2.7.13 :: Anaconda 4.4.0 (64-bit) +$ conda install numpy +Fetching package metadata ......... +Solving package specifications: . + +Package plan for installation in environment /apps/all/Anaconda2/4.4.0: + +The following packages will be UPDATED: + +anaconda: 4.4.0-np112py27_0 --> custom-py27_0 +... +... +... +CondaIOError: Missing write permissions in: /apps/all/Anaconda2/4.4.0 +# +# You don't appear to have the necessary permissions to install packages +# into the install area '/apps/all/Anaconda2/4.4.0'. +# However you can clone this environment into your home directory and +# then make changes to it. +# This may be done using the command: +# +# $ conda create -n my_root --clone="/apps/all/Anaconda2/4.4.0" +$ +$ conda create -n anaconda2 --clone="/apps/all/Anaconda2/4.4.0" +Source: /apps/all/Anaconda2/4.4.0 +Destination: /home/svi47/.conda/envs/anaconda2 +The following packages cannot be cloned out of the root environment: +- conda-4.3.21-py27_0 +- conda-env-2.6.0-0 +Packages: 213 +... +... +... +# +# To activate this environment, use: +# > source activate anaconda2 +# +# To deactivate this en +``` + +### Usage Module Anaconda2 + +```console +$ ml Anaconda2/4.4.0 +$ source activate anaconda2 +(anaconda2) ~]$ +``` + +## Anaconda3 + +The default is Python 3.6 + +### First Usage Module Anaconda3 + +```console +$ ml Anaconda3/4.4.0 +$ python --version +Python 3.6.1 :: Anaconda 4.4.0 (64-bit) +$ conda install numpy +Fetching package metadata ......... +Solving package specifications: . + +Package plan for installation in environment /apps/all/Anaconda3/4.4.0: + +The following packages will be UPDATED: + +anaconda: 4.4.0-np112py36_0 --> custom-py36_0 +... +... +... +CondaIOError: Missing write permissions in: /apps/all/Anaconda3/4.4.0 +# +# You don't appear to have the necessary permissions to install packages +# into the install area '/apps/all/Anaconda3/4.4.0'. +# However you can clone this environment into your home directory and +# then make changes to it. +# This may be done using the command: +# +# $ conda create -n my_root --clone="/apps/all/Anaconda3/4.4.0" +$ +$ conda create -n anaconda3 --clone="/apps/all/Anaconda3/4.4.0" +Source: /apps/all/Anaconda3/4.4.0 +Destination: /home/svi47/.conda/envs/anaconda3 +The following packages cannot be cloned out of the root environment: +- conda-4.3.21-py36_0 +- conda-env-2.6.0-0 +Packages: 200 +Files: 6 +... +... +... +# +# To activate this environment, use: +# > source activate anaconda3 +# +# To deactivate this environment, use: +# > source deactivate anaconda3 +# +$ source activate anaconda3 +(anaconda3) ~]$ +``` + +### Usage Module Anaconda3 + +```console +$ ml Anaconda3/4.4.0 +$ source activate anaconda3 +(anaconda3) ~]$ +``` diff --git a/docs.it4i/software/lang/csc.md b/docs.it4i/software/lang/csc.md new file mode 100644 index 0000000000000000000000000000000000000000..28e7b15bd7b306763e11c6071c020809678b1646 --- /dev/null +++ b/docs.it4i/software/lang/csc.md @@ -0,0 +1,122 @@ +# CSharp + +C# is available on the cluster. + +```console +$ ml av mono + +-------------------- /apps/modules/lang --------------- + Mono/5.0.0.100 +``` + +!!! 
note + Use the command `ml av mono` to get up-to-date versions of the modules. + +Activate C# by loading the Mono module: + +```console +$ ml Mono +``` + +## Examples + +### Hello World + +Copy this code to new file hello.cs: + +```csc +using System; + +class HelloWorld { + static void Main() { + Console.WriteLine("Hello world!!!"); + } +} +``` + +Compile the program and make *Windows executable*. + +```console +$ mcs -out:hello.exe hello.cs +``` + +Now run the program: + +```console +$ mono hello.exe +Hello world!!! +``` + +### Interactive Console + +Type: + +```console +$ csharp +Mono C# Shell, type "help;" for help + +Enter statements below. +csharp> +``` + +Now you are in interactive mode. You can try following example. + +```csc +csharp> using System; +csharp> int a = 5; +csharp> double b = 1.5; +csharp> Console.WriteLine("{0}*{1} is equal to {2}", a,b,a*b); +5*1.5 is equal to 7.5 +csharp> a == b +false +``` + +Show all files modified in last 5 days: + +```csc +csharp> using System.IO; +csharp> from f in Directory.GetFiles ("mydirectory") + > let fi = new FileInfo (f) + > where fi.LastWriteTime > DateTime.Now-TimeSpan.FromDays(5) select f; +{ "mydirectory/mynewfile.cs", "mydirectory/script.sh" } +``` + +## MPI.NET + +MPI is available for mono. + +```csc +using System; +using MPI; + +class MPIHello +{ + static void Main(string[] args) + { + using (new MPI.Environment(ref args)) + { + Console.WriteLine("Greetings from node {0} of {1} running on {2}", + Communicator.world.Rank, Communicator.world.Size, + MPI.Environment.ProcessorName); + } + } +} +``` + +Compile and run the program on Anselm: + +```console +$ qsub -I -A DD-13-5 -q qexp -l select=2:ncpus=16,walltime=00:30:00 + +$ ml mpi.net + +$ mcs -out:csc.exe -reference:/apps/tools/mpi.net/1.0.0-mono-3.12.1/lib/MPI.dll csc.cs + +$ mpirun -n 4 mono csc.exe +Greetings from node 2 of 4 running on cn204 +Greetings from node 0 of 4 running on cn204 +Greetings from node 3 of 4 running on cn199 +Greetings from node 1 of 4 running on cn199 +``` + +For more informations look at [Mono documentation page](http://www.mono-project.com/docs/). diff --git a/docs.it4i/software/lang/java.md b/docs.it4i/software/lang/java.md new file mode 100644 index 0000000000000000000000000000000000000000..d8e10e70a86d5e1b94606f72495a221d9edaa867 --- /dev/null +++ b/docs.it4i/software/lang/java.md @@ -0,0 +1,86 @@ +# Java + +Java is available on the cluster. Activate java by loading the Java module + +```console +$ ml Java +``` + +Note that the Java module must be loaded on the compute nodes as well, in order to run java on compute nodes. + +Check for java version and path + +```console +$ java -version +$ which java +``` + +With the module loaded, not only the runtime environment (JRE), but also the development environment (JDK) with the compiler is available. + +```console +$ javac -version +$ which javac +``` + +Java applications may use MPI for inter-process communication, in conjunction with Open MPI. Read more on <http://www.open-mpi.org/faq/?category=java>. This functionality is currently not supported on Anselm cluster. In case you require the java interface to MPI, please contact [cluster support](https://support.it4i.cz/rt/). + +## Java With OpenMPI + +Because there is an increasing interest in using Java for HPC. Also, MPI can benefit from Java because its widespread use makes it likely to find new uses beyond traditional HPC applications. + +The Java bindings are integrated into Open MPI starting from the v1.7 series. 
Beginning with the v2.0 series, the Java bindings include coverage of MPI-3.1. + +### Example (Hello.java) + +```java +import mpi.*; + +class Hello { + static public void main(String[] args) throws MPIException { + + + MPI.Init(args); + + int myrank = MPI.COMM_WORLD.getRank(); + int size = MPI.COMM_WORLD.getSize() ; + System.out.println("Hello world from rank " + myrank + " of " + size); + + MPI.Finalize(); + } +} +``` + +```console +$ ml Java/1.8.0_144 +$ ml OpenMPI/1.8.0_144 +$ mpijavac Hello.java +$ mpirun java Hello +Hello world from rank 23 of 28 +Hello world from rank 25 of 28 +Hello world from rank 0 of 28 +Hello world from rank 4 of 28 +Hello world from rank 7 of 28 +Hello world from rank 8 of 28 +Hello world from rank 11 of 28 +Hello world from rank 12 of 28 +Hello world from rank 13 of 28 +Hello world from rank 18 of 28 +Hello world from rank 17 of 28 +Hello world from rank 24 of 28 +Hello world from rank 27 of 28 +Hello world from rank 2 of 28 +Hello world from rank 3 of 28 +Hello world from rank 1 of 28 +Hello world from rank 10 of 28 +Hello world from rank 14 of 28 +Hello world from rank 16 of 28 +Hello world from rank 19 of 28 +Hello world from rank 26 of 28 +Hello world from rank 6 of 28 +Hello world from rank 9 of 28 +Hello world from rank 15 of 28 +Hello world from rank 20 of 28 +Hello world from rank 5 of 28 +Hello world from rank 21 of 28 +Hello world from rank 22 of 28 +``` \ No newline at end of file diff --git a/docs.it4i/software/lang/python.md b/docs.it4i/software/lang/python.md new file mode 100644 index 0000000000000000000000000000000000000000..0b7a58627ffed49c19f7f8ee541812e5086d0e66 --- /dev/null +++ b/docs.it4i/software/lang/python.md @@ -0,0 +1,186 @@ +# Python + +Python is a widely used high-level programming language for general-purpose programming, created by Guido van Rossum and first released in 1991. An interpreted language, Python has a design philosophy that emphasizes code readability (notably using whitespace indentation to delimit code blocks rather than curly brackets or keywords), and a syntax that allows programmers to express concepts in fewer lines of code than might be used in languages such as C++ or Java. The language provides constructs intended to enable writing clear programs on both a small and large scale. + +Python features a dynamic type system and automatic memory management and supports multiple programming paradigms, including object-oriented, imperative, functional programming, and procedural styles. It has a large and comprehensive standard library. + +* [Documentation for Python 3.X](http://docs.python.org/3/) +* [Documentation for Python 2.X](http://docs.python.org/2/) + +## Python on the IT4Innovations Clusters + +On the clusters we have the Python 2.X and Python 3.X software installed. How to use these modules is shown below. + +!!! note + Use the command `ml av python/` to get up-to-date versions of the modules. 
+ +```console +$ ml av python/ + +-------------------------- /apps/modules/lang -------------------------- + Python/2.7.8-intel-2015b Python/2.7.9-gompi-2015e Python/2.7.10-GCC-4.9.3-2.25-bare Python/2.7.11-intel-2016a Python/3.4.3-intel-2015b Python/3.5.2-intel-2017.00 + Python/2.7.8-intel-2016.01 Python/2.7.9-ictce-7.3.5 Python/2.7.10-GNU-4.9.3-2.25-bare Python/2.7.11-intel-2017a Python/3.5.1-intel-2016.01 Python/3.5.2 + Python/2.7.9-foss-2015b Python/2.7.9-intel-2015b Python/2.7.11-foss-2016a Python/2.7.11-intel-2017.00 Python/3.5.1-intel-2017.00 Python/3.6.1 + Python/2.7.9-foss-2015g Python/2.7.9-intel-2016.01 Python/2.7.11-GCC-4.9.3-2.25-bare Python/2.7.13-base Python/3.5.1 Python/3.6.2-base (D) + Python/2.7.9-GNU-5.1.0-2.25 Python/2.7.9 Python/2.7.11-intel-2015b Python/2.7.13 Python/3.5.2-foss-2016a + +-------------------------- /apps/modules/math --------------------------- + ScientificPython/2.9.4-intel-2015b-Python-2.7.9 ScientificPython/2.9.4-intel-2015b-Python-2.7.11 ScientificPython/2.9.4-intel-2016.01-Python-2.7.9 (D) + + Where: + D: Default Module + + If you need software that is not listed, request it at support@it4i.cz. +``` + +## Python 2.X + +Python 2.7 is scheduled to be the last major version in the 2.x series before it moves into an extended maintenance period. This release contains many of the features that were first released in Python 3.1. + +```console +$ ml av python/2 + +----------------------------------------------------------------------------------------------- /apps/modules/lang ------------------------------------------------------------------------------------------------ + Python/2.7.8-intel-2015b Python/2.7.9-GNU-5.1.0-2.25 Python/2.7.9-intel-2016.01 Python/2.7.11-foss-2016a Python/2.7.11-intel-2017a + Python/2.7.8-intel-2016.01 Python/2.7.9-gompi-2015e Python/2.7.9 Python/2.7.11-GCC-4.9.3-2.25-bare Python/2.7.11-intel-2017.00 + Python/2.7.9-foss-2015b Python/2.7.9-ictce-7.3.5 Python/2.7.10-GCC-4.9.3-2.25-bare Python/2.7.11-intel-2015b Python/2.7.13-base + Python/2.7.9-foss-2015g Python/2.7.9-intel-2015b Python/2.7.10-GNU-4.9.3-2.25-bare Python/2.7.11-intel-2016a Python/2.7.13 + +----------------------------------------------------------------------------------------------- /apps/modules/math ------------------------------------------------------------------------------------------------ + ScientificPython/2.9.4-intel-2015b-Python-2.7.9 ScientificPython/2.9.4-intel-2015b-Python-2.7.11 ScientificPython/2.9.4-intel-2016.01-Python-2.7.9 (D) + + Where: + D: Default Module + + If you need software that is not listed, request it at support@it4i.cz. +``` + +### Used Module Python/2.x + +```console +$ python --version +Python 2.6.6 +$ ml Python/2.7.13 +$ python --version +Python 2.7.1 +``` + +### Packages in Python/2.x + +```console +$ pip list +appdirs (1.4.3) +asn1crypto (0.22.0) +backports-abc (0.5) +backports.shutil-get-terminal-size (1.0.0) +backports.ssl-match-hostname (3.5.0.1) +BeautifulSoup (3.2.1) +beautifulsoup4 (4.5.3) +... +``` + +### How to Install New Package to Python/2.x? + +```console +$ ml Python/2.7.13 +$ python --version +$ pip install wheel --user +Collecting wheel + Downloading wheel-0.30.0-py2.py3-none-any.whl (49kB) + 100% |â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–â–| 51kB 835kB/s +Installing collected packages: wheel +Successfully installed wheel-0.30.0 +``` + +### How to Update Package in Python/2.x? 
+
+```console
+$ ml Python/2.7.13
+$ python --version
+Python 2.7.13
+$ pip install scipy --upgrade --user
+Collecting scipy
+  Downloading scipy-0.19.1-cp27-cp27mu-manylinux1_x86_64.whl (45.0MB)
+    100% |████████████████████████████████| 45.0MB 5.8kB/s
+Requirement already up-to-date: numpy>=1.8.2 in /apps/all/Python/2.7.13/lib/python2.7/site-packages (from scipy)
+Installing collected packages: scipy
+Successfully installed scipy-0.19.1
+```
+
+## Python 3.X
+
+Python 3.0 (a.k.a. "Python 3000" or "Py3k") is a new version of the language that is incompatible with the 2.x line of releases. The language is mostly the same, but many details, especially how built-in objects like dictionaries and strings work, have changed considerably, and a lot of deprecated features have finally been removed. Also, the standard library has been reorganized in a few prominent places.
+
+```console
+$ ml av python/3
+
+---------------------- /apps/modules/lang ----------------------
+   Python/3.4.3-intel-2015b    Python/3.5.1-intel-2017.00  Python/3.5.2-foss-2016a     Python/3.5.2  Python/3.6.2-base (D)
+   Python/3.5.1-intel-2016.01  Python/3.5.1                Python/3.5.2-intel-2017.00  Python/3.6.1
+
+  Where:
+   D:  Default Module
+
+If you need software that is not listed, request it at support@it4i.cz.
+
+```
+
+### Used Module Python/3.x
+
+```console
+$ python --version
+Python 2.6.6
+$ ml Python/3.6.2-base
+$ python --version
+Python 3.6.2
+```
+
+### Packages in Python/3.x
+
+```console
+$ pip3 list
+nose (1.3.7)
+pip (8.0.2)
+setuptools (20.1.1)
+```
+
+### How to Install a New Package to Python/3.x?
+
+```console
+$ ml Python/3.6.2-base
+$ python --version
+Python 3.6.2
+$ pip3 install pandas --user
+Collecting pandas
+  Downloading pandas-0.20.3.tar.gz (10.4MB)
+    100% |████████████████████████████████| 10.4MB 42kB/s
+Collecting python-dateutil>=2 (from pandas)
+  Downloading python_dateutil-2.6.1-py2.py3-none-any.whl (194kB)
+    100% |████████████████████████████████| 196kB 1.3MB/s
+Collecting pytz>=2011k (from pandas)
+  Downloading pytz-2017.2-py2.py3-none-any.whl (484kB)
+    100% |████████████████████████████████| 487kB 757kB/s
+Collecting numpy>=1.7.0 (from pandas)
+  Using cached numpy-1.13.1.zip
+Collecting six>=1.5 (from python-dateutil>=2->pandas)
+  Downloading six-1.11.0-py2.py3-none-any.whl
+Building wheels for collected packages: pandas, numpy
+  Running setup.py bdist_wheel for pandas ... done
+  Stored in directory: /home/kru0052/.cache/pip/wheels/dd/17/6c/a1c7e8d855f3a700b21256329fd396d105b533c5ed3e20c5e9
+  Running setup.py bdist_wheel for numpy ... done
+  Stored in directory: /home/kru0052/.cache/pip/wheels/94/44/90/4ce81547e3e5f4398b1601d0051e828b8160f8d3f3dd5a0c8c
+Successfully built pandas numpy
+Installing collected packages: six, python-dateutil, pytz, numpy, pandas
+Successfully installed numpy-1.13.1 pandas-0.20.3 python-dateutil-2.6.1 pytz-2017.2 six-1.11.0
+```
+
+### How to Update a Package in Python/3.x?
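+
+Before upgrading, you can check the currently installed version from Python itself (a quick sanity check; it assumes the package in question, here scipy, is already installed in your environment):
+
+```python
+import scipy
+print(scipy.__version__)
+```
+
+Then run the upgrade with pip3: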
+
+```console
+$ pip3 install scipy --upgrade --user
+Collecting scipy
+  Downloading scipy-0.19.1-cp36-cp36m-manylinux1_x86_64.whl (45.0MB)
+    100% |████████████████████████████████| 45.0MB 5.8kB/s
+Requirement already up-to-date: numpy>=1.8.2 in /apps/all/Python/3.6.2/lib/python3.6/site-packages (from scipy)
+Installing collected packages: scipy
+Successfully installed scipy-0.19.1
+```
diff --git a/docs.it4i/software/machine-learning/introduction.md b/docs.it4i/software/machine-learning/introduction.md
new file mode 100644
index 0000000000000000000000000000000000000000..7d9e790f00afe03ac2ad3568e871e497bd8e9656
--- /dev/null
+++ b/docs.it4i/software/machine-learning/introduction.md
@@ -0,0 +1,27 @@
+# Machine Learning
+
+This section overviews machine learning frameworks and libraries available on the clusters.
+
+## TensorFlow
+
+Load the TensorFlow module:
+
+```console
+$ ml Tensorflow
+```
+
+Test the module:
+
+```console
+$ python -c "import tensorflow as tf; print(tf.__version__)"
+```
+
+Read more about available versions at the [TensorFlow page](tensorflow/).
+
+## Theano
+
+Read more about available versions at the [Theano page](theano/).
+
+## Keras
+
+Read more about available versions at the [Keras page](keras/).
diff --git a/docs.it4i/software/machine-learning/tensorflow.md b/docs.it4i/software/machine-learning/tensorflow.md
new file mode 100644
index 0000000000000000000000000000000000000000..fc55b217c2f427c2cbeedc95d328aeae8944ee72
--- /dev/null
+++ b/docs.it4i/software/machine-learning/tensorflow.md
@@ -0,0 +1,65 @@
+# TensorFlow
+
+TensorFlow is an open-source software library for machine intelligence.
+To search for available modules, type:
+
+```console
+$ ml av Tensorflow
+```
+
+## Anselm Modules
+
+Among others, Anselm provides these three TensorFlow modules:
+
+**Tensorflow/1.1.0** (CPU only, not recommended), module built with:
+
+* GCC/4.9.3
+* Python/3.6.1
+
+**Tensorflow/1.1.0-CUDA-7.5.18-Python-3.6.1** (GPU enabled), module built with:
+
+* GCC/4.9.3
+* Python/3.6.1
+* CUDA/7.5.18
+* cuDNN/5.1-CUDA-7.5.18
+
+**Tensorflow/1.1.0-CUDA-8.0.44-Python-3.6.1** (GPU enabled), module built with:
+
+* GCC/4.9.3
+* Python/3.6.1
+* CUDA/8.0.44
+* cuDNN/5.1-CUDA-8.0.44
+
+## Salomon Modules
+
+Among others, Salomon provides these three TensorFlow modules:
+
+**Tensorflow/1.1.0** (not recommended), module built with:
+
+* GCC/4.9.3
+* Python/3.6.1
+
+**Tensorflow/1.2.0-GCC-7.1.0-2.28** (default, recommended), module built with:
+
+* TensorFlow 1.2 with SIMD support, built to take advantage of the Salomon CPU architecture.
+* GCC/7.1.0-2.28
+* Python/3.6.1
+* protobuf/3.2.0-GCC-7.1.0-2.28-Python-3.6.1
+
+**Tensorflow/1.2.0-intel-2017.05-mkl** (TensorFlow 1.2 with MKL support), module built with:
+
+* icc/2017.4.196-GCC-7.1.0-2.28
+* Python/3.6.1
+* protobuf/3.2.0-GCC-7.1.0-2.28-Python-3.6.1
+
+## TensorFlow Application Example
+
+After loading one of the available TensorFlow modules, you can check its functionality by running the following Python script.
+
+```python
+import tensorflow as tf
+
+c = tf.constant('Hello World!')
+sess = tf.Session()
+print(sess.run(c))
+```
diff --git a/docs.it4i/software/mic/mic_environment.md b/docs.it4i/software/mic/mic_environment.md
new file mode 100644
index 0000000000000000000000000000000000000000..a3343ca17b08586422c63f95e63864dcbc308309
--- /dev/null
+++ b/docs.it4i/software/mic/mic_environment.md
@@ -0,0 +1,437 @@
+# Intel Xeon Phi Environment
+
+The Intel Xeon Phi (so-called MIC) accelerator can be used in several modes ([Offload](../intel/intel-xeon-phi-salomon/#offload-mode) and [Native](#native-mode)). The default mode on the cluster is offload mode, but all modes described in this document are supported.
+
+See the sections below for more details.
+
+## Intel Utilities for Xeon Phi
+
+Continue [here](../intel/intel-xeon-phi-salomon/)
+
+## GCC With [KNC](https://en.wikipedia.org/wiki/Xeon_Phi) Support
+
+The Salomon cluster provides the `GCC/5.1.1-knc` module with cross-compiled (offload) support (gcc, g++ and gfortran).
+
+!!! warning
+    Only Salomon cluster.
+
+To compile code using the GCC compiler, run the following commands:
+
+* Create `reduce_mul.c`
+
+```console
+$ vim reduce_mul.c
+```
+
+```c
+#include <immintrin.h>
+
+double reduce(double* values)
+{
+    __m512d val = _mm512_load_pd(values);
+    return _mm512_reduce_mul_pd(val);
+}
+```
+
+* Create `main.c`
+
+```console
+$ vim main.c
+```
+
+```c
+#include <immintrin.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+double reduce(double* values);
+
+int main(int argc, char* argv[])
+{
+    // Generate random input vector of [-1, 1] values.
+    double values[8] __attribute__((aligned(64)));
+    for (int i = 0; i < 8; i++)
+        values[i] = 2 * (0.5 - rand() / (double)RAND_MAX);
+
+    double vector = reduce(values);
+    double scalar = values[0];
+    for (int i = 1; i < 8; i++)
+        scalar *= values[i];
+
+    printf("%f vs %f\n", vector, scalar);
+
+    fflush(stdout);
+
+    return 0;
+}
+```
+
+* Compile
+
+```console
+$ ml GCC/5.1.1-knc
+$ gcc -mavx512f -O3 -c reduce_mul.c -o reduce_mul.s -S
+$ gcc -O3 -c reduce_mul.s -o reduce_mul.o
+$ gcc -std=c99 -O3 -c main.c -o main_gcc.o
+$ gcc -O3 reduce_mul.o main_gcc.o -o reduce_mul
+```
+
+* To execute the code, run the following command on the host
+
+```console
+$ micnativeloadex ./reduce_mul
+-0.004276 vs -0.004276
+```
+
+## Native Mode
+
+In native mode, a program is executed directly on the Intel Xeon Phi without involvement of the host machine. Similarly to offload mode, the code is compiled on the host computer with the Intel compilers.
+
+To compile code, the user has to be connected to a compute node with a MIC accelerator and load the Intel compilers module. To get an interactive session on a compute node with an Intel Xeon Phi and load the module, use the following commands:
+
+```console
+$ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True -A NONE-0-0
+$ ml intel/2017b
+```
+
+To produce a binary compatible with the Intel Xeon Phi architecture, the user has to specify the `-mmic` compiler flag. Two compilation examples are shown below.
The first example shows how to compile the OpenMP parallel code `vect-add.c` for the host only
+
+```c
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef int T;
+
+#define SIZE 1000
+
+#pragma offload_attribute(push, target(mic))
+T in1[SIZE];
+T in2[SIZE];
+T res[SIZE];
+#pragma offload_attribute(pop)
+
+// MIC function to add two vectors
+__attribute__((target(mic))) void add_mic(T *a, T *b, T *c, int size) {
+  int i = 0;
+  #pragma omp parallel for
+    for (i = 0; i < size; i++)
+      c[i] = a[i] + b[i];
+}
+
+// CPU function to add two vectors
+void add_cpu (T *a, T *b, T *c, int size) {
+  int i;
+  for (i = 0; i < size; i++)
+    c[i] = a[i] + b[i];
+}
+
+// CPU function to generate a vector of random numbers
+void random_T (T *a, int size) {
+  int i;
+  for (i = 0; i < size; i++)
+    a[i] = rand() % 10000; // random number between 0 and 9999
+}
+
+// CPU function to compare two vectors
+int compare(T *a, T *b, T size ){
+  int pass = 0;
+  int i;
+  for (i = 0; i < size; i++){
+    if (a[i] != b[i]) {
+      printf("Value mismatch at location %d, values %d and %d\n", i, a[i], b[i]);
+      pass = 1;
+    }
+  }
+  if (pass == 0) printf ("Test passed\n"); else printf ("Test Failed\n");
+  return pass;
+}
+
+int main()
+{
+  int i;
+  random_T(in1, SIZE);
+  random_T(in2, SIZE);
+
+  #pragma offload target(mic) in(in1,in2) inout(res)
+  {
+
+    // Parallel loop from main function
+    #pragma omp parallel for
+    for (i = 0; i < SIZE; i++)
+      res[i] = in1[i] + in2[i];
+
+    // or parallel loop is called inside the function
+    add_mic(in1, in2, res, SIZE);
+
+  }
+
+  // Check the results with the CPU implementation
+  T res_cpu[SIZE];
+  add_cpu(in1, in2, res_cpu, SIZE);
+  compare(res, res_cpu, SIZE);
+
+}
+```
+
+```console
+$ icc -xhost -no-offload -fopenmp vect-add.c -o vect-add-host
+```
+
+* To run this code on the host, use
+
+```console
+$ ./vect-add-host
+Test passed
+```
+
+* The second example shows how to compile the same code for the Intel Xeon Phi
+
+```console
+$ icc -mmic -fopenmp vect-add.c -o vect-add-mic
+```
+
+* Execution of the Program in Native Mode on Intel Xeon Phi
+
+Users access the Intel Xeon Phi through SSH. Since user home directories are mounted on the accelerator using NFS, users do not have to copy binary files or libraries between the host and the accelerator. Get the path of the MIC-enabled libraries for the currently used Intel compiler.
+
+* To run this code on Intel Xeon Phi
+
+```console
+$ ssh mic0
+$ ./vect-add-mic
+./vect-add-mic: error while loading shared libraries: libiomp5.so: cannot open shared object file: No such file or directory
+$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/apps/all/icc/2017.4.196-GCC-6.4.0-2.28/compilers_and_libraries/linux/lib/mic
+$ ./vect-add-mic
+Test passed
+```
+
+!!! tip
+    Or use the procedure from the chapter [Devel Environment](#devel-environment).
+
+## Only Intel Xeon Phi Cards
+
+Execute a native job
+
+```console
+$ qsub -A NONE-0-0 -q qmic -l select=1 -l walltime=10:00:00 -I
+r21u01n577-mic1:~$
+```
+
+## Devel Environment
+
+To get an overview of the currently loaded modules, use `module list` or `ml` (without specifying extra arguments).
+
+```console
+r21u02n578-mic0:~$ ml
+No modules loaded
+```
+
+To get an overview of all available modules, you can use `ml avail` or simply `ml av`
+
+```console
+r21u02n578-mic0:~$ ml av
+
+-------------- /apps/phi/system/devel --------------------------
+   devel_environment/1.0 (S)
+
+  Where:
+   S:  Module is Sticky, requires --force to unload or purge
+
+```
+
+Activate the devel environment
+
+```console
+r21u02n578-mic0:~$ ml devel_environment
+```
+
+To get an overview of all available modules again, use `ml avail` or simply `ml av`
+
+```console
+r21u02n578-mic0:~$ ml av
+
+-------------- /apps/phi/modules/compiler --------------------------
+   icc/2017.4.196-GCC-6.4.0-2.28
+
+-------------- /apps/phi/modules/devel --------------------------
+   M4/1.4.18    devel_environment/1.0 (S)    ncurses/6.0
+
+-------------- /apps/phi/modules/lang --------------------------
+   Bison/3.0.4    Tcl/8.6.6    flex/2.6.4
+
+-------------- /apps/phi/modules/lib --------------------------
+   libreadline/7.0    zlib/1.2.11
+
+-------------- /apps/phi/modules/math --------------------------
+   Octave/3.8.2
+
+-------------- /apps/phi/modules/mpi --------------------------
+   impi/2017.3.196-iccifort-2017.4.196-GCC-6.4.0-2.28
+
+-------------- /apps/phi/modules/toolchain --------------------------
+   iccifort/2017.4.196-GCC-6.4.0-2.28    ifort/2017.4.196-GCC-6.4.0-2.28
+
+-------------- /apps/phi/modules/tools --------------------------
+   bzip2/1.0.6    cURL/7.53.1    expat/2.2.5
+
+-------------- /apps/phi/modules/vis --------------------------
+   gettext/0.19.8
+
+  Where:
+   S:  Module is Sticky, requires --force to unload or purge
+
+```
+
+After loading the `devel_environment` module, modules for the k1om-mpss-linux architecture become available, and system software (gcc, cmake, make, git, htop, vim, ...) can be used.
+
+* Example
+
+```console
+r21u02n578-mic0:~$ gcc --version
+gcc (GCC) 5.1.1
+Copyright (C) 2015 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+r21u02n578-mic0:~$ cmake --version
+cmake version 2.8.7
+
+r21u02n578-mic0:~$ git --version
+git version 1.7.7
+
+r21u02n578-mic0:~$ make --version
+GNU Make 3.82
+Built for k1om-mpss-linux-gnu
+Copyright (C) 2010  Free Software Foundation, Inc.
+License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.
+
+r21u02n578-mic0:~$ perl --version
+
+This is perl 5, version 14, subversion 2 (v5.14.2) built for k1om-linux
+
+Copyright 1987-2011, Larry Wall
+
+Perl may be copied only under the terms of either the Artistic License or the
+GNU General Public License, which may be found in the Perl 5 source kit.
+
+Complete documentation for Perl, including FAQ lists, should be found on
+this system using "man perl" or "perldoc perl".  If you have access to the
+Internet, point your browser at http://www.perl.org/, the Perl Home Page.
+
+...
+```
+
+* Execute the previously cross-compiled code `vect-add-mic`
+
+```console
+r21u01n577-mic1:~$ ml devel_environment
+r21u01n577-mic1:~$ ml icc
+r21u01n577-mic1:~$ ./vect-add-mic
+Test passed
+```
+
+!!! tip
+    The path of the MIC libraries for the Intel compiler is set automatically.
+
+## Modules
+
+Examples of module usage follow.
+
+### MPI
+
+Load the devel environment module `devel_environment` and the MPI module `impi/2017.3.196-iccifort-2017.4.196-GCC-6.4.0-2.28` (intel/2017b).
+
+Execute the test
+
+```console
+$ qsub -A SERVICE -q qmic -l select=4 -l walltime=01:00:00 -I
+r21u01n577-mic0:~$ ml devel_environment
+r21u01n577-mic0:~$ ml impi
+r21u01n577-mic0:~$ ml
+
+Currently Loaded Modules:
+  1) devel_environment/1.0 (S)       3) ifort/2017.4.196-GCC-6.4.0-2.28      5) impi/2017.3.196-iccifort-2017.4.196-GCC-6.4.0-2.28
+  2) icc/2017.4.196-GCC-6.4.0-2.28   4) iccifort/2017.4.196-GCC-6.4.0-2.28
+
+  Where:
+   S:  Module is Sticky, requires --force to unload or purge
+r21u01n577-mic0:~$ mpirun -n 244 hostname | sort | uniq -c | sort -n
+     61 r21u01n577-mic0
+     61 r21u01n577-mic1
+     61 r21u02n578-mic0
+     61 r21u02n578-mic1
+r21u01n577-mic0:~$ mpirun -n 976 hostname | sort | uniq -c | sort -n
+    244 r21u01n577-mic0
+    244 r21u01n577-mic1
+    244 r21u02n578-mic0
+    244 r21u02n578-mic1
+r21u01n577-mic0:~$ mpirun hostname | sort | uniq -c | sort -n
+      1 r21u01n577-mic0
+      1 r21u01n577-mic1
+      1 r21u02n578-mic0
+      1 r21u02n578-mic1
+```
+
+!!! warning
+    The icc, ifort and iccifort modules provide only libraries and headers, not compilers. For compilation, use the procedure from the chapter [Native Mode](#native-mode).
+
+### Octave/3.8.2
+
+Load the devel environment module `devel_environment`, load the `Octave/3.8.2` module and run the test
+
+```console
+r21u01n577-mic0:~$ ml devel_environment
+r21u01n577-mic0:~$ ml Octave/3.8.2
+r21u01n577-mic0:~$ octave -q /apps/phi/software/Octave/3.8.2/example/test0.m
+warning: docstring file '/apps/phi/software/Octave/3.8.2/share/octave/3.8.2/etc/built-in-docstrings' not found
+warning: readline is not linked, so history control is not available
+Use some basic operators ...
+Work with some small matrixes ...
+Save matrix to file ...
+Load matrix from file ...
+Display matrix ...
+m3 =
+
+    39.200    19.600    39.200
+    58.800   117.600   156.800
+   254.800   411.600   686.000
+
+Work with some big matrixes ...
+Sum ...
+Multiplication ...
+r21u01n577-mic0:~$ cat test.mat
+# Created by Octave 3.8.2, Thu Dec 07 11:11:09 2017 CET <kru0052@r21u01n577-mic0>
+# name: m3
+# type: matrix
+# rows: 3
+# columns: 3
+ 39.2 19.6 39.2
+ 58.8 117.6 156.8
+ 254.8 411.6 686
+```
+
+## Native Build Software With Devel Environment
+
+Compiler
+
+* gcc (GCC) 5.1.1 **without** gfortran support
+
+Architecture (depends on the compiled software):
+
+* k1om-unknown-linux-gnu
+* k1om-mpss-linux-gnu
+* x86_64-k1om-linux
+* k1om-mpss-linux
+
+Configure step (for `configure`, `make` and `make install` software)
+
+* specify the architecture with `--build=`
+
+```console
+./configure --prefix=/apps/phi/software/ncurses/6.0 --build=k1om-mpss-linux
+```
+
+Modulefile and Lmod
+
+* Read [Lmod](../modules/lmod/)
diff --git a/docs.it4i/software/lmod.md b/docs.it4i/software/modules/lmod.md
similarity index 99%
rename from docs.it4i/software/lmod.md
rename to docs.it4i/software/modules/lmod.md
index 24a93cbc8873a822a74950b720973bec6783284e..4aad91ce85aff13e59594cc1efaeee4ce6465d78 100644
--- a/docs.it4i/software/lmod.md
+++ b/docs.it4i/software/modules/lmod.md
@@ -4,9 +4,6 @@ Lmod is a modules tool, a modern alternative to the oudated & no longer actively
 
 Detailed documentation on Lmod is available [here](http://lmod.readthedocs.io).
 
-!!! warning
-    All the new modules will be availabe in Lmod environment only.
- ## Benefits * significantly more responsive module commands, in particular module avail (ml av) diff --git a/docs.it4i/anselm/software/mpi/Running_OpenMPI.md b/docs.it4i/software/mpi/Running_OpenMPI.md similarity index 97% rename from docs.it4i/anselm/software/mpi/Running_OpenMPI.md rename to docs.it4i/software/mpi/Running_OpenMPI.md index 4974eb5b16625faa930a69cded916948257d00a5..5f2606bc2671012b95e53831eb63aa0c95451500 100644 --- a/docs.it4i/anselm/software/mpi/Running_OpenMPI.md +++ b/docs.it4i/software/mpi/Running_OpenMPI.md @@ -2,14 +2,14 @@ ## OpenMPI Program Execution -The OpenMPI programs may be executed only via the PBS Workload manager, by entering an appropriate queue. On Anselm, the **bullxmpi-1.2.4.1** and **OpenMPI 1.6.5** are OpenMPI based MPI implementations. +The OpenMPI programs may be executed only via the PBS Workload manager, by entering an appropriate queue. On Anselm, the **bullxmpi-1.2.4.1** and **OpenMPI 1.6.5** are OpenMPI based MPI implementations. On Salomon, the **OpenMPI 1.8.6** is OpenMPI based MPI implementation. ### Basic Usage !!! note Use the mpiexec to run the OpenMPI code. -Example: +Example (for Anselm): ```console $ qsub -q qexp -l select=4:ncpus=16 -I @@ -52,7 +52,7 @@ The mpiprocs and ompthreads parameters allow for selection of number of running ### One MPI Process Per Node -Follow this example to run one MPI process per node, 16 threads per process. +Follow this example to run one MPI process per node, 16 threads per process (**on Salomon try 24 threads in following examples**). ```console $ qsub -q qexp -l select=4:ncpus=16:mpiprocs=1:ompthreads=16 -I diff --git a/docs.it4i/salomon/software/mpi/mpi.md b/docs.it4i/software/mpi/mpi.md similarity index 86% rename from docs.it4i/salomon/software/mpi/mpi.md rename to docs.it4i/software/mpi/mpi.md index 99f8745aca779ad71a3ab5322499aa9e8bc9fd25..b307a96223a47fd3b8ff86681e2e8b0f7a483d60 100644 --- a/docs.it4i/salomon/software/mpi/mpi.md +++ b/docs.it4i/software/mpi/mpi.md @@ -39,7 +39,7 @@ Examples: $ ml gompi/2015b ``` -In this example, we activate the latest OpenMPI with latest GNU compilers (OpenMPI 1.8.6 and GCC 5.1). Please see more information about toolchains in section [Environment and Modules](../../environment-and-modules/) . +In this example, we activate the latest OpenMPI with latest GNU compilers (OpenMPI 1.8.6 and GCC 5.1). Please see more information about toolchains in section [Environment and Modules](../../modules-matrix/) . To use OpenMPI with the intel compiler suite, use @@ -71,31 +71,31 @@ Wrappers mpif90, mpif77 that are provided by Intel MPI are designed for gcc and Example program: ```cpp - // helloworld_mpi.c - #include <stdio.h> +// helloworld_mpi.c +#include <stdio.h> - #include<mpi.h> +#include<mpi.h> - int main(int argc, char **argv) { +int main(int argc, char **argv) { - int len; - int rank, size; - char node[MPI_MAX_PROCESSOR_NAME]; +int len; +int rank, size; +char node[MPI_MAX_PROCESSOR_NAME]; - // Initiate MPI - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD,&rank); - MPI_Comm_size(MPI_COMM_WORLD,&size); +// Initiate MPI +MPI_Init(&argc, &argv); +MPI_Comm_rank(MPI_COMM_WORLD,&rank); +MPI_Comm_size(MPI_COMM_WORLD,&size); - // Get hostame and print - MPI_Get_processor_name(node,&len); - printf("Hello world! from rank %d of %d on host %sn",rank,size,node); +// Get hostame and print +MPI_Get_processor_name(node,&len); +printf("Hello world! 
from rank %d of %d on host %s\n",rank,size,node);
 
-    // Finalize and exit
-    MPI_Finalize();
+// Finalize and exit
+MPI_Finalize();
 
-    return 0;
-    }
+return 0;
+}
 ```
 
 Compile the above example with
@@ -117,10 +117,11 @@ The MPI program executable must be available within the same path on all nodes.
 
 Optimal way to run an MPI program depends on its memory requirements, memory access pattern and communication pattern.
 
-Consider these ways to run an MPI program:
-1\. One MPI process per node, 24 threads per process
-2\. Two MPI processes per node, 12 threads per process
-3\. 24 MPI processes per node, 1 thread per process.
+!!! note
+    Consider these ways to run an MPI program:
+    1. One MPI process per node, 24 threads per process
+    2. Two MPI processes per node, 12 threads per process
+    3. 24 MPI processes per node, 1 thread per process.
 
 **One MPI** process per node, using 24 threads, is most useful for memory demanding applications, that make good use of processor cache memory and are not memory bound. This is also a preferred way for communication intensive applications as one process per node enjoys full bandwidth access to the network interface.
diff --git a/docs.it4i/anselm/software/mpi/mpi4py-mpi-for-python.md b/docs.it4i/software/mpi/mpi4py-mpi-for-python.md
similarity index 53%
rename from docs.it4i/anselm/software/mpi/mpi4py-mpi-for-python.md
rename to docs.it4i/software/mpi/mpi4py-mpi-for-python.md
index 4d687dc2f61e9ae593a7900b1bf183e07e61634f..c2a2976b5bb1b27e46c47fc615846de9179fa29e 100644
--- a/docs.it4i/anselm/software/mpi/mpi4py-mpi-for-python.md
+++ b/docs.it4i/software/mpi/mpi4py-mpi-for-python.md
@@ -6,7 +6,7 @@ MPI for Python provides bindings of the Message Passing Interface (MPI) standard
 
 This package is constructed on top of the MPI-1/2 specifications and provides an object oriented interface which closely follows MPI-2 C++ bindings. It supports point-to-point (sends, receives) and collective (broadcasts, scatters, gathers) communications of any picklable Python object, as well as optimized communications of Python object exposing the single-segment buffer interface (NumPy arrays, builtin bytes/string/array objects).
 
-On Anselm MPI4Py is available in standard Python modules.
+MPI4Py is available in standard Python modules on the clusters.
 
 ## Modules
 
@@ -20,7 +20,7 @@ $ ml av Python/
    Python/2.7.11-foss-2016a    Python/3.5.2-foss-2016a   Python/3.5.1
    Python/2.7.9-foss-2015g     Python/3.4.3-intel-2015b  Python/2.7.9
    Python/2.7.11-intel-2015b   Python/3.5.2
-    
+
 $ ml av OpenMPI/
 --------------------------------------- /apps/modules/mpi --------------------------
 OpenMPI/1.8.6-GCC-4.4.7-system   OpenMPI/1.8.8-GNU-4.9.3-2.25    OpenMPI/1.10.1-GCC-4.9.3-2.25
@@ -28,7 +28,8 @@ OpenMPI/1.8.6-GNU-5.1.0-2.25     OpenMPI/1.8.8-GNU-5.1.0-2.25    OpenMPI/1.10.1-GN
 OpenMPI/1.8.8-iccifort-2015.3.187-GNU-4.9.3-2.25 OpenMPI/2.0.2-GCC-6.3.0-2.27
 ```
 
-!!! Warning ""
+!!! Warning "Flavours"
+
     * modules Python/x.x.x-intel... - intel MPI
     * modules Python/x.x.x-foss...  - OpenMPI
     * modules Python/x.x.x         - without MPI
@@ -37,8 +38,8 @@ OpenMPI/1.8.6-GNU-5.1.0-2.25     OpenMPI/1.8.8-GNU-5.1.0-2.25    OpenMPI/1.10.1-GN
 
 You need to import MPI to your python program. Include the following line in the python script:
 
-```cpp
-    from mpi4py import MPI
+```python
+from mpi4py import MPI
 ```
 
 The MPI4Py enabled python programs [execute as any other OpenMPI](Running_OpenMPI/) code. The simplest way is to run
 
@@ -57,52 +58,114 @@ $ mpiexec python hello_world.py
 ### Hello World!
-```cpp
-    from mpi4py import MPI
+```python
+from mpi4py import MPI
 
-    comm = MPI.COMM_WORLD
+comm = MPI.COMM_WORLD
 
-    print "Hello! I'm rank %d from %d running in total..." % (comm.rank, comm.size)
+print "Hello! I'm rank %d from %d running in total..." % (comm.rank, comm.size)
 
-    comm.Barrier()   # wait for everybody to synchronize
+comm.Barrier()   # wait for everybody to synchronize
 ```
 
 ### Collective Communication With NumPy Arrays
 
-```cpp
-    from mpi4py import MPI
-    from __future__ import division
-    import numpy as np
+```python
+from mpi4py import MPI
+from __future__ import division
+import numpy as np
 
-    comm = MPI.COMM_WORLD
+comm = MPI.COMM_WORLD
 
-    print("-"*78)
-    print(" Running on %d cores" % comm.size)
-    print("-"*78)
+print("-"*78)
+print(" Running on %d cores" % comm.size)
+print("-"*78)
 
-    comm.Barrier()
+comm.Barrier()
 
-    # Prepare a vector of N=5 elements to be broadcasted...
-    N = 5
-    if comm.rank == 0:
-        A = np.arange(N, dtype=np.float64)    # rank 0 has proper data
-    else:
-        A = np.empty(N, dtype=np.float64)     # all other just an empty array
+# Prepare a vector of N=5 elements to be broadcasted...
+N = 5
+if comm.rank == 0:
+    A = np.arange(N, dtype=np.float64)    # rank 0 has proper data
+else:
+    A = np.empty(N, dtype=np.float64)     # all other just an empty array
 
-    # Broadcast A from rank 0 to everybody
-    comm.Bcast( [A, MPI.DOUBLE] )
+# Broadcast A from rank 0 to everybody
+comm.Bcast( [A, MPI.DOUBLE] )
 
-    # Everybody should now have the same...
-    print "[%02d] %s" % (comm.rank, A)
+# Everybody should now have the same...
+print "[%02d] %s" % (comm.rank, A)
 ```
 
 Execute the above code as:
 
 ```console
-$ qsub -q qexp -l select=4:ncpus=16:mpiprocs=16:ompthreads=1 -I
+$ qsub -q qexp -l select=4:ncpus=16:mpiprocs=16:ompthreads=1 -I # Salomon: ncpus=24:mpiprocs=24
 $ ml Python
 $ ml OpenMPI
 $ mpiexec -bycore -bind-to-core python hello_world.py
 ```
 
 In this example, we run MPI4Py enabled code on 4 nodes, 16 cores per node (total of 64 processes), each python process is bound to a different core. More examples and documentation can be found on [MPI for Python webpage](https://pypi.python.org/pypi/mpi4py).
+
+### Adding Numbers
+
+Task: compute the sum of the numbers from 1 to 1 000 000. (There is a simple formula for the sum of an arithmetic sequence, but here we show an MPI solution that adds the numbers one by one.)
+
+```python
+#!/usr/bin/python
+
+import numpy
+from mpi4py import MPI
+import time
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+a = 1
+b = 1000000
+
+perrank = b//size
+summ = numpy.zeros(1)
+
+comm.Barrier()
+start_time = time.time()
+
+temp = 0
+for i in range(a + rank*perrank, a + (rank+1)*perrank):
+    temp = temp + i
+
+summ[0] = temp
+
+if rank == 0:
+    total = numpy.zeros(1)
+else:
+    total = None
+
+comm.Barrier()
+# collect the partial results and add them to the total sum
+comm.Reduce(summ, total, op=MPI.SUM, root=0)
+
+stop_time = time.time()
+
+if rank == 0:
+    # add the remaining numbers up to 1 000 000
+    for i in range(a + (size)*perrank, b+1):
+        total[0] = total[0] + i
+    print ("The sum of numbers from 1 to 1 000 000: ", int(total[0]))
+    print ("time spent with ", size, " processes in milliseconds")
+    print ("-----", int((stop_time-start_time)*1000), "-----")
+```
+
+Execute the code above as:
+
+```console
+$ qsub -I -q qexp -l select=4:ncpus=16,walltime=00:30:00
+
+$ ml Python/3.5.2-intel-2017.00
+
+$ mpirun -n 2 python myprogram.py
+```
+
+You can increase the number of processes (`-n`) and watch the execution time decrease.
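+
+### Point-to-Point Communication
+
+The examples above use collective operations. As a complement, the following minimal sketch (our illustration, not part of the official mpi4py examples; the file name `p2p.py` is arbitrary) sends a pickled Python object from rank 0 to rank 1:
+
+```python
+from mpi4py import MPI
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+
+if rank == 0:
+    data = {'value': 42}
+    # the lowercase send/recv methods transfer arbitrary picklable Python objects
+    comm.send(data, dest=1, tag=11)
+elif rank == 1:
+    data = comm.recv(source=0, tag=11)
+    print("Rank 1 received:", data)
+```
+
+Run it with at least two processes, e.g. `mpirun -n 2 python p2p.py`.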
diff --git a/docs.it4i/anselm/software/mpi/running-mpich2.md b/docs.it4i/software/mpi/running-mpich2.md similarity index 100% rename from docs.it4i/anselm/software/mpi/running-mpich2.md rename to docs.it4i/software/mpi/running-mpich2.md diff --git a/docs.it4i/anselm/software/numerical-languages/introduction.md b/docs.it4i/software/numerical-languages/introduction.md similarity index 94% rename from docs.it4i/anselm/software/numerical-languages/introduction.md rename to docs.it4i/software/numerical-languages/introduction.md index 8646fe6fed34038028fdab9dbcde98840d204944..39ac8f738d5fc30b5c08e9603f18a00b077f2902 100644 --- a/docs.it4i/anselm/software/numerical-languages/introduction.md +++ b/docs.it4i/software/numerical-languages/introduction.md @@ -1,4 +1,4 @@ -# Numerical languages +# Numerical Languages Interpreted languages for numerical computations and analysis @@ -11,7 +11,7 @@ This section contains a collection of high-level interpreted languages, primaril MATLAB® is a high-level language and interactive environment for numerical computation, visualization, and programming. ```console -$ ml MATLAB/2015b-EDU +$ ml MATLAB $ matlab ``` diff --git a/docs.it4i/salomon/software/numerical-languages/matlab.md b/docs.it4i/software/numerical-languages/matlab.md similarity index 72% rename from docs.it4i/salomon/software/numerical-languages/matlab.md rename to docs.it4i/software/numerical-languages/matlab.md index e08bf9099ee9d5175a8579afe2fc9d6d32b1aa8f..e3bccc1a9ae9f976509dea53ad6cf4b1ac11302a 100644 --- a/docs.it4i/salomon/software/numerical-languages/matlab.md +++ b/docs.it4i/software/numerical-languages/matlab.md @@ -16,14 +16,14 @@ $ ml MATLAB By default the EDU variant is marked as default. If you need other version or variant, load the particular version. To obtain the list of available versions use ```console -$ module avail MATLAB +$ ml av MATLAB ``` If you need to use the Matlab GUI to prepare your Matlab programs, you can use Matlab directly on the login nodes. But for all computations use Matlab on the compute nodes via PBS Pro scheduler. -If you require the Matlab GUI, please follow the general information about [running graphical applications](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/). +If you require the Matlab GUI, please follow the general information about [running graphical applications](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/). -Matlab GUI is quite slow using the X forwarding built in the PBS (qsub -X), so using X11 display redirection either via SSH or directly by xauth (please see the "GUI Applications on Compute Nodes over VNC" part [here](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/)) is recommended. +Matlab GUI is quite slow using the X forwarding built in the PBS (qsub -X), so using X11 display redirection either via SSH or directly by xauth (please see the "GUI Applications on Compute Nodes over VNC" part [here](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/)) is recommended. To run Matlab with GUI, use @@ -50,11 +50,11 @@ Delete previously used file mpiLibConf.m, we have observed crashes when using In To use Distributed Computing, you first need to setup a parallel profile. 
We have provided the profile for you; you can either import it on the MATLAB command line:
 
 ```console
-    > parallel.importProfile('/apps/all/MATLAB/2015b-EDU/SalomonPBSPro.settings')
+> parallel.importProfile('/apps/all/MATLAB/2015b-EDU/SalomonPBSPro.settings')
 
-    ans =
+ans =
 
-    SalomonPBSPro
+SalomonPBSPro
 ```
 
 Or in the GUI, go to tab HOME -> Parallel -> Manage Cluster Profiles..., click Import and navigate to :
@@ -63,9 +63,12 @@ Or in the GUI, go to tab HOME -> Parallel -> Manage Cluster Profiles..., click I
 
 With the new mode, MATLAB itself launches the workers via PBS, so you can either use interactive mode or a batch mode on one node, but the actual parallel processing will be done in a separate job started by MATLAB itself. Alternatively, you can use "local" mode to run parallel code on just a single node.
 
+!!! note
+    The profile is confusingly named Salomon, but you can use it also on Anselm.
+
 ### Parallel Matlab Interactive Session
 
-Following example shows how to start interactive session with support for Matlab GUI. For more information about GUI based applications on Anselm see [this page](../../../general/accessing-the-clusters/graphical-user-interface/x-window-system/).
+The following example shows how to start an interactive session with support for the Matlab GUI. For more information about GUI based applications on Anselm see [this page](../../general/accessing-the-clusters/graphical-user-interface/x-window-system/).
 
 ```console
 $ xhost +
@@ -79,8 +82,8 @@ The second part of the command shows how to request all necessary licenses. In t
 
 Once the access to compute nodes is granted by PBS, user can load following modules and start Matlab:
 
 ```console
-    r1i0n17$ ml MATLAB/2015a-EDU
-    r1i0n17$ matlab &
+$ ml MATLAB/2015a-EDU
+$ matlab &
 ```
 
 ### Parallel Matlab Batch Job in Local Mode
 
@@ -88,26 +91,26 @@ Once the access to compute nodes is granted by PBS, user can load following modu
 To run matlab in batch mode, write a matlab script, then write a bash jobscript and execute via the qsub command. By default, matlab will execute one matlab worker instance per allocated core.
 
 ```bash
-    #!/bin/bash
-    #PBS -A PROJECT ID
-    #PBS -q qprod
-    #PBS -l select=1:ncpus=24:mpiprocs=24:ompthreads=1
+#!/bin/bash
+#PBS -A PROJECT ID
+#PBS -q qprod
+#PBS -l select=1:ncpus=24:mpiprocs=24:ompthreads=1 # Anselm: ncpus=16:mpiprocs=16
 
-    # change to shared scratch directory
-    SCR=/scratch/work/user/$USER/$PBS_JOBID
-    mkdir -p $SCR ; cd $SCR || exit
+# change to shared scratch directory
+SCR=/scratch/.../$USER/$PBS_JOBID # change the path according to the cluster
+mkdir -p $SCR ; cd $SCR || exit
 
-    # copy input file to scratch
-    cp $PBS_O_WORKDIR/matlabcode.m .
+# copy input file to scratch
+cp $PBS_O_WORKDIR/matlabcode.m .
 
-    # load modules
-    module load MATLAB/2015a-EDU
+# load modules
+module load MATLAB/2015a-EDU
 
-    # execute the calculation
-    matlab -nodisplay -r matlabcode > output.out
+# execute the calculation
+matlab -nodisplay -r matlabcode > output.out
 
-    # copy output file to home
-    cp output.out $PBS_O_WORKDIR/.
+# copy output file to home
+cp output.out $PBS_O_WORKDIR/.
 ```
 
 This script may be submitted directly to the PBS workload manager via the qsub command. The inputs and matlab script are in matlabcode.m file, outputs in output.out file. Note the missing .m extension in the matlab -r matlabcodefile call, **the .m must not be included**. Note that the **shared /scratch must be used**. Further, it is **important to include quit** statement at the end of the matlabcode.m script (see the sketch below).
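+
+A minimal `matlabcode.m` sketch illustrating the required `quit` statement at the end (a hypothetical example, not a file shipped with the module):
+
+```matlab
+% matlabcode.m - example computation
+n = 1000;
+A = rand(n);
+s = sum(A(:));
+disp(s)
+quit % terminate MATLAB so the batch job finishes
+```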
@@ -123,7 +126,7 @@ $ qsub ./jobscript
 
 The last part of the configuration is done directly in the user Matlab script before Distributed Computing Toolbox is started.
 
 ```console
-    cluster = parcluster('local')
+cluster = parcluster('local')
 ```
 
 This script creates scheduler object "cluster" of type "local" that starts workers locally.
@@ -134,40 +137,40 @@ This script creates scheduler object "cluster" of type "local" that starts worke
 
 The last step is to start matlabpool with "cluster" object and correct number of workers. We have 24 cores per node, so we start 24 workers.
 
 ```console
-    parpool(cluster,24);
+parpool(cluster,24); % Anselm: parpool(cluster,16)
 
-    ... parallel code ...
+... parallel code ...
 
-    parpool close
+parpool close
 ```
 
 The complete example showing how to use Distributed Computing Toolbox in local mode is shown here.
 
-```console
-    cluster = parcluster('local');
-    cluster
+```matlab
+cluster = parcluster('local');
+cluster
 
-    parpool(cluster,24);
+parpool(cluster,24);
 
-    n=2000;
+n=2000;
 
-    W = rand(n,n);
-    W = distributed(W);
-    x = (1:n)';
-    x = distributed(x);
-    spmd
-    [~, name] = system('hostname')
+W = rand(n,n);
+W = distributed(W);
+x = (1:n)';
+x = distributed(x);
+spmd
+[~, name] = system('hostname')
 
-        T = W*x; % Calculation performed on labs, in parallel.
-        % T and W are both codistributed arrays here.
-    end
-    T;
-    whos % T and W are both distributed arrays here.
+    T = W*x; % Calculation performed on labs, in parallel.
+             % T and W are both codistributed arrays here.
+end
+T;
+whos % T and W are both distributed arrays here.
 
-    parpool close
-    quit
+parpool close
+quit
 ```
 
 You can copy and paste the example in a .m file and execute. Note that the parpool size should correspond to **total number of cores** available on allocated nodes.
@@ -178,30 +181,30 @@ This mode uses PBS scheduler to launch the parallel pool. It uses the SalomonPBS
 
 This is an example of m-script using PBS mode:
 
-```console
-    cluster = parcluster('SalomonPBSPro');
-    set(cluster, 'SubmitArguments', '-A OPEN-0-0');
-    set(cluster, 'ResourceTemplate', '-q qprod -l select=10:ncpus=24');
-    set(cluster, 'NumWorkers', 240);
+```matlab
+cluster = parcluster('SalomonPBSPro');
+set(cluster, 'SubmitArguments', '-A OPEN-0-0');
+set(cluster, 'ResourceTemplate', '-q qprod -l select=10:ncpus=24');
+set(cluster, 'NumWorkers', 240);
 
-    pool = parpool(cluster,240);
+pool = parpool(cluster,240);
 
-    n=2000;
+n=2000;
 
-    W = rand(n,n);
-    W = distributed(W);
-    x = (1:n)';
-    x = distributed(x);
-    spmd
-    [~, name] = system('hostname')
+W = rand(n,n);
+W = distributed(W);
+x = (1:n)';
+x = distributed(x);
+spmd
+[~, name] = system('hostname')
 
-        T = W*x; % Calculation performed on labs, in parallel.
-        % T and W are both codistributed arrays here.
-    end
-    whos % T and W are both distributed arrays here.
+    T = W*x; % Calculation performed on labs, in parallel.
+             % T and W are both codistributed arrays here.
+end
+whos % T and W are both distributed arrays here.
 
-    % shut down parallel pool
-    delete(pool)
+% shut down parallel pool
+delete(pool)
 ```
 
 Note that we first construct a cluster object using the imported profile, then set some important options, namely : SubmitArguments, where you need to specify accounting id, and ResourceTemplate, where you need to specify number of nodes to run the job.
@@ -219,38 +222,38 @@ For this method, you need to use SalomonDirect profile, import it using [the sam This is an example of m-script using direct mode: -```console - parallel.importProfile('/apps/all/MATLAB/2015b-EDU/SalomonDirect.settings') - cluster = parcluster('SalomonDirect'); - set(cluster, 'NumWorkers', 48); +```matlab +parallel.importProfile('/apps/all/MATLAB/2015b-EDU/SalomonDirect.settings') +cluster = parcluster('SalomonDirect'); +set(cluster, 'NumWorkers', 48); - pool = parpool(cluster, 48); +pool = parpool(cluster, 48); - n=2000; +n=2000; - W = rand(n,n); - W = distributed(W); - x = (1:n)'; - x = distributed(x); - spmd - [~, name] = system('hostname') +W = rand(n,n); +W = distributed(W); +x = (1:n)'; +x = distributed(x); +spmd +[~, name] = system('hostname') - T = W*x; % Calculation performed on labs, in parallel. - % T and W are both codistributed arrays here. - end - whos % T and W are both distributed arrays here. + T = W*x; % Calculation performed on labs, in parallel. + % T and W are both codistributed arrays here. +end +whos % T and W are both distributed arrays here. - % shut down parallel pool - delete(pool) +% shut down parallel pool +delete(pool) ``` ### Non-Interactive Session and Licenses -If you want to run batch jobs with Matlab, be sure to request appropriate license features with the PBS Pro scheduler, at least the `-l __feature__matlab__MATLAB=1` for EDU variant of Matlab. More information about how to check the license features states and how to request them with PBS Pro, please [look here](../../../anselm/software/isv_licenses/). +If you want to run batch jobs with Matlab, be sure to request appropriate license features with the PBS Pro scheduler, at least the `-l __feature__matlab__MATLAB=1` for EDU variant of Matlab. More information about how to check the license features states and how to request them with PBS Pro, please [look here](../isv_licenses/). The licensing feature of PBS is currently disabled. -In case of non-interactive session please read the [following information](../../../anselm/software/isv_licenses/) on how to modify the qsub command to test for available licenses prior getting the resource allocation. +In case of non-interactive session please read the [following information](../isv_licenses/) on how to modify the qsub command to test for available licenses prior getting the resource allocation. ### Matlab Distributed Computing Engines Start Up Time @@ -275,4 +278,4 @@ Since this is a SMP machine, you can completely avoid using Parallel Toolbox and ### Local Cluster Mode -You can also use Parallel Toolbox on UV2000. Use l[ocal cluster mode](matlab/#parallel-matlab-batch-job-in-local-mode), "SalomonPBSPro" profile will not work. +You can also use Parallel Toolbox on UV2000. Use [local cluster mode](matlab/#parallel-matlab-batch-job-in-local-mode), "SalomonPBSPro" profile will not work. diff --git a/docs.it4i/anselm/software/numerical-languages/matlab_1314.md b/docs.it4i/software/numerical-languages/matlab_1314.md similarity index 97% rename from docs.it4i/anselm/software/numerical-languages/matlab_1314.md rename to docs.it4i/software/numerical-languages/matlab_1314.md index 41dca05619875b20806beb1a8dde7c255347bd89..1c2d29d3a3053d9a7bec0c5fc777fb024f0be369 100644 --- a/docs.it4i/anselm/software/numerical-languages/matlab_1314.md +++ b/docs.it4i/software/numerical-languages/matlab_1314.md @@ -46,11 +46,11 @@ Plots, images, etc... will be still available. Recommended parallel mode for running parallel Matlab on Anselm is MPIEXEC mode. 
In this mode user allocates resources through PBS prior to starting Matlab. Once resources are granted the main Matlab instance is started on the first compute node assigned to job by PBS and workers are started on all remaining nodes. User can use both interactive and non-interactive PBS sessions. This mode guarantees that the data processing is not performed on login nodes, but all processing is on compute nodes. - + For the performance reasons Matlab should use system MPI. On Anselm the supported MPI implementation for Matlab is Intel MPI. To switch to system MPI user has to override default Matlab setting by creating new configuration file in its home directory. The path and file name has to be exactly the same as in the following listing: -```console +```matlab $ vim ~/matlab/mpiLibConf.m function [lib, extras] = mpiLibConf @@ -88,9 +88,9 @@ The second part of the command shows how to request all necessary licenses. In t Once the access to compute nodes is granted by PBS, user can load following modules and start Matlab: ```console - cn79$ ml matlab/R2013a-EDU - cn79$ ml impi/4.1.1.036 - cn79$ matlab & +$ ml matlab/R2013a-EDU +$ ml impi/4.1.1.036 +$ matlab & ``` ### Parallel Matlab Batch Job @@ -133,7 +133,7 @@ $ qsub ./jobscript The last part of the configuration is done directly in the user Matlab script before Distributed Computing Toolbox is started. -```console +```matlab sched = findResource('scheduler', 'type', 'mpiexec'); set(sched, 'MpiexecFileName', '/apps/intel/impi/4.1.1/bin/mpirun'); set(sched, 'EnvironmentSetMethod', 'setenv'); @@ -158,7 +158,7 @@ matlabpool close The complete example showing how to use Distributed Computing Toolbox is show here. -```console +```matlab sched = findResource('scheduler', 'type', 'mpiexec'); set(sched, 'MpiexecFileName', '/apps/intel/impi/4.1.1/bin/mpirun') set(sched, 'EnvironmentSetMethod', 'setenv') diff --git a/docs.it4i/anselm/software/numerical-languages/octave.md b/docs.it4i/software/numerical-languages/octave.md similarity index 71% rename from docs.it4i/anselm/software/numerical-languages/octave.md rename to docs.it4i/software/numerical-languages/octave.md index 4fbb52979a38da23ec3a9a3c93e456383f99ab22..ca785e75dca4e83cccbdf25b68800363f33a841b 100644 --- a/docs.it4i/anselm/software/numerical-languages/octave.md +++ b/docs.it4i/software/numerical-languages/octave.md @@ -4,13 +4,11 @@ GNU Octave is a high-level interpreted language, primarily intended for numerical computations. It provides capabilities for the numerical solution of linear and nonlinear problems, and for performing other numerical experiments. It also provides extensive graphics capabilities for data visualization and manipulation. Octave is normally used through its interactive command line interface, but it can also be used to write non-interactive programs. The Octave language is quite similar to Matlab so that most programs are easily portable. 
 Read more on <http://www.gnu.org/software/octave/>
 
-Two versions of octave are available on Anselm, via module
+To list the available modules, type:
 
-| Version | module |
-| ----------------------------------------------------- | ------------------------- |
-| Octave 3.8.2, compiled with GCC and Multithreaded MKL | Octave/3.8.2-gimkl-2.11.5 |
-| Octave 4.0.1, compiled with GCC and Multithreaded MKL | Octave/4.0.1-gimkl-2.11.5 |
-| Octave 4.0.0, compiled with >GCC and OpenBLAS | Octave/4.0.0-foss-2015g |
+```console
+$ ml av octave
+```
 
 ## Modules and Execution
 
@@ -18,7 +16,7 @@ Two versions of octave are available on Anselm, via module
 $ ml Octave
 ```
 
-The octave on Anselm is linked to highly optimized MKL mathematical library. This provides threaded parallelization to many octave kernels, notably the linear algebra subroutines. Octave runs these heavy calculation kernels without any penalty. By default, octave would parallelize to 16 threads. You may control the threads by setting the OMP_NUM_THREADS environment variable.
+Octave on the clusters is linked to the highly optimized MKL mathematical library. This provides threaded parallelization to many octave kernels, notably the linear algebra subroutines. Octave runs these heavy calculation kernels without any penalty. By default, octave would parallelize to 16 (Anselm) or 24 (Salomon) threads. You may control the threads by setting the OMP_NUM_THREADS environment variable.
 
 To run octave interactively, log in with ssh -X parameter for X11 forwarding. Run octave:
 
@@ -26,31 +24,31 @@ To run octave interactively, log in with ssh -X parameter for X11 forwarding. Ru
 $ octave
 ```
 
-To run octave in batch mode, write an octave script, then write a bash jobscript and execute via the qsub command. By default, octave will use 16 threads when running MKL kernels.
+To run octave in batch mode, write an octave script, then write a bash jobscript and execute via the qsub command. By default, octave will use 16 (Anselm) or 24 (Salomon) threads when running MKL kernels.
 
 ```bash
-    #!/bin/bash
+#!/bin/bash
 
-    # change to local scratch directory
-    cd /lscratch/$PBS_JOBID || exit
+# change to local scratch directory
+cd /lscratch/$PBS_JOBID || exit
 
-    # copy input file to scratch
-    cp $PBS_O_WORKDIR/octcode.m .
+# copy input file to scratch
+cp $PBS_O_WORKDIR/octcode.m .
 
-    # load octave module
-    module load octave
+# load octave module
+module load octave
 
-    # execute the calculation
-    octave -q --eval octcode > output.out
+# execute the calculation
+octave -q --eval octcode > output.out
 
-    # copy output file to home
-    cp output.out $PBS_O_WORKDIR/.
+# copy output file to home
+cp output.out $PBS_O_WORKDIR/.
 
-    #exit
-    exit
+#exit
+exit
```
 
-This script may be submitted directly to the PBS workload manager via the qsub command. The inputs are in octcode.m file, outputs in output.out file. See the single node jobscript example in the [Job execution section](../../job-submission-and-execution/).
+This script may be submitted directly to the PBS workload manager via the qsub command. The inputs are in octcode.m file, outputs in output.out file. See the single node jobscript example in the [Job execution section](../../salomon/job-submission-and-execution/).
 
 The octave c compiler mkoctfile calls the GNU gcc 4.8.1 for compiling native c code. This is very useful for running native c subroutines in octave environment.
@@ -62,7 +60,7 @@ Octave may use MPI for interprocess communication This functionality is currentl ## Xeon Phi Support -Octave may take advantage of the Xeon Phi accelerators. This will only work on the [Intel Xeon Phi](../intel-xeon-phi/) [accelerated nodes](../../compute-nodes/). +Octave may take advantage of the Xeon Phi accelerators. This will only work on the [Intel Xeon Phi](../intel-xeon-phi/) [accelerated nodes](../../salomon/compute-nodes/). ### Automatic Offload Support @@ -70,7 +68,7 @@ Octave can accelerate BLAS type operations (in particular the Matrix Matrix mult Example -```console +```octave $ export OFFLOAD_REPORT=2 $ export MKL_MIC_ENABLE=1 $ ml octave diff --git a/docs.it4i/salomon/software/numerical-languages/opencoarrays.md b/docs.it4i/software/numerical-languages/opencoarrays.md similarity index 78% rename from docs.it4i/salomon/software/numerical-languages/opencoarrays.md rename to docs.it4i/software/numerical-languages/opencoarrays.md index f573d400a1991183d99dbfba2f802fba6fd4dd31..bfbbb7f65fb0681b57213fb1b821d933195118bb 100644 --- a/docs.it4i/salomon/software/numerical-languages/opencoarrays.md +++ b/docs.it4i/software/numerical-languages/opencoarrays.md @@ -20,24 +20,25 @@ Read more on <http://www.opencoarrays.org/> Indexing of individual images can be shown on the simple *Hello World* program: ```fortran - program hello_world - implicit none - print *, 'Hello world from image ', this_image() , 'of', num_images() - end program hello_world +program hello_world + implicit none + print *, 'Hello world from image ', this_image() , 'of', num_images() +end program hello_world ``` + * num_images() - returns the number of all images * this_image() - returns the image index - numbered from 1 to num_images() -### Co-dimension Variables Declaration +### Co-Dimension Variables Declaration Coarray variables can be declared with the **codimension[*]** attribute or by adding trailing index **[*]** after the variable name. Notice, the ***** character always has to be in the square brackets. ```fortran - integer, codimension[*] :: scalar - integer :: scalar[*] - real, dimension(64), codimension[*] :: vector - real :: vector(64)[*] +integer, codimension[*] :: scalar +integer :: scalar[*] +real, dimension(64), codimension[*] :: vector +real :: vector(64)[*] ``` ### Images Synchronization @@ -48,24 +49,24 @@ Synchronization can be done across all images or only between selected images. B Example program: ```fortran - program synchronization_test - implicit none - integer :: i ! Local variable - integer :: numbers[*] ! Scalar coarray - - ! Genereate random number on image 1 - if (this_image() == 1) then - numbers = floor(rand(1) * 1000) - ! Distribute information to other images - do i = 2, num_images() - numbers[i] = numbers - end do - end if - - sync all ! Barrier to synchronize all images - - print *, 'The random number is', numbers - end program synchronization_test +program synchronization_test + implicit none + integer :: i ! Local variable + integer :: numbers[*] ! Scalar coarray + + ! Genereate random number on image 1 + if (this_image() == 1) then + numbers = floor(rand(1) * 1000) + ! Distribute information to other images + do i = 2, num_images() + numbers[i] = numbers + end do + end if + + sync all ! 
Barrier to synchronize all images + + print *, 'The random number is', numbers +end program synchronization_test ``` * sync all - Synchronize all images between each other @@ -73,7 +74,7 @@ Example program: * sync images(*index*) - Synchronize this image to image with *index* !!! note - **number** is the local variable while **number[*index*]** accesses the variable in a specific image. + **number** is the local variable while **number[*index*]** accesses the variable in a specific image. **number[this_image()]** is the same as **number**. ## Compile and Run @@ -94,7 +95,7 @@ $ caf hello_world.f90 -o hello_world.x ``` !!! warning - The input file extension **.f90** or **.F90** are to be interpreted as *Fortran 90*. + The input file extension **.f90** or **.F90** are to be interpreted as *Fortran 90*. If the input file extension is **.f** or **.F** the source code will be interpreted as *Fortran 77*. Another method for compiling is by invoking the *mpif90* compiler wrapper directly: @@ -103,7 +104,6 @@ Another method for compiling is by invoking the *mpif90* compiler wrapper direct $ mpif90 hello_world.f90 -o hello_world.x -fcoarray=lib -lcaf_mpi ``` - ### Run CAF Program A CAF program can be run by invoking the *cafrun* wrapper or directly by the *mpiexec*: @@ -124,4 +124,4 @@ $ mpiexec -np 4 ./synchronization_test.x **-np 4** is number of images to run. The parameters of **cafrun** and **mpiexec** are the same. -For more information about running CAF program please follow [Running OpenMPI](../mpi/Running_OpenMPI.md) +For more information about running CAF program please follow [Running OpenMPI - Salomon](../mpi/Running_OpenMPI.md) diff --git a/docs.it4i/salomon/software/numerical-languages/r.md b/docs.it4i/software/numerical-languages/r.md similarity index 78% rename from docs.it4i/salomon/software/numerical-languages/r.md rename to docs.it4i/software/numerical-languages/r.md index 6df515adad043a581ce3da7855737194b1c250ae..3322a89acbf62cde753cfc57adf36a001d986148 100644 --- a/docs.it4i/salomon/software/numerical-languages/r.md +++ b/docs.it4i/software/numerical-languages/r.md @@ -27,7 +27,7 @@ $ ml R ## Execution -The R on Anselm is linked to highly optimized MKL mathematical library. This provides threaded parallelization to many R kernels, notably the linear algebra subroutines. The R runs these heavy calculation kernels without any penalty. By default, the R would parallelize to 24 threads. You may control the threads by setting the OMP_NUM_THREADS environment variable. +The R on cluster is linked to highly optimized MKL mathematical library. This provides threaded parallelization to many R kernels, notably the linear algebra subroutines. The R runs these heavy calculation kernels without any penalty. By default, the R would parallelize to 24 (Salomon) or 16 (Anselm) threads. You may control the threads by setting the OMP_NUM_THREADS environment variable. ### Interactive Execution @@ -40,7 +40,7 @@ $ rstudio ### Batch Execution -To run R in batch mode, write an R script, then write a bash jobscript and execute via the qsub command. By default, R will use 24 threads when running MKL kernels. +To run R in batch mode, write an R script, then write a bash jobscript and execute via the qsub command. By default, R will use 24 (Salomon) or 16 (Anselm) threads when running MKL kernels. Example jobscript: @@ -66,7 +66,7 @@ cp routput.out $PBS_O_WORKDIR/. exit ``` -This script may be submitted directly to the PBS workload manager via the qsub command. 
The inputs are in rscript.R file, outputs in routput.out file. See the single node jobscript example in the [Job execution section](../../job-submission-and-execution/).
+This script may be submitted directly to the PBS workload manager via the qsub command. The inputs are in the rscript.R file, outputs in the routput.out file. See the single node jobscript example in the [Job execution section - Anselm](../../anselm/job-submission-and-execution/).

 ## Parallel R

@@ -166,47 +166,47 @@ Static Rmpi programs are executed via mpiexec, as any other MPI programs. Number
 Static Rmpi example:

 ```r
-    library(Rmpi)
+library(Rmpi)

-    #integrand function
-    f <- function(i,h) {
-    x <- h*(i-0.5)
-    return (4/(1 + x*x))
-    }
+#integrand function
+f <- function(i,h) {
+  x <- h*(i-0.5)
+  return (4/(1 + x*x))
+}

-    #initialize
-    invisible(mpi.comm.dup(0,1))
-    rank <- mpi.comm.rank()
-    size <- mpi.comm.size()
-    n<-0
+#initialize
+invisible(mpi.comm.dup(0,1))
+rank <- mpi.comm.rank()
+size <- mpi.comm.size()
+n<-0

-    while (TRUE)
-    {
-    #read number of intervals
-    if (rank==0) {
-     cat("Enter the number of intervals: (0 quits) ")
-     fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp)
-    }
+while (TRUE)
+{
+  #read number of intervals
+  if (rank==0) {
+    cat("Enter the number of intervals: (0 quits) ")
+    fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp)
+  }

-    #broadcat the intervals
-    n <- mpi.bcast(as.integer(n),type=1)
+  #broadcast the intervals
+  n <- mpi.bcast(as.integer(n),type=1)

-    if(n<=0) break
+  if(n<=0) break

-    #run the calculation
-    n <- max(n,size)
-    h <- 1.0/n
+  #run the calculation
+  n <- max(n,size)
+  h <- 1.0/n

-    i <- seq(rank+1,n,size);
-    mypi <- h*sum(sapply(i,f,h));
+  i <- seq(rank+1,n,size);
+  mypi <- h*sum(sapply(i,f,h));

-    pi3 <- mpi.reduce(mypi)
+  pi3 <- mpi.reduce(mypi)

-    #print results
-    if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi))
-    }
+  #print results
+  if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14f\n",pi3,pi3-pi))
+}

-    mpi.quit()
+mpi.quit()
 ```

 The above is the static MPI example for calculating the number π. Note the **library(Rmpi)** and **mpi.comm.dup()** function calls. Execute the example as:

@@ -222,61 +222,61 @@ Dynamic Rmpi programs are executed by calling the R directly. OpenMPI module mus
 Dynamic Rmpi example:

 ```r
-    #integrand function
-    f <- function(i,h) {
-    x <- h*(i-0.5)
-    return (4/(1 + x*x))
-    }
+#integrand function
+f <- function(i,h) {
+  x <- h*(i-0.5)
+  return (4/(1 + x*x))
+}

-    #the worker function
-    workerpi <- function()
-    {
-    #initialize
-    rank <- mpi.comm.rank()
-    size <- mpi.comm.size()
-    n<-0
+#the worker function
+workerpi <- function()
+{
+  #initialize
+  rank <- mpi.comm.rank()
+  size <- mpi.comm.size()
+  n<-0

-    while (TRUE)
-    {
-    #read number of intervals
-    if (rank==0) {
-     cat("Enter the number of intervals: (0 quits) ")
-     fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp)
-    }
+  while (TRUE)
+  {
+    #read number of intervals
+    if (rank==0) {
+      cat("Enter the number of intervals: (0 quits) ")
+      fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp)
+    }

-    #broadcat the intervals
-    n <- mpi.bcast(as.integer(n),type=1)
+    #broadcast the intervals
+    n <- mpi.bcast(as.integer(n),type=1)

-    if(n<=0) break
+    if(n<=0) break

-    #run the calculation
-    n <- max(n,size)
-    h <- 1.0/n
+    #run the calculation
+    n <- max(n,size)
+    h <- 1.0/n

-    i <- seq(rank+1,n,size);
-    mypi <- h*sum(sapply(i,f,h));
+    i <- seq(rank+1,n,size);
+    mypi <- h*sum(sapply(i,f,h));

-    pi3 <- mpi.reduce(mypi)
+    pi3 <- mpi.reduce(mypi)

-    #print results
-    if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi))
-    }
-    }
+    #print results
+    if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14f\n",pi3,pi3-pi))
+  }
+}

-    #main
-    library(Rmpi)
+#main
+library(Rmpi)

-    cat("Enter the number of slaves: ")
-    fp<-file("stdin"); ns<-scan(fp,nmax=1); close(fp)
+cat("Enter the number of slaves: ")
+fp<-file("stdin"); ns<-scan(fp,nmax=1); close(fp)

-    mpi.spawn.Rslaves(nslaves=ns)
-    mpi.bcast.Robj2slave(f)
-    mpi.bcast.Robj2slave(workerpi)
+mpi.spawn.Rslaves(nslaves=ns)
+mpi.bcast.Robj2slave(f)
+mpi.bcast.Robj2slave(workerpi)

-    mpi.bcast.cmd(workerpi())
-    workerpi()
+mpi.bcast.cmd(workerpi())
+workerpi()

-    mpi.quit()
+mpi.quit()
 ```

 The above example is the dynamic MPI example for calculating the number π. Both master and slave processes carry out the calculation. Note the **mpi.spawn.Rslaves()**, **mpi.bcast.Robj2slave()** and the **mpi.bcast.cmd()** function calls.
@@ -369,10 +369,10 @@ Example jobscript for [static Rmpi](r/#static-rmpi) parallel R execution, runnin
 #!/bin/bash
 #PBS -q qprod
 #PBS -N Rjob
-#PBS -l select=100:ncpus=24:mpiprocs=24:ompthreads=1
+#PBS -l select=100:ncpus=24:mpiprocs=24:ompthreads=1 # Anselm: ncpus=16:mpiprocs=16

 # change to scratch directory
-SCRDIR=/scratch/work/user/$USER/myjob
+SCRDIR=/scratch/work/user/$USER/myjob # Anselm: SCRDIR=/scratch/$USER/myjob
 cd $SCRDIR || exit

 # copy input file to scratch
@@ -392,7 +392,7 @@ cp routput.out $PBS_O_WORKDIR/.
 exit
 ```

-For more information about jobscripts and MPI execution refer to the [Job submission](../../job-submission-and-execution/) and general [MPI](../mpi/mpi/) sections.
+For more information about jobscripts and MPI execution, refer to the [Job submission](../../anselm/job-submission-and-execution/) and general [MPI](../mpi/mpi/) sections.
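+
+For illustration, a jobscript like the one above might be submitted as follows (the project ID and the script name are placeholders):
+
+```console
+$ qsub -A OPEN-0-0 ./rjob.sh
+```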
## Xeon Phi Offload diff --git a/docs.it4i/anselm/software/numerical-libraries/fftw.md b/docs.it4i/software/numerical-libraries/fftw.md similarity index 100% rename from docs.it4i/anselm/software/numerical-libraries/fftw.md rename to docs.it4i/software/numerical-libraries/fftw.md diff --git a/docs.it4i/anselm/software/numerical-libraries/gsl.md b/docs.it4i/software/numerical-libraries/gsl.md similarity index 100% rename from docs.it4i/anselm/software/numerical-libraries/gsl.md rename to docs.it4i/software/numerical-libraries/gsl.md diff --git a/docs.it4i/anselm/software/numerical-libraries/hdf5.md b/docs.it4i/software/numerical-libraries/hdf5.md similarity index 100% rename from docs.it4i/anselm/software/numerical-libraries/hdf5.md rename to docs.it4i/software/numerical-libraries/hdf5.md diff --git a/docs.it4i/anselm/software/numerical-libraries/intel-numerical-libraries.md b/docs.it4i/software/numerical-libraries/intel-numerical-libraries.md similarity index 98% rename from docs.it4i/anselm/software/numerical-libraries/intel-numerical-libraries.md rename to docs.it4i/software/numerical-libraries/intel-numerical-libraries.md index 5f3834ffa84ee0b1fb73d01dfa0aa1a2106566b0..471f766f4d58c3e88d91d552f6040d36634c73c5 100644 --- a/docs.it4i/anselm/software/numerical-libraries/intel-numerical-libraries.md +++ b/docs.it4i/software/numerical-libraries/intel-numerical-libraries.md @@ -1,4 +1,4 @@ -# Intel numerical libraries +# Intel Numerical Libraries Intel libraries for high performance in numerical computing diff --git a/docs.it4i/anselm/software/numerical-libraries/magma-for-intel-xeon-phi.md b/docs.it4i/software/numerical-libraries/magma-for-intel-xeon-phi.md similarity index 100% rename from docs.it4i/anselm/software/numerical-libraries/magma-for-intel-xeon-phi.md rename to docs.it4i/software/numerical-libraries/magma-for-intel-xeon-phi.md diff --git a/docs.it4i/anselm/software/numerical-libraries/petsc.md b/docs.it4i/software/numerical-libraries/petsc.md similarity index 100% rename from docs.it4i/anselm/software/numerical-libraries/petsc.md rename to docs.it4i/software/numerical-libraries/petsc.md diff --git a/docs.it4i/anselm/software/numerical-libraries/trilinos.md b/docs.it4i/software/numerical-libraries/trilinos.md similarity index 100% rename from docs.it4i/anselm/software/numerical-libraries/trilinos.md rename to docs.it4i/software/numerical-libraries/trilinos.md diff --git a/docs.it4i/software/orca.md b/docs.it4i/software/orca.md deleted file mode 100644 index 3f62415459eceea55e4268d3bd2ca301748e0ce2..0000000000000000000000000000000000000000 --- a/docs.it4i/software/orca.md +++ /dev/null @@ -1,98 +0,0 @@ -# ORCA - -ORCA is a flexible, efficient and easy-to-use general purpose tool for quantum chemistry with specific emphasis on spectroscopic properties of open-shell molecules. It features a wide variety of standard quantum chemical methods ranging from semiempirical methods to DFT to single- and multireference correlated ab initio methods. It can also treat environmental and relativistic effects. 
- -## Making ORCA Available - -The following module command makes the latest version of orca available to your session - -```console -$ module load ORCA/3_0_3-linux_x86-64 -``` - -### Dependency - -```console -$ module list -Currently Loaded Modulefiles: - 1) /opt/modules/modulefiles/oscar-modules/1.0.3(default) - 2) GCC/4.9.3-binutils-2.25 - 3) binutils/2.25-GCC-4.9.3-binutils-2.25 - 4) GNU/4.9.3-2.25 - 5) icc/2015.3.187-GNU-4.9.3-2.25 - 6) ifort/2015.3.187-GNU-4.9.3-2.25 - 7) iccifort/2015.3.187-GNU-4.9.3-2.25 - 8) numactl/2.0.10-iccifort-2015.3.187-GNU-4.9.3-2.25 - 9) hwloc/1.11.1-iccifort-2015.3.187-GNU-4.9.3-2.25 - 10) OpenMPI/1.8.8-iccifort-2015.3.187-GNU-4.9.3-2.25 - 11) ORCA/3_0_3-linux_x86-64 -``` - -## Example Single Core Job - -Create a file called orca_serial.inp that contains the following orca commands - -```cpp - # My first ORCA calculation :-) - # - # Taken from the Orca manual - # https://orcaforum.cec.mpg.de/OrcaManual.pdf - ! HF SVP - * xyz 0 1 - C 0 0 0 - O 0 0 1.13 - * -``` - -Create a Sun Grid Engine submission file called submit_serial.sh that looks like this - -```console -!/bin/bash - -module load ORCA/3_0_3-linux_x86-64 -orca orca_serial.inp -``` - -Submit the job to the queue with the command - -```console -$ qsub -q qexp -I -l select=1 -qsub: waiting for job 196821.isrv5 to start -qsub: job 196821.isrv5 ready -[username@r37u04n944 ~]$ ./submit_serial.sh - - - ***************** - * O R C A * - ***************** - - --- An Ab Initio, DFT and Semiempirical electronic structure package --- - - ####################################################### - # -***- # - # Department of molecular theory and spectroscopy # - # Directorship: Frank Neese # - # Max Planck Institute for Chemical Energy Conversion # - # D-45470 Muelheim/Ruhr # - # Germany # - # # - # All rights reserved # - # -***- # - ####################################################### - -... - - ****ORCA TERMINATED NORMALLY**** -TOTAL RUN TIME: 0 days 0 hours 0 minutes 2 seconds 796 msec -[username@r37u04n944 orca]$ logout - -qsub: job 196821.isrv5 completed -``` - -## Register as a User - -You are encouraged to register as a user of Orca at [Here](https://orcaforum.cec.mpg.de/) in order to take advantage of updates, announcements and also of the users forum. - -## Documentation - -A comprehensive [.pdf](https://orcaforum.cec.mpg.de/OrcaManual.pdf) manual is available online. diff --git a/docs.it4i/software/singularity.md b/docs.it4i/software/singularity.md deleted file mode 100644 index 39618e32c735f1ef1dd02447014015518f51e342..0000000000000000000000000000000000000000 --- a/docs.it4i/software/singularity.md +++ /dev/null @@ -1,128 +0,0 @@ -[Singularity](http://singularity.lbl.gov/) enables users to have full control of their environment. A non-privileged user can "swap out" the operating system on the host for one they control. So if the host system is running RHEL6 but your application runs in Ubuntu/RHEL7, you can create an Ubuntu/RHEL7 image, install your applications into that image, copy the image to another host, and run your application on that host in it’s native Ubuntu/RHEL7 environment. - -Singularity also allows you to leverage the resources of whatever host you are on. This includes HPC interconnects, resource managers, file systems, GPUs and/or accelerators, etc. 
Singularity does this by enabling several key facets: - -* Encapsulation of the environment -* Containers are image based -* No user contextual changes or root escalation allowed -* No root owned daemon processes - -## Using Docker Images - -Singularity can import, bootstrap, and even run Docker images directly from [Docker Hub](https://hub.docker.com/). You can easily run RHEL7 like this: - -```console -[hrb33@r33u01n865 ~]$ cat /etc/redhat-release -CentOS release 6.7 (Final) -[hrb33@r33u01n865 ~]$ ml Singularity -[hrb33@r33u01n865 ~]$ singularity shell docker://centos:latest -library/centos:latest -Downloading layer: sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4 -Downloading layer: sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4 -Downloading layer: sha256:45a2e645736c4c66ef34acce2407ded21f7a9b231199d3b92d6c9776df264729 -Downloading layer: sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4 -Singularity: Invoking an interactive shell within container... - -Singularity.centos:latest> cat /etc/redhat-release -CentOS Linux release 7.3.1611 (Core) -``` - -## Creating Own Image from Docker Image - -```console -hrb33@hrb33-toshiba:/$ cd /tmp/ -hrb33@hrb33-toshiba:/tmp$ sudo singularity create /tmp/c7.img -[sudo] password for hrb33: -Creating a new image with a maximum size of 768MiB... -Executing image create helper -Formatting image with ext3 file system -Done. -hrb33@hrb33-toshiba:/tmp$ sudo singularity import c7.img docker://centos:latest -library/centos:latest -Downloading layer: sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4 -Downloading layer: sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4 -Downloading layer: sha256:45a2e645736c4c66ef34acce2407ded21f7a9b231199d3b92d6c9776df264729 -Downloading layer: sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4 -Adding Docker CMD as Singularity runscript... -Bootstrap initialization -No bootstrap definition passed, updating container -Executing Prebootstrap module -Executing Postbootstrap module -Done. -hrb33@hrb33-toshiba:/tmp$ sudo singularity shell --writable c7.img -Singularity: Invoking an interactive shell within container... - -Singularity.c7.img> mkdir /apps /scratch -Singularity.c7.img> exit -hrb33@hrb33-toshiba:/tmp$ rsync -av c7.img hrb33@login4.salomon:/home/hrb33/c7.img -sending incremental file list -c7.img - -sent 805,503,090 bytes received 34 bytes 9,205,749.99 bytes/sec -total size is 805,306,399 speedup is 1.00 - -``` - -Accessing /HOME and /SCRATCH Within Container - -```console -hrb33@hrb33-toshiba:/tmp$ ssh hrb33@login4.salomon - - _____ _ - / ____| | | - | (___ __ _| | ___ _ __ ___ ___ _ __ - \___ \ / _` | |/ _ \| '_ ` _ \ / _ \| '_ \ - ____) | (_| | | (_) | | | | | | (_) | | | | - |_____/ \__,_|_|\___/|_| |_| |_|\___/|_| |_| - - http://www.it4i.cz/?lang=en - - -Last login: Fri Feb 10 14:38:36 2017 from 10.0.131.12 -[hrb33@login4.salomon ~]$ ml Singularity -[hrb33@login4.salomon ~]$ singularity shell --bind /scratch --bind /apps --writable c7.img -Singularity: Invoking an interactive shell within container... 
- -Singularity.c7.img> ls /apps/ -l -total 68 -drwx------ 4 root root 29 Sep 29 10:28 SCS -drwxrwxr-x 301 2757 2796 8192 Feb 16 10:58 all -drwxrwxr-x 3 2757 2796 19 Jul 9 2015 base -drwxrwxr-x 16 2757 2796 4096 Nov 24 21:47 bio -drwxrwxr-x 10 2757 2796 116 Apr 8 2016 cae -drwxrwxr-x 18 2757 2796 4096 Jan 17 09:49 chem -drwxrwxr-x 11 2757 2796 122 Dec 7 09:25 compiler -drwxrwxr-x 7 2757 2796 73 Jun 29 2016 data -drwxr-xr-x 7 2757 2796 88 Jan 8 2016 debugger -drwxrwxr-x 38 2757 2796 4096 Feb 16 13:37 devel -drwxrwxr-x 9 2757 2796 130 Jan 9 08:40 easybuild -drwxr-xr-x 11 3900 4011 4096 Feb 15 09:50 gentoo -drwxr-xr-x 10 3900 4011 4096 Feb 10 17:01 gentoo_uv -drwxrwxr-x 5 2757 2796 39 Jan 18 2016 geo -drwxr-xr-x 18 2757 2796 4096 Sep 6 16:03 intel2017 -drwxrwxr-x 20 2757 2796 4096 Nov 28 08:50 lang -drwxrwxr-x 31 2757 2796 4096 Dec 7 07:48 lib -drwxrwxr-x 4 2757 2796 32 Nov 9 09:19 licenses -drwxrwxr-x 17 2757 2796 4096 Nov 15 09:24 math -drwxr-xr-x 22 2757 2796 4096 Jan 19 13:15 modules -drwxrwxr-x 8 2757 2796 82 Apr 18 2016 mpi -drwxrwxr-x 13 2757 2796 4096 Oct 24 09:08 numlib -drwxrwxr-x 10 2757 2796 108 Feb 3 11:01 perf -drwxrwxr-x 5 2757 2796 41 Jan 17 09:49 phys -drwxrwxr-x 2 2757 2796 6 Feb 3 11:01 prace -drwxr-xr-x 4 root root 36 Jun 18 2015 sw -drwxrwxr-x 5 2757 2796 49 Feb 15 2016 system -drwxr-xr-x 3 root root 19 Dec 4 2015 test -drwxrwxr-x 13 2757 2796 138 May 31 2016 toolchain -drwxrwxr-x 39 2757 2796 4096 Feb 3 11:27 tools -drwxr-xr-x 4 root root 31 Aug 11 2015 user -drwxrwxr-x 21 2757 2796 4096 Jan 5 18:56 uv -drwxrwxr-x 40 2757 2796 4096 Feb 3 11:01 vis -Singularity.c7.img> ls /scratch/ -l -total 32 -drwx------ 3 root root 4096 Aug 15 2016 backup -drwxr-x--- 2 root root 4096 Dec 5 10:34 sys -drwxrwxrwt 154 root root 20480 Feb 14 14:03 temp -drwxr-xr-x 4 root root 4096 Jan 25 10:48 work -Singularity.c7.img> -``` diff --git a/docs.it4i/anselm/software/ansys/ansys-cfx.md b/docs.it4i/software/tools/ansys/ansys-cfx.md similarity index 88% rename from docs.it4i/anselm/software/ansys/ansys-cfx.md rename to docs.it4i/software/tools/ansys/ansys-cfx.md index b816f026430e7573ddffb81d5ced15770994435b..111c5e3663dd2102e1026f65fae2aeb1fbd8f599 100644 --- a/docs.it4i/anselm/software/ansys/ansys-cfx.md +++ b/docs.it4i/software/tools/ansys/ansys-cfx.md @@ -47,7 +47,7 @@ echo Machines: $hl /ansys_inc/v145/CFX/bin/cfx5solve -def input.def -size 4 -size-ni 4x -part-large -start-method "Platform MPI Distributed Parallel" -par-dist $hl -P aa_r ``` -Header of the PBS file (above) is common and description can be find on [this site](../../job-submission-and-execution/). SVS FEM recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources. +Header of the PBS file (above) is common and description can be find on [this site](../../anselm/job-submission-and-execution/). SVS FEM recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources. Working directory has to be created before sending PBS job into the queue. Input file should be in working directory or full path to input file has to be specified. 
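+
+As a minimal illustrative sketch (the scratch path and the jobscript name are placeholders), the working directory can be prepared and the job submitted like this:
+
+```console
+$ mkdir -p /scratch/$USER/cfx-case && cd /scratch/$USER/cfx-case
+$ qsub cfx_job.sh
+```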
>Input file has to be defined by a common CFX def file which is attached to the cfx solver via the parameter -def
diff --git a/docs.it4i/anselm/software/ansys/ansys-fluent.md b/docs.it4i/software/tools/ansys/ansys-fluent.md
similarity index 92%
rename from docs.it4i/anselm/software/ansys/ansys-fluent.md
rename to docs.it4i/software/tools/ansys/ansys-fluent.md
index 4521c758ed7def8e6795f9de97ecb0d698cd9dc9..74326f978d9a088b0ff523fda89d6d502b363dd0 100644
--- a/docs.it4i/anselm/software/ansys/ansys-fluent.md
+++ b/docs.it4i/software/tools/ansys/ansys-fluent.md
@@ -38,7 +38,7 @@ NCORES=`wc -l $PBS_NODEFILE |awk '{print $1}'`
 /ansys_inc/v145/fluent/bin/fluent 3d -t$NCORES -cnf=$PBS_NODEFILE -g -i fluent.jou
 ```

-Header of the pbs file (above) is common and description can be find on [this site](../../resources-allocation-policy/). [SVS FEM](http://www.svsfem.cz) recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources.
+The header of the pbs file (above) is common and its description can be found on [this site](../../anselm/resources-allocation-policy/). [SVS FEM](http://www.svsfem.cz) recommends utilizing resources via the keywords nodes and ppn. These keywords allow you to directly address the number of nodes (computers) and cores (ppn) to be utilized by the job. The rest of the code also assumes this structure of allocated resources.

 Working directory has to be created before sending pbs job into the queue. Input file should be in working directory or full path to input file has to be specified. Input file has to be defined by a common Fluent journal file which is attached to the Fluent solver via the parameter -i fluent.jou

@@ -151,12 +151,12 @@ Fluent could be run in parallel only under Academic Research license. To do so t
 ANSLIC_ADMIN Utility will be run

- 
+ 

- 
+ 

- 
+ 

 ANSYS Academic Research license should be moved up to the top of the list.

- 
+ 
diff --git a/docs.it4i/anselm/software/ansys/ansys-ls-dyna.md b/docs.it4i/software/tools/ansys/ansys-ls-dyna.md
similarity index 87%
rename from docs.it4i/anselm/software/ansys/ansys-ls-dyna.md
rename to docs.it4i/software/tools/ansys/ansys-ls-dyna.md
index af46af93a30600c440e4e52cb5fdbd1edb677660..46a8ed726fb4da82bb743a71a98aa5e4b9f88132 100644
--- a/docs.it4i/anselm/software/ansys/ansys-ls-dyna.md
+++ b/docs.it4i/software/tools/ansys/ansys-ls-dyna.md
@@ -50,6 +50,6 @@ echo Machines: $hl
 /ansys_inc/v145/ansys/bin/ansys145 -dis -lsdynampp i=input.k -machines $hl
 ```

-Header of the PBS file (above) is common and description can be find on [this site](../../job-submission-and-execution/). [SVS FEM](http://www.svsfem.cz) recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources.
+The header of the PBS file (above) is common and its description can be found on [this site](../../anselm/job-submission-and-execution/). [SVS FEM](http://www.svsfem.cz) recommends utilizing resources via the keywords nodes and ppn. These keywords allow you to directly address the number of nodes (computers) and cores (ppn) to be utilized by the job. The rest of the code also assumes this structure of allocated resources.

 Working directory has to be created before sending PBS job into the queue. Input file should be in working directory or full path to input file has to be specified. Input file has to be defined by a common LS-DYNA **.k** file which is attached to the ANSYS solver via the parameter i=
diff --git a/docs.it4i/anselm/software/ansys/ansys-mechanical-apdl.md b/docs.it4i/software/tools/ansys/ansys-mechanical-apdl.md
similarity index 87%
rename from docs.it4i/anselm/software/ansys/ansys-mechanical-apdl.md
rename to docs.it4i/software/tools/ansys/ansys-mechanical-apdl.md
index cdaac19ff664acbcd79c8c234ff30ff54cf06cad..b33f77104100f5504e297484a586cb9a0a7e0201 100644
--- a/docs.it4i/anselm/software/ansys/ansys-mechanical-apdl.md
+++ b/docs.it4i/software/tools/ansys/ansys-mechanical-apdl.md
@@ -49,7 +49,7 @@ echo Machines: $hl
 /ansys_inc/v145/ansys/bin/ansys145 -b -dis -p aa_r -i input.dat -o file.out -machines $hl -dir $WORK_DIR
 ```

-Header of the PBS file (above) is common and description can be found on [this site](../../resource-allocation-policy/). [SVS FEM](http://www.svsfem.cz) recommends to utilize sources by keywords: nodes, ppn. These keywords allow to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources.
+The header of the PBS file (above) is common and its description can be found on [this site](../../anselm/resources-allocation-policy/). [SVS FEM](http://www.svsfem.cz) recommends utilizing resources via the keywords nodes and ppn. These keywords allow you to directly address the number of nodes (computers) and cores (ppn) to be utilized by the job. The rest of the code also assumes this structure of allocated resources.

 Working directory has to be created before sending PBS job into the queue. Input file should be in working directory or full path to input file has to be specified. Input file has to be defined by a common APDL file which is attached to the ANSYS solver via the parameter -i
diff --git a/docs.it4i/anselm/software/ansys/ansys.md b/docs.it4i/software/tools/ansys/ansys.md
similarity index 96%
rename from docs.it4i/anselm/software/ansys/ansys.md
rename to docs.it4i/software/tools/ansys/ansys.md
index 24b8b1c09721168d11a214f00a2ee50a109e6c20..d1def39cd2775933d56ce89a58a2cc3c87ea388c 100644
--- a/docs.it4i/anselm/software/ansys/ansys.md
+++ b/docs.it4i/software/tools/ansys/ansys.md
@@ -2,7 +2,7 @@
 **[SVS FEM](http://www.svsfem.cz/)**, as the **[ANSYS Channel partner](http://www.ansys.com/)** for the Czech Republic, provided all ANSYS licenses for the ANSELM cluster and supports all ANSYS Products (Multiphysics, Mechanical, MAPDL, CFX, Fluent, Maxwell, LS-DYNA...) for IT staff and ANSYS users. If you run into a problem with ANSYS functionality, please contact [hotline@svsfem.cz](mailto:hotline@svsfem.cz?subject=Ostrava%20-%20ANSELM)

-Anselm provides commercial as well as academic variants. Academic variants are distinguished by "**Academic...**" word in the name of license or by two letter preposition "**aa\_**" in the license feature name. Change of license is realized on command line respectively directly in user's PBS file (see individual products). [More about licensing here](ansys/licensing/)
+Anselm provides commercial as well as academic variants. Academic variants are distinguished by the word "**Academic...**" in the license name, or by the two-letter prefix "**aa\_**" in the license feature name. The license is selected on the command line, or directly in the user's PBS file (see individual products). [More about licensing here](licensing/)

 To load the latest version of any ANSYS product (Mechanical, Fluent, CFX, MAPDL,...) load the module:
diff --git a/docs.it4i/salomon/software/ansys/licensing.md b/docs.it4i/software/tools/ansys/licensing.md
similarity index 100%
rename from docs.it4i/salomon/software/ansys/licensing.md
rename to docs.it4i/software/tools/ansys/licensing.md
diff --git a/docs.it4i/anselm/software/ansys/ls-dyna.md b/docs.it4i/software/tools/ansys/ls-dyna.md
similarity index 85%
rename from docs.it4i/anselm/software/ansys/ls-dyna.md
rename to docs.it4i/software/tools/ansys/ls-dyna.md
index 063bcf245e7b74781c953eebb309adfad5c0e48d..3bd9deef62ba5ac1456c992f3a7ed74ddc034eff 100644
--- a/docs.it4i/anselm/software/ansys/ls-dyna.md
+++ b/docs.it4i/software/tools/ansys/ls-dyna.md
@@ -30,6 +30,6 @@ module load lsdyna
 /apps/engineering/lsdyna/lsdyna700s i=input.k
 ```

-Header of the PBS file (above) is common and description can be find on [this site](../../job-submission-and-execution/). [SVS FEM](http://www.svsfem.cz) recommends to utilize sources by keywords: nodes, ppn. These keywords allows to address directly the number of nodes (computers) and cores (ppn) which will be utilized in the job. Also the rest of code assumes such structure of allocated resources.
+The header of the PBS file (above) is common and its description can be found on [this site](../../anselm/job-submission-and-execution/). [SVS FEM](http://www.svsfem.cz) recommends utilizing resources via the keywords nodes and ppn. These keywords allow you to directly address the number of nodes (computers) and cores (ppn) to be utilized by the job. The rest of the code also assumes this structure of allocated resources.

 Working directory has to be created before sending PBS job into the queue. Input file should be in working directory or full path to input file has to be specified. Input file has to be defined by a common LS-DYNA **.k** file which is attached to the LS-DYNA solver via the parameter i=
diff --git a/docs.it4i/salomon/software/ansys/setting-license-preferences.md b/docs.it4i/software/tools/ansys/setting-license-preferences.md
similarity index 78%
rename from docs.it4i/salomon/software/ansys/setting-license-preferences.md
rename to docs.it4i/software/tools/ansys/setting-license-preferences.md
index b3f594d14863cde6aaa28f7a5139223d30a7d95b..769f7470269bf6603bcfa54f69efe66628c429db 100644
--- a/docs.it4i/salomon/software/ansys/setting-license-preferences.md
+++ b/docs.it4i/software/tools/ansys/setting-license-preferences.md
@@ -1,4 +1,4 @@
-# Setting license preferences
+# Setting License Preferences

 Some ANSYS tools allow you to explicitly specify usage of academic or commercial licenses in the command line (e.g. ansys161 -p aa_r to select the Academic Research license). However, we have observed that not all tools obey this option and choose a commercial license instead.

@@ -12,12 +12,12 @@ $ANSYSLIC_DIR/lic_admin/anslic_admin
 ANSLIC_ADMIN Utility will be run

- 
+ 

- 
+ 

- 
+ 

 ANSYS Academic Research license should be moved up to the top or down to the bottom of the list.
- 
+ 
diff --git a/docs.it4i/salomon/software/ansys/workbench.md b/docs.it4i/software/tools/ansys/workbench.md
similarity index 98%
rename from docs.it4i/salomon/software/ansys/workbench.md
rename to docs.it4i/software/tools/ansys/workbench.md
index 1b138ccd09fa64fd6ccbafbcb40ff14b2959bad4..0cd523838cc5a4367d3439354e20b1a3caa9fca7 100644
--- a/docs.it4i/salomon/software/ansys/workbench.md
+++ b/docs.it4i/software/tools/ansys/workbench.md
@@ -4,7 +4,7 @@
 It is possible to run Workbench scripts in batch mode. You need to configure solvers of individual components to run in parallel mode. Open your project in Workbench. Then, for example, in Mechanical, go to Tools - Solve Process Settings ...

- 
+ 

 Enable the Distribute Solution checkbox and enter the number of cores (e.g. 48 to run on two Salomon nodes). If you want the job to run on more than 1 node, you must also provide a so-called MPI appfile. In the Additional Command Line Arguments input field, enter:
diff --git a/docs.it4i/software/easybuild.md b/docs.it4i/software/tools/easybuild.md
similarity index 97%
rename from docs.it4i/software/easybuild.md
rename to docs.it4i/software/tools/easybuild.md
index 344b04ce895052d2b34883b6043d0c9f8cebcdea..0fbd094400748889661a353338f723c2a9be7ed3 100644
--- a/docs.it4i/software/easybuild.md
+++ b/docs.it4i/software/tools/easybuild.md
@@ -4,9 +4,6 @@ The objective of this tutorial is to show how EasyBuild can be used to ease, aut
 The benefit of using EasyBuild for your builds is that it allows automated and reproducible builds of software. Once a build has been made, the build script (via the EasyConfig file) or the installed software (via the module file) can be shared with other users.

-!!! Warning ""
-    You need to have Lmod activated to have the modules produced in Lua language.
-
 ## Short Introduction

 EasyBuild is a tool that allows you to perform automated and reproducible compilation and installation of software.
@@ -15,8 +12,8 @@ All builds and installations are performed at user level, so you don't need the
 EasyBuild relies on two main concepts

- * Toolchains
- * EasyConfig file (our easyconfigs is [here](https://code.it4i.cz/sccs/easyconfigs-it4i))
+* Toolchains
+* EasyConfig file (our easyconfigs are [here](https://code.it4i.cz/sccs/easyconfigs-it4i))

 Detailed documentation is available [here](http://easybuild.readthedocs.io).

@@ -24,8 +21,8 @@ Detailed documentations is available [here](http://easybuild.readthedocs.io).
 A toolchain corresponds to a compiler and a set of libraries which are commonly used to build software. The two main toolchains frequently used on the IT4Innovations clusters are **foss** and **intel**.

- * **foss** is based on the GCC compiler and on open-source libraries (OpenMPI, OpenBLAS, etc.).
- * **intel** is based on the Intel compiler and on Intel libraries (Intel MPI, Intel Math Kernel Library, etc.).
+* **foss** is based on the GCC compiler and on open-source libraries (OpenMPI, OpenBLAS, etc.).
+* **intel** is based on the Intel compiler and on Intel libraries (Intel MPI, Intel Math Kernel Library, etc.).

 Additional details are available [here](https://github.com/hpcugent/easybuild/wiki/Compiler-toolchains).

@@ -86,7 +83,7 @@ Options:
 ## Build Software Using Provided EasyConfig File

-### Search For Available Easyconfig
+### Search for Available Easyconfig

 Searching for available easyconfig files can be done using the **--search** (long output) and **-S** (short output) command line options. All easyconfig files available in the robot search path are considered, and searching is case-insensitive.
diff --git a/docs.it4i/software/tools/singularity-it4i.md b/docs.it4i/software/tools/singularity-it4i.md
new file mode 100644
index 0000000000000000000000000000000000000000..768ed89367f6e72d0c99742255c69819119c467e
--- /dev/null
+++ b/docs.it4i/software/tools/singularity-it4i.md
@@ -0,0 +1,277 @@
+# Singularity on IT4Innovations
+
+On our clusters, Singularity images of the main Linux distributions are prepared. List of available Singularity images (05.04.2018):
+
+```console
+    Salomon                  Anselm
+    ├── CentOS               ├── CentOS
+    │   ├── 6.9              │   ├── 6.9
+    │   ├── 6.9-MIC          │   ├── 6.9-GPU
+    │   ├── 7.4              │   ├── 7.4
+    │   └── 7.4-MIC          │   └── 7.4-GPU
+    ├── Debian               ├── Debian
+    │   └── 8.0              │   ├── 8.0
+    └── Ubuntu               │   └── 8.0-GPU
+        └── 16.04            └── Ubuntu
+                                 ├── 16.04
+                                 └── 16.04-GPU
+```
+
+Current information about available Singularity images can be obtained by the `ml av` command. The images are listed in the `OS` section.
+
+The bootstrap scripts, wrappers, features, etc. are located [here](https://code.it4i.cz/sccs/it4i-singularity).
+
+!!! note
+    The images with graphic card support are marked as **-GPU** and images with Intel Xeon Phi support are marked as **-MIC**.
+
+## IT4Innovations Singularity Wrappers
+
+For a better user experience with Singularity containers, we have prepared several wrappers:
+
+* image-exec
+* image-mpi
+* image-run
+* image-shell
+* image-update
+
+The listed wrappers help you to use the prepared Singularity images loaded as modules. You can easily load a Singularity image like any other module on the cluster by the `ml OS/version` command. After the module is loaded for the first time, the prepared image is copied into your home folder and is ready for use. When you load the module the next time, the image version is checked and an update (if one exists) is offered. You can then update your copy of the image by the `image-update` command.
+
+!!! warning
+    With image update, all user changes to the image will be overridden.
+
+The runscript inside the Singularity image can be run by the `image-run` command. This command automatically mounts the `/scratch` and `/apps` storage and invokes the image as writable, so user changes can be made.
+
+Very similar to `image-run` is the `image-exec` command. The only difference is that `image-exec` runs a user-defined command instead of the runscript. In this case, the command to be run is specified as a parameter.
+
+For development, it is very useful to use an interactive shell inside the Singularity container. In this interactive shell you can make any changes to the image you want, but be aware that you cannot use `sudo` privileged commands directly on the cluster. To invoke the interactive shell easily, just use the `image-shell` command.
+
+Another useful feature of Singularity is its direct support of OpenMPI. For proper MPI function, you have to install the same version of OpenMPI inside the image as you use on the cluster. OpenMPI/2.1.1 is installed in the prepared images. The MPI must be started outside the container. The easiest way to start the MPI is to use the `image-mpi` command.
+This command has the same parameters as `mpirun`. Thanks to that, there is no difference between running a normal MPI application and an MPI application in a Singularity container.
+
+## Examples
+
+In the examples, we will use the prepared Singularity images.
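+For orientation, checking which image modules are available might look like this (the output is abridged and purely illustrative):
+
+```console
+$ ml av
+...
+-------------------------------- OS --------------------------------
+   CentOS/6.9    CentOS/7.4    Debian/8.0    Ubuntu/16.04
+...
+```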
+ +### Load Image + +```console +$ ml CentOS/6.9 +Your image of CentOS/6.9 is at location: /home/login/.singularity/images/CentOS-6.9_20180220133305.img +``` + +!!! tip + After the module is loaded for the first time, the prepared image is copied into your home folder to the *.singularity/images* subfolder. + +### Wrappers + +**image-exec** + +Executes the given command inside the Singularity image. The container is in this case started, then the command is executed and the container is stopped. + +```console +$ ml CentOS/7.3 +Your image of CentOS/7.3 is at location: /home/login/.singularity/images/CentOS-7.3_20180220104046.img +$ image-exec cat /etc/centos-release +CentOS Linux release 7.3.1708 (Core) +``` + +**image-mpi** + +MPI wrapper - see more in the chapter [Examples MPI](#mpi). + +**image-run** + +This command runs the runscript inside the Singularity image. Note, that the prepared images don't contain a runscript. + +**image-shell** + +Invokes an interactive shell inside the Singularity image. + +```console +$ ml CentOS/7.3 +$ image-shell +Singularity: Invoking an interactive shell within container... + +Singularity CentOS-7.3_20180220104046.img:~> +``` + +### Update Image + +This command is for updating your local copy of the Singularity image. The local copy is overridden in this case. + +```console +$ ml CentOS/6.9 +New version of CentOS image was found. (New: CentOS-6.9_20180220092823.img Old: CentOS-6.9_20170220092823.img) +For updating image use: image-update +Your image of CentOS/6.9 is at location: /home/login/.singularity/images/CentOS-6.9_20170220092823.img +$ image-update +New version of CentOS image was found. (New: CentOS-6.9_20180220092823.img Old: CentOS-6.9_20170220092823.img) +Do you want to update local copy? (WARNING all user modification will be deleted) [y/N]: y +Updating image CentOS-6.9_20180220092823.img + 2.71G 100% 199.49MB/s 0:00:12 (xfer#1, to-check=0/1) + +sent 2.71G bytes received 31 bytes 163.98M bytes/sec +total size is 2.71G speedup is 1.00 +New version is ready. (/home/login/.singularity/images/CentOS-6.9_20180220092823.img) +``` + +### Intel Xeon Phi Cards - MIC + +In the following example, we are using a job submitted by the command: `qsub -A PROJECT -q qprod -l select=1:mpiprocs=24:accelerator=true -I` + +!!! info + The MIC image was prepared only for the Salomon cluster. + +**Code for the Offload Test** + +```c +#include <stdio.h> +#include <thread> +#include <stdlib.h> +#include <unistd.h> + +int main() { + + char hostname[1024]; + gethostname(hostname, 1024); + + unsigned int nthreads = std::thread::hardware_concurrency(); + printf("Hello world, #of cores: %d\n",nthreads); + #pragma offload target(mic) + { + nthreads = std::thread::hardware_concurrency(); + printf("Hello world from MIC, #of cores: %d\n",nthreads); + } +} +``` + +**Compile and Run** + +```console +[login@r38u03n975 ~]$ ml CentOS/6.9-MIC +Your image of CentOS/6.9-MIC is at location: /home/login/.singularity/images/CentOS-6.9-MIC_20180220112004.img +[login@r38u03n975 ~]$ image-shell +Singularity: Invoking an interactive shell within container... 
+ +Singularity CentOS-6.9-MIC_20180220112004.img:~> ml intel/2017b +Singularity CentOS-6.9-MIC_20180220112004.img:~> ml + +Currently Loaded Modules: + 1) GCCcore/6.3.0 3) icc/2017.1.132-GCC-6.3.0-2.27 5) iccifort/2017.1.132-GCC-6.3.0-2.27 7) iimpi/2017a 9) intel/2017a + 2) binutils/2.27-GCCcore-6.3.0 4) ifort/2017.1.132-GCC-6.3.0-2.27 6) impi/2017.1.132-iccifort-2017.1.132-GCC-6.3.0-2.27 8) imkl/2017.1.132-iimpi-2017a +Singularity CentOS-6.9-MIC_20180220112004.img:~> icpc -std=gnu++11 -qoffload=optional hello.c -o hello-host +Singularity CentOS-6.9-MIC_20180220112004.img:~> ./hello-host +Hello world, #of cores: 24 +Hello world from MIC, #of cores: 244 +``` + +### GPU Image + +In the following example, we are using a job submitted by the command: `qsub -A PROJECT -q qnvidia -l select=1:ncpus=16:mpiprocs=16 -l walltime=01:00:00 -I` + +!!! note + The GPU image was prepared only for the Anselm cluster. + +**Checking NVIDIA Driver Inside Image** + +```console +[login@cn199.anselm ~]$ image-shell +Singularity: Invoking an interactive shell within container... + +Singularity CentOS-6.9-GPU_20180309130604.img:~> ml +No modules loaded +Singularity CentOS-6.9-GPU_20180309130604.img:~> nvidia-smi +Mon Mar 12 07:07:53 2018 ++-----------------------------------------------------------------------------+ +| NVIDIA-SMI 390.30 Driver Version: 390.30 | +|-------------------------------+----------------------+----------------------+ +| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | +|===============================+======================+======================| +| 0 Tesla K20m Off | 00000000:02:00.0 Off | 0 | +| N/A 28C P0 51W / 225W | 0MiB / 4743MiB | 89% Default | ++-------------------------------+----------------------+----------------------+ + ++-----------------------------------------------------------------------------+ +| Processes: GPU Memory | +| GPU PID Type Process name Usage | +|=============================================================================| +| No running processes found | ++-----------------------------------------------------------------------------+ +``` + +### MPI + +In the following example, we are using a job submitted by the command: `qsub -A PROJECT -q qprod -l select=2:mpiprocs=24 -l walltime=00:30:00 -I` + +!!! note + We have seen no major performance impact for a job running in a Singularity container. + +With Singularity, the MPI usage model is to call `mpirun` from outside the container, and reference the container from your `mpirun` command. Usage would look like this: + +```console +$ mpirun -np 24 singularity exec container.img /path/to/contained_mpi_prog +``` + +By calling `mpirun` outside of the container, we solve several very complicated work-flow aspects. For example, if `mpirun` is called from within the container it must have a method for spawning processes on remote nodes. Historically the SSH is used for this which means that there must be an `sshd` running within the container on the remote nodes, and this `sshd` process must not conflict with the `sshd` running on that host! It is also possible for the resource manager to launch the job and (in OpenMPI’s case) the Orted (Open RTE User-Level Daemon) processes on the remote system, but that then requires resource manager modification and container awareness. 
+ +In the end, we do not gain anything by calling `mpirun` from within the container except for increasing the complexity levels and possibly losing out on some added +performance benefits (e.g. if a container wasn’t built with the proper OFED as the host). + +#### MPI Inside Singularity Image + +```console +$ ml CentOS/6.9 +$ image-shell +Singularity: Invoking an interactive shell within container... + +Singularity CentOS-6.9_20180220092823.img:~> mpirun hostname | wc -l +24 +``` + +As you can see in this example, we allocated two nodes, but MPI can use only one node (24 processes) when used inside the Singularity image. + +#### MPI Outside Singularity Image + +```console +$ ml CentOS/6.9 +Your image of CentOS/6.9 is at location: /home/login/.singularity/images/CentOS-6.9_20180220092823.img +$ image-mpi hostname | wc -l +48 +``` + +In this case, the MPI wrapper behaves like `mpirun` command. The `mpirun` is called outside the container and the communication between nodes are propagated +into the container automatically. + +## How to Use Own Image on Cluster? + +* Prepare the image on your computer +* Transfer the images to your `/home` directory on the cluster (for example `.singularity/image`) + +```console +local:$ scp container.img login@login4.salomon.it4i.cz:~/.singularity/image/container.img +``` + +* Load module Singularity (`ml Singularity`) +* Use your image + +!!! note + If you want to use the Singularity wrappers with your own images, then load module `Singularity-wrappers/master` and set the environment variable `IMAGE_PATH_LOCAL=/path/to/container.img`. + +## How to Edit IT4Innovations Image? + +* Transfer the image to your computer + +```console +local:$ scp login@login4.salomon.it4i.cz:/home/login/.singularity/image/container.img container.img +``` + +* Modify the image +* Transfer the image from your computer to your `/home` directory on the cluster + +```console +local:$ scp container.img login@login4.salomon.it4i.cz:/home/login/.singularity/image/container.img +``` + +* Load module Singularity (`ml Singularity`) +* Use your image \ No newline at end of file diff --git a/docs.it4i/software/tools/singularity.md b/docs.it4i/software/tools/singularity.md new file mode 100644 index 0000000000000000000000000000000000000000..288f420a003a79d7980e961478a08096941e6359 --- /dev/null +++ b/docs.it4i/software/tools/singularity.md @@ -0,0 +1,118 @@ +# Singularity Container + +[Singularity](http://singularity.lbl.gov/) enables users to have full control of their environment. A non-privileged user can "swap out" the operating system on the host for one they control. So if the host system is running RHEL6 but your application runs in Ubuntu/RHEL7, you can create an Ubuntu/RHEL7 image, install your applications into that image, copy the image to another host, and run your application on that host in it’s native Ubuntu/RHEL7 environment. + +Singularity also allows you to leverage the resources of whatever host you are on. This includes HPC interconnects, resource managers, file systems, GPUs and/or accelerators, etc. Singularity does this by enabling several key facets: + +* Encapsulation of the environment +* Containers are image based +* No user contextual changes or root escalation allowed +* No root owned daemon processes + +This documentation is for Singularity version 2.4 and newer. + +## Using Docker Images + +Singularity can import, bootstrap, and even run Docker images directly from [Docker Hub](https://hub.docker.com/). 
You can easily run RHEL7 container like this: + +```console +hra0031@login4:~$ cat /etc/redhat-release +CentOS release 6.9 (Final) +hra0031@login4:~$ ml Singularity +hra0031@login4:~$ singularity shell docker://centos:latest +Docker image path: index.docker.io/library/centos:latest +Cache folder set to /home/hra0031/.singularity/docker +[1/1] |===================================| 100.0% +Creating container runtime... +Singularity: Invoking an interactive shell within container... + +Singularity centos:latest:~> cat /etc/redhat-release +CentOS Linux release 7.4.1708 (Core) +``` + +In this case, image is downloaded from Docker Hub, extracted to a temporary directory and Singularity interactive shell is invoked. This procedure can take a lot of time, especially with large images. + +## Importing Docker Image + +Singularity containers can be in three different formats: + +* read-only **squashfs** (default) - best for production +* writable **ext3** (--writable option) +* writable **(ch)root directory** (--sandbox option) - best for development + +Squashfs and (ch)root directory images can be built from Docker source directly on the cluster, no root privileges are needed. It is strongly recomended to create native Singularity image to speed up the launch of the container. + +```console +hra0031@login4:~$ ml Singularity +hra0031@login4:~$ singularity build ubuntu.img docker://ubuntu:latest +Docker image path: index.docker.io/library/ubuntu:latest +Cache folder set to /home/hra0031/.singularity/docker +Importing: base Singularity environment +Importing: /home/hra0031/.singularity/docker/sha256:50aff78429b146489e8a6cb9334d93a6d81d5de2edc4fbf5e2d4d9253625753e.tar.gz +Importing: /home/hra0031/.singularity/docker/sha256:f6d82e297bce031a3de1fa8c1587535e34579abce09a61e37f5a225a8667422f.tar.gz +Importing: /home/hra0031/.singularity/docker/sha256:275abb2c8a6f1ce8e67a388a11f3cc014e98b36ff993a6ed1cc7cd6ecb4dd61b.tar.gz +Importing: /home/hra0031/.singularity/docker/sha256:9f15a39356d6fc1df0a77012bf1aa2150b683e46be39d1c51bc7a320f913e322.tar.gz +Importing: /home/hra0031/.singularity/docker/sha256:fc0342a94c89e477c821328ccb542e6fb86ce4ef4ebbf1098e85669e051ef0dd.tar.gz +Importing: /home/hra0031/.singularity/metadata/sha256:c6a9ef4b9995d615851d7786fbc2fe72f72321bee1a87d66919b881a0336525a.tar.gz +WARNING: Building container as an unprivileged user. If you run this container as root +WARNING: it may be missing some functionality. +Building Singularity image... +Singularity container built: ubuntu.img +Cleaning up... +``` + +## Launching the Container + +The interactive shell can be invoked by the `singularity shell` command. This is useful for development purposes. Use the `-w | --writable` option to make changes inside the container permanent. + +```console +hra0031@login4:~$ singularity shell -w ubuntu.img +Singularity: Invoking an interactive shell within container... + +Singularity ubuntu.img:~> cat /etc/lsb-release +DISTRIB_ID=Ubuntu +DISTRIB_RELEASE=16.04 +DISTRIB_CODENAME=xenial +DISTRIB_DESCRIPTION="Ubuntu 16.04.3 LTS" +``` + +A command can be run inside the container (without interactive shell) by invoking `singularity exec` command. + +``` +hra0031@login4:~$ singularity exec ubuntu.img cat /etc/lsb-release +DISTRIB_ID=Ubuntu +DISTRIB_RELEASE=16.04 +DISTRIB_CODENAME=xenial +DISTRIB_DESCRIPTION="Ubuntu 16.04.3 LTS" +``` + +Singularity image can contain a runscript. This script is executed inside the container after the `singularity run` command is used. 
The runscript is mostly used to run an application for which the container is built. In the following example it is `fortune | cowsay` command. + +``` +hra0031@login4:~$ singularity run ubuntu.img + ___________________ +< Are you a turtle? > + ------------------- + \ ^__^ + \ (oo)\_______ + (__)\ )\/\ + ||----w | + || || +``` + +## Accessing /HOME and /SCRATCH Within Container + +User home directory is mounted inside the container automatically. If you need access to **/SCRATCH** storage for your computation, this must be mounted by `-B | --bind` option. + +!!!Warning + The mounted folder has to exist inside the container or the container image has to be writable! + +```console +hra0031@login4:~$ singularity shell -B /scratch -w ubuntu.img +Singularity: Invoking an interactive shell within container... + +Singularity ubuntu.img:~> ls /scratch +ddn sys temp work +``` + +Comprehensive documentation can be found at the [Singularity](http://singularity.lbl.gov/quickstart) website. \ No newline at end of file diff --git a/docs.it4i/software/tools/spack.md b/docs.it4i/software/tools/spack.md new file mode 100644 index 0000000000000000000000000000000000000000..1dbf2b2d52843593b9107c5ec297ce203090b18d --- /dev/null +++ b/docs.it4i/software/tools/spack.md @@ -0,0 +1,354 @@ +# Spack + +Spack is a package manager for supercomputers, Linux, and macOS. It makes installing scientific software easy. With Spack, you can build a package with multiple versions, configurations, platforms, and compilers, and all of these builds can coexist on the same machine. + +Homepage is at [https://spack.io/](https://spack.io/) + +Documentation is at [https://spack.readthedocs.io/en/latest/](https://spack.readthedocs.io/en/latest/) + +## Spack on IT4Innovations Clusters + +```console + +$ ml av Spack + +---------------------- /apps/modules/devel ------------------------------ + Spack/default + +``` + +!!! note + Spack/default is rule for setting up local installation + +## First Usage Module Spack/Default + +The Spack will be installed into `~/Spack` folder. You can set the configuration by modifying ~/.spack/configure.yml. + +```console +$ ml Spack +== Settings for first use +Couldn't import dot_parser, loading of dot files will not be possible. +== temporary log file in case of crash /tmp/eb-wLh1RT/easybuild-54vEn3.log +== processing EasyBuild easyconfig /apps/easybuild/easyconfigs-it4i/s/Spack/Spack-0.10.0.eb +== building and installing Spack/0.10.0... +== fetching files... +== creating build dir, resetting environment... +== unpacking... +== patching... +== preparing... +== configuring... +== building... +== testing... +== installing... +== taking care of extensions... +== postprocessing... +== sanity checking... +== cleaning up... +== creating module... +== permissions... +== packaging... +== COMPLETED: Installation ended successfully +== Results of the build can be found in the log file(s) ~/.local/easybuild/software/Spack/0.10.0/easybuild/easybuild-Spack-0.10.0-20170707.122650.log +== Build succeeded for 1 out of 1 +== Temporary log file(s) /tmp/eb-wLh1RT/easybuild-54vEn3.log* have been removed. +== Temporary directory /tmp/eb-wLh1RT has been removed. 
+== Create folder ~/Spack + +The following have been reloaded with a version change: + 1) Spack/default => Spack/0.10.0 + +$ spack --version +0.10.0 +``` + +## Usage Module Spack/Default + +```console +$ ml Spack + +The following have been reloaded with a version change: + 1) Spack/default => Spack/0.10.0 + +$ spack --version +0.10.0 +``` + +## Build Software Package + +Packages in Spack are written in pure Python, so you can do anything in Spack that you can do in Python. Python was chosen as the implementation language for two reasons. First, Python is becoming ubiquitous in the scientific software community. Second, it’s a modern language and has many powerful features to help make package writing easy. + +### Search for Available Software + +To install software with Spack, you need to know what software is available. Use the `spack list` command. + +```console +$ spack list +==> 1114 packages. +abinit font-bh-100dpi libffi npm py-ply r-maptools tetgen +ack font-bh-75dpi libfontenc numdiff py-pmw r-markdown tethex +activeharmony font-bh-lucidatypewriter-100dpi libfs nwchem py-prettytable r-mass texinfo +adept-utils font-bh-lucidatypewriter-75dpi libgcrypt ocaml py-proj r-matrix texlive +adios font-bh-ttf libgd oce py-prompt-toolkit r-matrixmodels the-platinum-searcher +adol-c font-bh-type1 libgpg-error oclock py-protobuf r-memoise the-silver-searcher +allinea-forge font-bitstream-100dpi libgtextutils octave py-psutil r-mgcv thrift +allinea-reports font-bitstream-75dpi libhio octave-splines py-ptyprocess r-mime tinyxml +ant font-bitstream-speedo libice octopus py-pudb r-minqa tinyxml2 +antlr font-bitstream-type1 libiconv ompss py-py r-multcomp tk +ape font-cronyx-cyrillic libint ompt-openmp py-py2cairo r-munsell tmux +apex font-cursor-misc libjpeg-turbo opari2 py-py2neo r-mvtnorm tmuxinator +applewmproto font-daewoo-misc libjson-c openblas py-pychecker r-ncdf4 transset +appres font-dec-misc liblbxutil opencoarrays py-pycodestyle r-networkd3 trapproto +apr font-ibm-type1 libmesh opencv py-pycparser r-nlme tree +... +``` + +#### Specify Software Version (For Package) + +To see more available versions of a package, run `spack versions`. + +```console +$ spack versions git +==> Safe versions (already checksummed): + 2.11.0 2.9.3 2.9.2 2.9.1 2.9.0 2.8.4 2.8.3 2.8.2 2.8.1 2.8.0 2.7.3 2.7.1 +==> Remote versions (not yet checksummed): + Found no versions for git +``` + +## Graph for Software Package + +Spack provides `spack graph` command to display dependency graph. The command by default generates an ASCII rendering of a spec’s dependency graph. + +```console +$ spack graph git +o git +|\ +| |\ +| | |\ +| | | |\ +| | | | |\ +| | | | | |\ +| | | | | | |\ +| | | | | | | |\ +| | | | | | | o | curl +| |_|_|_|_|_|/| | +|/| | | |_|_|/ / +| | | |/| | | | +| | | o | | | | openssl +| |_|/ / / / / +|/| | | | | | +| | | | o | | gettext +| | | | |\ \ \ +| | | | | |\ \ \ +| | | | | | |\ \ \ +| | | | | | | |\ \ \ +| | | | | | | o | | | libxml2 +| |_|_|_|_|_|/| | | | +|/| | | | |_|/| | | | +| | | | |/| | | | | | +o | | | | | | | | | | zlib + / / / / / / / / / / +| | | o | | | | | | xz +| | | / / / / / / +| | | o | | | | | tar +| | | / / / / / +| | | | o | | | pkg-config +| | | | / / / +o | | | | | | perl + / / / / / / +o | | | | | pcre + / / / / / +| o | | | ncurses +| / / / +| | | o autoconf +| | | o m4 +| | | o libsigsegv +| | | +o | | libiconv + / / +| o expat +| +o bzip2 +``` + +### Information for Software Package + +To get more information on a particular package from `spack list`, use `spack info`. 
+ +```console +$ spack info git +Package: git +Homepage: http://git-scm.com + +Safe versions: + 2.11.0 https://github.com/git/git/tarball/v2.11.0 + 2.9.3 https://github.com/git/git/tarball/v2.9.3 + 2.9.2 https://github.com/git/git/tarball/v2.9.2 + 2.9.1 https://github.com/git/git/tarball/v2.9.1 + 2.9.0 https://github.com/git/git/tarball/v2.9.0 + 2.8.4 https://github.com/git/git/tarball/v2.8.4 + 2.8.3 https://github.com/git/git/tarball/v2.8.3 + 2.8.2 https://github.com/git/git/tarball/v2.8.2 + 2.8.1 https://github.com/git/git/tarball/v2.8.1 + 2.8.0 https://github.com/git/git/tarball/v2.8.0 + 2.7.3 https://github.com/git/git/tarball/v2.7.3 + 2.7.1 https://github.com/git/git/tarball/v2.7.1 + +Variants: + None + +Installation Phases: + install + +Build Dependencies: + autoconf curl expat gettext libiconv openssl pcre perl zlib + +Link Dependencies: + curl expat gettext libiconv openssl pcre perl zlib + +Run Dependencies: + None + +Virtual Packages: + None + +Description: + Git is a free and open source distributed version control system + designed to handle everything from small to very large projects with + speed and efficiency. +``` + +### Install Software Package + +`spack install` will install any package shown by `spack list`. For example, to install the latest version of the `git` package, you might type `spack install git` for default version or `spack install git@version` to chose the particular one. + +```console +$ spack install git@2.11.0 +==> Installing git +==> Installing pcre +==> Fetching http://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.39.tar.bz2 +... +``` + +!!! warning + `FTP` on cluster is not allowed, you must edit source link. + +### Edit Rule + +```console +$ spack edit git +``` + +!!! note + To change source link (`ftp://` to `http://`) use `spack create URL -f` to regenerates rules. + +#### **Example** + +```console +$ spack install git +==> Installing git +==> Installing pcre +==> Fetching ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.39.tar.bz2 +curl: (7) couldn't connect to host +==> Fetching from ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.39.tar.bz2 failed. +==> Error: FetchError: All fetchers failed for pcre-8.39-bm3lumpbghly2l7bkjsi4n2l3jyam6ax +... + +$ spack create http://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.39.tar.bz2 -f +==> This looks like a URL for pcre +==> Found 2 versions of pcre: + + 8.41 http://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.tar.bz2 + 8.40 http://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.40.tar.bz2 + +How many would you like to checksum? (default is 1, q to abort) 1 +==> Downloading... +==> Fetching http://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.tar.bz2 +######################################################################## 100,0% +==> Checksummed 1 version of pcre +==> This package looks like it uses the cmake build system +==> Created template for pcre package +==> Created package file: ~/.local/easybuild/software/Spack/0.10.0/var/spack/repos/builtin/packages/pcre/package.py +$ +$ spack install git +==> Installing git +==> Installing pcre +==> Installing cmake +==> Installing ncurses +==> Fetching http://ftp.gnu.org/pub/gnu/ncurses/ncurses-6.0.tar.gz +######################################################################## 100,0% +... +``` + +## Available Spack Module + +We know that `spack list` shows you the names of available packages, but how do you figure out which are already installed? 
+ +```console +==> 19 installed packages. +-- linux-centos6-x86_64 / gcc@4.4.7 ----------------------------- +autoconf@2.69 cmake@3.7.1 expat@2.2.0 git@2.11.0 libsigsegv@2.10 m4@1.4.17 openssl@1.0.2j perl@5.24.0 tar@1.29 zlib@1.2.10 +bzip2@1.0.6 curl@7.50.3 gettext@0.19.8.1 libiconv@1.14 libxml2@2.9.4 ncurses@6.0 pcre@8.41 pkg-config@0.29.1 xz@5.2.2 +``` + +Spack colorizes output. + +```console +$ spack find | less -R +``` + +`spack find` shows the specs of installed packages. A spec is like a name, but it has a version, compiler, architecture, and build options associated with it. In spack, you can have many installations of the same package with different specs. + +## Load and Unload Module + +Neither of these is particularly pretty, easy to remember, or easy to type. Luckily, Spack has its own interface for using modules and dotkits. + +```console +$ spack load git +==> This command requires spack's shell integration. + + To initialize spack's shell commands, you must run one of + the commands below. Choose the right command for your shell. + + For bash and zsh: + . ~/.local/easybuild/software/Spack/0.10.0/share/spack/setup-env.sh + + For csh and tcsh: + setenv SPACK_ROOT ~/.local/easybuild/software/Spack/0.10.0 + source ~/.local/easybuild/software/Spack/0.10.0/share/spack/setup-env.csh +``` + +### First Usage + +```console +$ . ~/.local/easybuild/software/Spack/0.10.0/share/spack/setup-env.sh +``` + +```console +$ git version 1.7.1 +$ spack load git +$ git --version +git version 2.11.0 +$ spack unload git +$ git --version +git version 1.7.1 +``` + +## Uninstall Software Package + +Spack will ask you either to provide a version number to remove the ambiguity or use the `--all` option to uninstall all of the matching packages. + +You may force uninstall a package with the `--force` option. + +```console +$ spack uninstall git +==> The following packages will be uninstalled : + +-- linux-centos6-x86_64 / gcc@4.4.7 ----------------------------- +xmh3hmb git@2.11.0%gcc + + +==> Do you want to proceed ? [y/n] +y +==> Successfully uninstalled git@2.11.0%gcc@4.4.7 arch=linux-centos6-x86_64 -xmh3hmb + +``` diff --git a/docs.it4i/anselm/software/virtualization.md b/docs.it4i/software/tools/virtualization.md similarity index 94% rename from docs.it4i/anselm/software/virtualization.md rename to docs.it4i/software/tools/virtualization.md index 109a771b0c5307471a0131e61298eae9e242467f..1a8f3e497416149d2b2c7d1d59df477e6eaf2209 100644 --- a/docs.it4i/anselm/software/virtualization.md +++ b/docs.it4i/software/tools/virtualization.md @@ -106,42 +106,43 @@ For Windows operating systems we suggest using Local Group Policy Startup script Example startup script for Windows virtual machine: -```bash - @echo off - set LOG=c:startup.log - set MAPDRIVE=z: - set SCRIPT=%MAPDRIVE%run.bat - set TIMEOUT=300 - - echo %DATE% %TIME% Running startup script>%LOG% - - rem Mount share - echo %DATE% %TIME% Mounting shared drive>%LOG% - net use z: 10.0.2.4qemu >%LOG% 2>&1 - dir z: >%LOG% 2>&1 - echo. >%LOG% - - if exist %MAPDRIVE% ( - echo %DATE% %TIME% The drive "%MAPDRIVE%" exists>%LOG% - - if exist %SCRIPT% ( - echo %DATE% %TIME% The script file "%SCRIPT%"exists>%LOG% - echo %DATE% %TIME% Running script %SCRIPT%>%LOG% - set TIMEOUT=0 - call %SCRIPT% - ) else ( - echo %DATE% %TIME% The script file "%SCRIPT%"does not exist>%LOG% - ) - - ) else ( - echo %DATE% %TIME% The drive "%MAPDRIVE%" does not exist>%LOG% - ) - echo. 
-
-    timeout /T %TIMEOUT%
-
-    echo %DATE% %TIME% Shut down>%LOG%
-    shutdown /s /t 0
+```bat
+@echo off
+set LOG=c:\startup.log
+set MAPDRIVE=z:
+set SCRIPT=%MAPDRIVE%\run.bat
+set TIMEOUT=300
+
+echo %DATE% %TIME% Running startup script>%LOG%
+
+rem Mount share
+echo %DATE% %TIME% Mounting shared drive>>%LOG%
+net use z: \\10.0.2.4\qemu >>%LOG% 2>&1
+dir z:\ >>%LOG% 2>&1
+echo. >>%LOG%
+
+if exist %MAPDRIVE%\ (
+    echo %DATE% %TIME% The drive "%MAPDRIVE%" exists>>%LOG%
+
+    if exist %SCRIPT% (
+        echo %DATE% %TIME% The script file "%SCRIPT%" exists>>%LOG%
+        echo %DATE% %TIME% Running script %SCRIPT%>>%LOG%
+        set TIMEOUT=0
+        call %SCRIPT%
+    ) else (
+        echo %DATE% %TIME% The script file "%SCRIPT%" does not exist>>%LOG%
+    )
+
+) else (
+    echo %DATE% %TIME% The drive "%MAPDRIVE%" does not exist>>%LOG%
+)
+echo. >>%LOG%
+
+timeout /T %TIMEOUT%
+
+echo %DATE% %TIME% Shut down>>%LOG%
+shutdown /s /t 0
 ```
 
 The example startup script maps the shared job directory as drive z: and looks for a run script called run.bat. If the run script is found, it is executed; otherwise the script waits for 5 minutes and then shuts down the virtual machine.
@@ -324,6 +325,12 @@ Both user and vde network back-end have low performance. For fast interconnect (
 Cluster Anselm provides the TAP device tap0 for your job. The TAP interconnect does not provide any services (such as NAT, DHCP, DNS, or SMB), just raw networking, so you must provide these services yourself if you need them.
 
+To enable the TAP interconnect feature, you need to specify the virt_network=True PBS resource at job submission.
+
+```console
+$ qsub ... -l virt_network=True
+```
+
 Run qemu with TAP network back-end:
 
 ```console
diff --git a/docs.it4i/anselm/software/gpi2.md b/docs.it4i/software/viz/gpi2.md
similarity index 83%
rename from docs.it4i/anselm/software/gpi2.md
rename to docs.it4i/software/viz/gpi2.md
index 09241e15a96f7412f2e7652efda091d7868cd5d1..1de40bd8a592cf0d450a8744f704a767004b2b6a 100644
--- a/docs.it4i/anselm/software/gpi2.md
+++ b/docs.it4i/software/viz/gpi2.md
@@ -12,6 +12,8 @@ The GPI-2, version 1.0.2 is available on Anselm via module gpi2:
 
 ```console
 $ ml gpi2
+
+$ ml av GPI-2 # Salomon
 ```
 
 The module sets up the environment variables required for linking and running GPI-2 enabled applications. This particular command loads the default module, which is gpi2/1.0.2.

@@ -99,40 +101,40 @@ The gaspi_logger utility is used to view the output from all nodes except the ma
 Following is an example of GPI-2 enabled code:
 
 ```cpp
-    #include <GASPI.h>
-    #include <stdlib.h>
+#include <GASPI.h>
+#include <stdlib.h>
 
-    void success_or_exit ( const char* file, const int line, const int ec)
+void success_or_exit ( const char* file, const int line, const int ec)
+{
+  if (ec != GASPI_SUCCESS)
     {
-      if (ec != GASPI_SUCCESS)
-        {
-          gaspi_printf ("Assertion failed in %s[%i]:%dn", file, line, ec);
-          exit (1);
-        }
+    gaspi_printf ("Assertion failed in %s[%i]:%d\n", file, line, ec);
+    exit (1);
     }
+}
 
-    #define ASSERT(ec) success_or_exit (__FILE__, __LINE__, ec);
+#define ASSERT(ec) success_or_exit (__FILE__, __LINE__, ec);
 
-    int main(int argc, char *argv[])
-    {
-      gaspi_rank_t rank, num;
-      gaspi_return_t ret;
+int main(int argc, char *argv[])
+{
+  gaspi_rank_t rank, num;
+  gaspi_return_t ret;
 
-      /* Initialize GPI-2 */
-      ASSERT( gaspi_proc_init(GASPI_BLOCK) );
+  /* Initialize GPI-2 */
+  ASSERT( gaspi_proc_init(GASPI_BLOCK) );
 
-      /* Get ranks information */
-      ASSERT( gaspi_proc_rank(&rank) );
-      ASSERT( gaspi_proc_num(&num) );
+  /* Get ranks information */
+  ASSERT( gaspi_proc_rank(&rank) );
+  ASSERT( gaspi_proc_num(&num) );
 
-      gaspi_printf("Hello from rank %d of %dn",
-                   rank, num);
+  gaspi_printf("Hello from rank %d of %d\n",
+               rank, num);
 
-      /* Terminate */
-      ASSERT( gaspi_proc_term(GASPI_BLOCK) );
+  /* Terminate */
+  ASSERT( gaspi_proc_term(GASPI_BLOCK) );
 
-      return 0;
-    }
+  return 0;
+}
 ```
 
 Load modules and compile:
diff --git a/docs.it4i/anselm/software/openfoam.md b/docs.it4i/software/viz/openfoam.md
similarity index 76%
rename from docs.it4i/anselm/software/openfoam.md
rename to docs.it4i/software/viz/openfoam.md
index 865f054d326d17591cf623d0ed9d492d342e01ed..27aefea264ca2414f8abde9cb734896ac1255faa 100644
--- a/docs.it4i/anselm/software/openfoam.md
+++ b/docs.it4i/software/viz/openfoam.md
@@ -45,7 +45,7 @@ In /opt/modules/modulefiles/engineering you can see installed engineering softwa
 lsdyna/7.x.x    openfoam/2.2.1-gcc481-openmpi1.6.5-SP
 ```
 
-For information how to use modules please [look here](../environment-and-modules/).
+For information on how to use modules, please [look here](../anselm/environment-and-modules/).
 
 ## Getting Started
 
@@ -92,26 +92,26 @@ Now you can run the first case for example incompressible laminar flow in a cavi
 Create a Bash script test.sh
 
 ```bash
-    #!/bin/bash
-    module load openfoam/2.2.1-icc-openmpi1.6.5-DP
-    source $FOAM_BASHRC
+#!/bin/bash
+module load openfoam/2.2.1-icc-openmpi1.6.5-DP
+source $FOAM_BASHRC
 
-    # source to run functions
-    . $WM_PROJECT_DIR/bin/tools/RunFunctions
+# source to run functions
+. $WM_PROJECT_DIR/bin/tools/RunFunctions
 
-    cd $FOAM_RUN/tutorials/incompressible/icoFoam/cavity
+cd $FOAM_RUN/tutorials/incompressible/icoFoam/cavity
 
-    runApplication blockMesh
-    runApplication icoFoam
+runApplication blockMesh
+runApplication icoFoam
 ```
 
-Job submission
+Job submission (example for Anselm):
 
 ```console
 $ qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=16,walltime=03:00:00 test.sh
 ```
 
-For information about job submission please [look here](../job-submission-and-execution/).
+For information about job submission, please [look here](../anselm/job-submission-and-execution/).
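+
+Once the job finishes, you can quickly check the result from the login node. A minimal sketch, assuming the cavity tutorial paths used above and that `runApplication` writes its log as log.icoFoam in the case directory:
+
+```console
+$ qstat -u $USER
+$ tail $FOAM_RUN/tutorials/incompressible/icoFoam/cavity/log.icoFoam
+```
+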
## Running Applications in Parallel

@@ -123,17 +123,17 @@ First we must run serial application bockMesh and decomposePar for preparation o
 Create a Bash script test.sh:
 
 ```bash
-    #!/bin/bash
-    module load openfoam/2.2.1-icc-openmpi1.6.5-DP
-    source $FOAM_BASHRC
+#!/bin/bash
+module load openfoam/2.2.1-icc-openmpi1.6.5-DP
+source $FOAM_BASHRC
 
-    # source to run functions
-    . $WM_PROJECT_DIR/bin/tools/RunFunctions
+# source to run functions
+. $WM_PROJECT_DIR/bin/tools/RunFunctions
 
-    cd $FOAM_RUN/tutorials/incompressible/simpleFoam/motorBike
+cd $FOAM_RUN/tutorials/incompressible/simpleFoam/motorBike
 
-    runApplication blockMesh
-    runApplication decomposePar
+runApplication blockMesh
+runApplication decomposePar
 ```
 
 Job submission
@@ -148,25 +148,25 @@ This job create simple block mesh and domain decomposition. Check your decomposi
 Create a PBS script testParallel.pbs:
 
 ```bash
-    #!/bin/bash
-    #PBS -N motorBike
-    #PBS -l select=2:ncpus=16
-    #PBS -l walltime=01:00:00
-    #PBS -q qprod
-    #PBS -A OPEN-0-0
+#!/bin/bash
+#PBS -N motorBike
+#PBS -l select=2:ncpus=16
+#PBS -l walltime=01:00:00
+#PBS -q qprod
+#PBS -A OPEN-0-0
 
-    module load openfoam/2.2.1-icc-openmpi1.6.5-DP
-    source $FOAM_BASHRC
+module load openfoam/2.2.1-icc-openmpi1.6.5-DP
+source $FOAM_BASHRC
 
-    cd $FOAM_RUN/tutorials/incompressible/simpleFoam/motorBike
+cd $FOAM_RUN/tutorials/incompressible/simpleFoam/motorBike
 
-    nproc = 32
+nproc=32
 
-    mpirun -hostfile ${PBS_NODEFILE} -np $nproc snappyHexMesh -overwrite -parallel | tee snappyHexMesh.log
+mpirun -hostfile ${PBS_NODEFILE} -np $nproc snappyHexMesh -overwrite -parallel | tee snappyHexMesh.log
 
-    mpirun -hostfile ${PBS_NODEFILE} -np $nproc potentialFoam -noFunctionObject-writep -parallel | tee potentialFoam.log
+mpirun -hostfile ${PBS_NODEFILE} -np $nproc potentialFoam -noFunctionObjects -writep -parallel | tee potentialFoam.log
 
-    mpirun -hostfile ${PBS_NODEFILE} -np $nproc simpleFoam -parallel | tee simpleFoam.log
+mpirun -hostfile ${PBS_NODEFILE} -np $nproc simpleFoam -parallel | tee simpleFoam.log
 ```
 
 nproc – number of subdomains
diff --git a/docs.it4i/salomon/software/paraview.md b/docs.it4i/software/viz/paraview.md
similarity index 69%
rename from docs.it4i/salomon/software/paraview.md
rename to docs.it4i/software/viz/paraview.md
index ce52d69cfa423588bc43361249a19261197e34e5..7e2bae9a95bc33c6f83756188a5c1c54e4037892 100644
--- a/docs.it4i/salomon/software/paraview.md
+++ b/docs.it4i/software/viz/paraview.md
@@ -12,14 +12,14 @@ Homepage : <http://www.paraview.org/>
 
 ## Installed Version
 
-Currently, version 5.1.2 compiled with intel/2017a against intel MPI library and OSMesa 12.0.2 is installed on Salomon.
+Currently, version 5.1.2, compiled with intel/2017a against the Intel MPI library and OSMesa 12.0.2, is installed on the clusters.
 
 ## Usage
 
-On Salomon, ParaView is to be used in client-server mode. A parallel ParaView server is launched on compute nodes by the user, and client is launched on your desktop PC to control and view the visualization. Download ParaView client application for your OS here: <http://paraview.org/paraview/resources/software.php>.
+On the clusters, ParaView is to be used in client-server mode. A parallel ParaView server is launched on compute nodes by the user, and the client is launched on your desktop PC to control and view the visualization. Download the ParaView client application for your OS here: <http://paraview.org/paraview/resources/software.php>.
 
 !!! warning
-    Your version must match the version number installed on Salomon.
+    Your version must match the version number installed on the cluster.
 
 ### Launching Server
 
 To launch the server, you must first allocate compute nodes, for example
 
 $ qsub -I -q qprod -A OPEN-0-0 -l select=2
 ```
 
-to launch an interactive session on 2 nodes. Refer to [Resource Allocation and Job Execution](../job-submission-and-execution/) for details.
+to launch an interactive session on 2 nodes. Refer to [Resource Allocation and Job Execution](../salomon/job-submission-and-execution/) for details.
 
-After the interactive session is opened, load the ParaView module :
+After the interactive session is opened, load the ParaView module (the following examples are for Salomon; the Anselm variants are labeled inside the blocks):
 
 ```console
 $ ml ParaView/5.1.2-intel-2017a-mpi
 ```
 
-Now launch the parallel server, with number of nodes times 24 processes:
+Now launch the parallel server, with the number of nodes times 24 (16 on Anselm) processes:
 
 ```console
 $ mpirun -np 48 pvserver --use-offscreen-rendering
    Waiting for client...
    Connection URL: cs://r37u29n1006:11111
-   Accepting connection(s): r37u29n1006:11111
+   Accepting connection(s): r37u29n1006:11111
+
+Anselm:
+$ mpirun -np 32 pvserver --use-offscreen-rendering
+    Waiting for client...
+    Connection URL: cs://cn77:11111
+    Accepting connection(s): cn77:11111
 ```
 
 Note that the server is listening on compute node r37u29n1006 in this case; we shall use this information later.
@@ -53,10 +59,11 @@ Note the that the server is listening on compute node r37u29n1006 in this case,
 Because a direct connection is not allowed to compute nodes on Salomon, you must establish an SSH tunnel to connect to the server. Choose a port number on your PC to be forwarded to the ParaView server, for example 12345. If your PC is running Linux, use this command to establish the SSH tunnel:
 
 ```console
-$ ssh -TN -L 12345:r37u29n1006:11111 username@salomon.it4i.cz
+Salomon: $ ssh -TN -L 12345:r37u29n1006:11111 username@salomon.it4i.cz
+Anselm: $ ssh -TN -L 12345:cn77:11111 username@anselm.it4i.cz
 ```
 
-replace username with your login and r37u29n1006 with the name of compute node your ParaView server is running on (see previous step).
+Replace username with your login and r37u29n1006 (cn77) with the name of the compute node your ParaView server is running on (see the previous step).
 
 If you use PuTTY on Windows, load the Salomon connection configuration, then go to *Connection* -> *SSH* -> *Tunnels* to set up the port forwarding.
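+
+Before connecting the ParaView client, you may want to verify that the tunnel is up. A minimal check on Linux, assuming port 12345 from the example above:
+
+```console
+$ netstat -an | grep 12345
+```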
diff --git a/docs.it4i/src/css.css b/docs.it4i/src/css.css index aa07e9978b8b5b682942a07c08ae66179a4ffb4d..967b3127a3bb01ebb8c25c3c200df564f4ac3e09 100644 --- a/docs.it4i/src/css.css +++ b/docs.it4i/src/css.css @@ -1,3 +1,24 @@ +.md-icon--edit:before { + content: "edit"; +} + +.md-icon--check:before { + content: "check"; +} + +.md-icon--help:before { + content: "help"; +} + +.md-nav__link_it4i { + display: block; + -webkit-transition: color .125s; + transition: color .125s; + text-overflow: ellipsis; + cursor: pointer; + overflow: hidden +} + a:not([href*="//"]) { /* CSS for internal links */ } @@ -6,9 +27,128 @@ a.md-footer-social__link.fa.fa-globe { !background: none; } -a[href*="//"]:not( [href*='gitlab.it4i.cz'] ):not( [href*='code.it4i.cz'] ):not( [href*='https://www.it4i.cz'] ) { +a[href*="//"]:not( [href*='gitlab.it4i.cz'] ):not( [href*='code.it4i.cz'] ):not( [href*='https://www.it4i.cz'] ):not( [href*='https://support.it4i.cz'] ) { /*CSS for external links */ background: transparent url("/img/external.png") no-repeat right 0px top 1px; background-size: 12px; padding: 1px 16px 1px 0px; } + +/* */ +.md-nav__link_it4i:after { + font-family: Material Icons; + font-style: normal; + font-variant: normal; + font-weight: 400; + line-height: 1; + text-transform: none; + white-space: nowrap; + speak: none; + word-wrap: normal; + direction: ltr +} + +.md-nav__item--nested>.md-nav__link_it4i:after { + content: "keyboard_arrow_down" +} + +html .md-nav__link_it4i[for=toc],html .md-nav__link_it4i[for=toc]+.md-nav__link_it4i:after,html .md-nav__link_it4i[for=toc]~.md-nav { + display: none +} + +.md-nav__link_it4i[data-md-state=blur] { + color: rgba(0,0,0,.54) +} + +.md-nav__link_it4i--active,.md-nav__link_it4i:active { + color: #3f51b5 +} + +.md-nav__link_it4i:focus,.md-nav__link_it4i:hover { + color: #536dfe +} +@media only screen and (max-width: 59.9375em) { + html .md-nav__link_it4i[for=toc] { + display: block; + padding-right: 4.8rem + } + + html .md-nav__link_it4i[for=toc]:after { + color: inherit; + content: "toc" + } + + html .md-nav__link_it4i[for=toc]+.md-nav__link_it4i { + display: none + } + + html .md-nav__link_it4i[for=toc]~.md-nav { + display: -webkit-box; + display: -ms-flexbox; + display: flex + } + .md-nav--primary .md-nav__item--nested>.md-nav__link_it4i { + padding-right: 4.8rem + } + + .md-nav--primary .md-nav__item--nested>.md-nav__link_it4i:after { + content: "keyboard_arrow_right" + } + + .md-nav--primary .md-nav__link_it4i { + position: relative; + padding: 1.6rem + } + + .md-nav--primary .md-nav__link_it4i:after { + position: absolute; + top: 50%; + right: 1.2rem; + margin-top: -1.2rem; + color: rgba(0,0,0,.54); + font-size: 2.4rem + } + + .md-nav--primary .md-nav__link_it4i:focus:after,.md-nav--primary .md-nav__link_it4i:hover:after { + color: inherit + } + + .md-nav--primary .md-nav--secondary .md-nav .md-nav__link_it4i { + padding-left: 2.8rem + } + + .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav__link_it4i { + padding-left: 4rem + } + + .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav__link_it4i { + padding-left: 5.2rem + } + + .md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav .md-nav__link_it4i { + padding-left: 6.4rem + } +} +@media only screen and (min-width: 76.25em) { + .md-nav__item--nested>.md-nav__link_it4i:after { + display: inline-block; + -webkit-transform-origin: .45em .45em; + transform-origin: .45em .45em; + -webkit-transform-style: preserve-3d; + transform-style: preserve-3d; + vertical-align: -.125em + 
} + + .js .md-nav__item--nested>.md-nav__link_it4i:after { + -webkit-transition: -webkit-transform .4s; + transition: -webkit-transform .4s; + transition: transform .4s; + transition: transform .4s,-webkit-transform .4s + } + + .md-nav__item--nested .md-nav__toggle:checked~.md-nav__link_it4i:after { + -webkit-transform: rotateX(180deg); + transform: rotateX(180deg) + } + +} diff --git a/material/partials/language.html b/material/partials/language.html index bf43cf5884bf71fcaafaf62532efe913e7fc7f81..dc829c24879619ce78586f542de0b8a502d70fbc 100644 --- a/material/partials/language.html +++ b/material/partials/language.html @@ -4,5 +4,5 @@ "footer.next": "Next", "search.placeholder": "Search", "source.link.title": "Go to repository", - "toc.title": "Table of contents" + "toc.title": "On this page" }[key] }}{% endmacro %} diff --git a/material/partials/toc.html b/material/partials/toc.html index 5e44226b15829bf182ae58c126981cfdfe107281..07a551d7756b62e2a89c4941edb746bd904cfb04 100644 --- a/material/partials/toc.html +++ b/material/partials/toc.html @@ -4,12 +4,42 @@ {% if toc_ | first is defined and "\x3ch1 id=" in page.content %} {% set toc_ = (toc_ | first).children %} {% endif %} + {% if page.abs_url.rstrip('index.html').rstrip('/') == '' %} + {% set it4i_link = config.repo_url + '/edit/master/docs.it4i/index.md' %} + {% set it4i_page = '/index.md' %} + {% set it4i_url = 'https://docs.it4i.cz' %} + {% else %} + {% set it4i_link = config.repo_url + '/edit/master/docs.it4i' + page.abs_url.rstrip('index.html').rstrip('/') + '.md' %} + {% set it4i_page = page.abs_url.rstrip('index.html').rstrip('/') + '.md' %} + {% set it4i_url = 'https://docs.it4i.cz' + page.abs_url %} + {% endif %} {% if toc_ | first is defined %} - <label class="md-nav__title" for="toc">{{ lang.t('toc.title') }}</label> - <ul class="md-nav__list" data-md-scrollfix> - {% for toc_item in toc_ %} - {% include "partials/toc-item.html" %} - {% endfor %} - </ul> + <label class="md-nav__title" for="toc">{{ lang.t('toc.title') }}</label> + <ul class="md-nav__list" data-md-scrollfix> + <li class="md-nav__item"> + <a href="{{ it4i_link }}" title="Edit This Page" class="md-nav__link_it4i" target="_blank"> + <i class="md-icon md-icon--edit"> + </i> + Edit This Page + </a> + </li> + <li class="md-nav__item"> + <a href="https://code.it4i.cz/sccs/docs.it4i.cz/issues/new?issue%5Bdescription%5D=Requested change in page [{{ it4i_page }}]({{ it4i_url }}) /cc @hrb33 @kru0052" title="Request Change" class="md-nav__link_it4i" target="_blank"> + <i class="md-icon md-icon--check"> + </i> + Request Change + </a> + </li> + <li class="md-nav__item"> + <a href="https://support.it4i.cz/rt" title="Get Support" class="md-nav__link_it4i" target="_blank"> + <i class="md-icon md-icon--help"> + </i> + Get Support + </a> + </li> + {% for toc_item in toc_ %} + {% include "partials/toc-item.html" %} + {% endfor %} + </ul> {% endif %} </nav> diff --git a/mkdocs.yml b/mkdocs.yml index 4ab7eb725f617721bb6155a34fc358e9a7ce3e2e..f7c7db34867743e589978f855d74fb3f08c7f40b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -3,7 +3,7 @@ theme_dir: 'material' docs_dir: docs.it4i site_favicon: img/favicon.ico -extra_css: +extra_css: - src/css.css # Repository @@ -11,23 +11,21 @@ repo_name: sccs/docs.it4i.cz repo_url: https://code.it4i.cz/sccs/docs.it4i.cz # Copyright -copyright: Copyright (c) 2017 IT4Innovations__VERSION__ +copyright: Copyright (c) 2013-2018 IT4Innovations__VERSION__ pages: - Home: index.md -# - History of Downtimes: downtimes_history.md - General: - Applying for 
Resources: general/applying-for-resources.md - Obtaining Login Credentials: general/obtaining-login-credentials/obtaining-login-credentials.md - Certificates FAQ: general/obtaining-login-credentials/certificates-faq.md - Accessing the Clusters: - #- Introduction: general/accessing-the-clusters/introduction.md - - OpenSSH Keys (UN*X): general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md + - OpenSSH Keys (UNIX): general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md - PuTTY (Windows): general/accessing-the-clusters/shell-access-and-data-transfer/putty.md - X Window System: general/accessing-the-clusters/graphical-user-interface/x-window-system.md - VNC: general/accessing-the-clusters/graphical-user-interface/vnc.md - VPN Access: general/accessing-the-clusters/vpn-access.md -# - VPN-Connection-Fail-in-Win-8.1: general/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md + - Resource Allocation and Job Execution: general/resource_allocation_and_job_execution.md - Salomon Cluster: - Introduction: salomon/introduction.md - Hardware Overview: salomon/hardware-overview.md @@ -58,145 +56,121 @@ pages: - Compute Nodes: anselm/compute-nodes.md - Storage: anselm/storage.md - Network: anselm/network.md - - Remote Visualization: anselm/remote-visualization.md - PRACE User Support: anselm/prace.md - - 'Software': - - Lmod Environment: software/lmod.md - - Modules Matrix: modules-matrix.md - - Singularity Container: software/singularity.md - - EasyBuild: software/easybuild.md - - Salomon Software: - - Available Modules: modules-salomon.md - - Available Modules on UV: modules-salomon-uv.md - - 'ANSYS': - - Introduction: salomon/software/ansys/ansys.md - - ANSYS CFX: salomon/software/ansys/ansys-cfx.md - - ANSYS Fluent: salomon/software/ansys/ansys-fluent.md - - ANSYS LS-DYNA: salomon/software/ansys/ansys-ls-dyna.md - - ANSYS MAPDL: salomon/software/ansys/ansys-mechanical-apdl.md - - Workbench: salomon/software/ansys/workbench.md - - Setting License Preferences: salomon/software/ansys/licensing.md - - Licensing and Available Versions: salomon/software/ansys/setting-license-preferences.md - - 'Bioinformatics': - - Bioinformatics Applications: software/bioinformatics.md - - 'Chemistry': - - Molpro: salomon/software/chemistry/molpro.md - - NWChem: salomon/software/chemistry/nwchem.md - - Phono3py: salomon/software/chemistry/phono3py.md - - ORCA: software/orca.md - - Compilers: salomon/software/compilers.md - - 'COMSOL': - - COMSOL: salomon/software/comsol/comsol-multiphysics.md - - Licensing and Available Versions: salomon/software/comsol/licensing-and-available-versions.md - - 'Debuggers': - - Introduction: salomon/software/debuggers/Introduction.md - - Aislinn: salomon/software/debuggers/aislinn.md - - Allinea Forge (DDT,MAP): salomon/software/debuggers/allinea-ddt.md - - Allinea Performance Reports: salomon/software/debuggers/allinea-performance-reports.md - - Intel VTune Amplifier XE: salomon/software/debuggers/intel-vtune-amplifier.md - - Total View: salomon/software/debuggers/total-view.md - - Valgrind: salomon/software/debuggers/valgrind.md - - Vampir: salomon/software/debuggers/vampir.md - - 'Intel Suite': - - Introduction: salomon/software/intel-suite/intel-parallel-studio-introduction.md - - Intel Advisor: salomon/software/intel-suite/intel-advisor.md - - Intel Compilers: salomon/software/intel-suite/intel-compilers.md - - Intel Debugger: salomon/software/intel-suite/intel-debugger.md - - Intel IPP: 
salomon/software/intel-suite/intel-integrated-performance-primitives.md - - Intel Inspector: salomon/software/intel-suite/intel-inspector.md - - Intel MKL: salomon/software/intel-suite/intel-mkl.md - - Intel TBB: salomon/software/intel-suite/intel-tbb.md - - Intel Trace Analyzer and Collector: salomon/software/intel-suite/intel-trace-analyzer-and-collector.md - - Intel Xeon Phi: salomon/software/intel-xeon-phi.md - - Java: salomon/software/java.md - - 'MPI': - - Introduction: salomon/software/mpi/mpi.md - - MPI4Py (MPI for Python): salomon/software/mpi/mpi4py-mpi-for-python.md - - Running Open MPI: salomon/software/mpi/Running_OpenMPI.md - - 'Numerical Languages': - - Introduction: salomon/software/numerical-languages/introduction.md - - Matlab: salomon/software/numerical-languages/matlab.md - - Octave: salomon/software/numerical-languages/octave.md - - R: salomon/software/numerical-languages/r.md - - OpenCoarrays: salomon/software/numerical-languages/opencoarrays.md - - Operating System: salomon/software/operating-system.md - - ParaView: salomon/software/paraview.md - - Anselm Software: - - Available Modules: modules-anselm.md - - 'ANSYS': - - Introduction: anselm/software/ansys/ansys.md - - ANSYS CFX: anselm/software/ansys/ansys-cfx.md - - ANSYS Fluent: anselm/software/ansys/ansys-fluent.md - - ANSYS LS-DYNA: anselm/software/ansys/ansys-ls-dyna.md - - ANSYS MAPDL: anselm/software/ansys/ansys-mechanical-apdl.md - - LS-DYNA: anselm/software/ansys/ls-dyna.md - - 'Bioinformatics': - - Bioinformatics Applications: software/bioinformatics.md - - 'Debuggers': - - Allinea Forge (DDT,MAP): anselm/software/debuggers/allinea-ddt.md - - Allinea Performance Reports: anselm/software/debuggers/allinea-performance-reports.md - - CUBE: anselm/software/debuggers/cube.md - - Intel Performance Counter Monitor: anselm/software/debuggers/intel-performance-counter-monitor.md - - Intel VTune Amplifier: anselm/software/debuggers/intel-vtune-amplifier.md - - PAPI: anselm/software/debuggers/papi.md - - Scalasca: anselm/software/debuggers/scalasca.md - - Score-P: anselm/software/debuggers/score-p.md - - Total View: anselm/software/debuggers/total-view.md - - VNC: anselm/software/debuggers/debuggers.md - - Valgrind: anselm/software/debuggers/valgrind.md - - Vampir: anselm/software/debuggers/vampir.md - - 'Chemistry': - - Molpro: anselm/software/chemistry/molpro.md - - NWChem: anselm/software/chemistry/nwchem.md - - ORCA: software/orca.md - - COMSOL: anselm/software/comsol-multiphysics.md - - Compilers: anselm/software/compilers.md - - GPI-2: anselm/software/gpi2.md - - 'Intel Suite': - - Introduction: anselm/software/intel-suite/introduction.md - - Intel Compilers: anselm/software/intel-suite/intel-compilers.md - - Intel Debugger: anselm/software/intel-suite/intel-debugger.md - - Intel IPP: anselm/software/intel-suite/intel-integrated-performance-primitives.md - - Intel MKL: anselm/software/intel-suite/intel-mkl.md - - Intel TBB: anselm/software/intel-suite/intel-tbb.md - - Intel Xeon Phi: anselm/software/intel-xeon-phi.md - - ISV Licenses: anselm/software/isv_licenses.md - - Java: anselm/software/java.md - - 'MPI': - - Introduction: anselm/software/mpi/mpi.md - - MPI4Py (MPI for Python): anselm/software/mpi/mpi4py-mpi-for-python.md - - Running Open MPI: anselm/software/mpi/Running_OpenMPI.md - - Running MPICH2: anselm/software/mpi/running-mpich2.md - - 'Numerical Languages': - - Introduction: anselm/software/numerical-languages/introduction.md - - Matlab 2013-2014: anselm/software/numerical-languages/matlab_1314.md 
- - Matlab: anselm/software/numerical-languages/matlab.md - - Octave: anselm/software/numerical-languages/octave.md - - R: anselm/software/numerical-languages/r.md - - 'Numerical Libraries': - - FFTW: anselm/software/numerical-libraries/fftw.md - - GSL: anselm/software/numerical-libraries/gsl.md - - HDF5: anselm/software/numerical-libraries/hdf5.md - - Intel Numerical Libraries: anselm/software/numerical-libraries/intel-numerical-libraries.md - - MAGMA for Intel Xeon Phi: anselm/software/numerical-libraries/magma-for-intel-xeon-phi.md - - PETSc: anselm/software/numerical-libraries/petsc.md - - Trilinos: anselm/software/numerical-libraries/trilinos.md + - Software: + - Modules: + - Lmod Environment: software/modules/lmod.md + - Intel Xeon Phi Environment: software/mic/mic_environment.md + - Modules Matrix: modules-matrix.md + - Job Features: job-features.md + - Available Salomon Modules: modules-salomon.md + - Available Salomon Modules on UV: modules-salomon-uv.md + - Available Salomon Modules on PHI Cards: modules-salomon-phi.md + - Available Anselm Modules: modules-anselm.md + - ISV Licenses: software/isv_licenses.md + - Bioinformatics: + - Bioinformatics Applications: software/bio/bioinformatics.md + - Omics Master: + - Overview: software/bio/omics-master/overview.md + - Diagnostic Component (TEAM): software/bio/omics-master/diagnostic-component-team.md + - Priorization Component (BiERApp): software/bio/omics-master/priorization-component-bierapp.md + - CAE: + - COMSOL: + - COMSOL: software/cae/comsol/comsol-multiphysics.md + - Licensing and Available Versions: software/cae/comsol/licensing-and-available-versions.md + - Chemistry: + - Molpro: software/chemistry/molpro.md + - Orca: software/chemistry/orca.md + - NWChem: software/chemistry/nwchem.md + - Phono3py: software/chemistry/phono3py.md + - Compilers: software/compilers.md + - Debuggers: + - Introduction: software/debuggers/Introduction.md + - Aislinn: software/debuggers/aislinn.md + - Allinea Forge (DDT,MAP): software/debuggers/allinea-ddt.md + - Allinea Performance Reports: software/debuggers/allinea-performance-reports.md + - CUBE: software/debuggers/cube.md + - Intel Performance Counter Monitor: software/debuggers/intel-performance-counter-monitor.md + - Intel VTune Amplifier XE: software/debuggers/intel-vtune-amplifier.md + - PAPI: software/debuggers/papi.md + - Scalasca: software/debuggers/scalasca.md + - Score-P: software/debuggers/score-p.md + - Total View: software/debuggers/total-view.md + - Valgrind: software/debuggers/valgrind.md + - Vampir: software/debuggers/vampir.md + - GPU: - NVIDIA CUDA: anselm/software/nvidia-cuda.md - - 'Omics Master': - - Diagnostic Component (TEAM): anselm/software/omics-master/diagnostic-component-team.md - - Priorization Component (BiERApp): anselm/software/omics-master/priorization-component-bierapp.md - - Overview: anselm/software/omics-master/overview.md - - OpenFOAM: anselm/software/openfoam.md - - Operating System: anselm/software/operating-system.md - - ParaView: anselm/software/paraview.md - - Virtualization: anselm/software/virtualization.md -# - Modules Matrix: modules-matrix.md + - Intel: + - Intel Suite: + - Introduction: software/intel/intel-suite/intel-parallel-studio-introduction.md + - Intel Advisor: software/intel/intel-suite/intel-advisor.md + - Intel Compilers: software/intel/intel-suite/intel-compilers.md + - Intel Debugger: software/intel/intel-suite/intel-debugger.md + - Intel IPP: software/intel/intel-suite/intel-integrated-performance-primitives.md + - Intel Inspector: 
software/intel/intel-suite/intel-inspector.md + - Intel MKL: software/intel/intel-suite/intel-mkl.md + - Intel TBB: software/intel/intel-suite/intel-tbb.md + - Intel Trace Analyzer and Collector: software/intel/intel-suite/intel-trace-analyzer-and-collector.md + - Intel Xeon Phi: + - Intel Xeon Phi Salomon: software/intel/intel-xeon-phi-salomon.md + - Intel Xeon Phi Anselm: software/intel/intel-xeon-phi-anselm.md + - Machine Learning: + - Introduction: software/machine-learning/introduction.md + - TensorFlow: software/machine-learning/tensorflow.md + - MPI: + - Introduction: software/mpi/mpi.md + - MPI4Py (MPI for Python): software/mpi/mpi4py-mpi-for-python.md + - Running Open MPI: software/mpi/Running_OpenMPI.md + - Running MPICH2: software/mpi/running-mpich2.md + - Numerical Languages: + - Introduction: software/numerical-languages/introduction.md + - Clp: salomon/software/numerical-libraries/Clp.md + - R: software/numerical-languages/r.md + - Matlab: software/numerical-languages/matlab.md + - Matlab 2013-2014: software/numerical-languages/matlab_1314.md + - Octave: software/numerical-languages/octave.md + - OpenCoarrays: software/numerical-languages/opencoarrays.md + - Numerical Libraries: + - FFTW: software/numerical-libraries/fftw.md + - GSL: software/numerical-libraries/gsl.md + - HDF5: software/numerical-libraries/hdf5.md + - Intel Numerical Libraries: software/numerical-libraries/intel-numerical-libraries.md + - MAGMA for Intel Xeon Phi: software/numerical-libraries/magma-for-intel-xeon-phi.md + - PETSc: software/numerical-libraries/petsc.md + - Trilinos: software/numerical-libraries/trilinos.md + - Languages: + - Java: software/lang/java.md + - C#: software/lang/csc.md + - Python: software/lang/python.md + - Python-Conda: software/lang/conda.md + - Phys: + - LMGC90: salomon/software/phys/LMGC90.md + - PragTic: salomon/software/phys/PragTic.md + - Tools: + - ANSYS: + - Introduction: software/tools/ansys/ansys.md + - ANSYS CFX: software/tools/ansys/ansys-cfx.md + - ANSYS Fluent: software/tools/ansys/ansys-fluent.md + - ANSYS LS-DYNA: software/tools/ansys/ansys-ls-dyna.md + - ANSYS MAPDL: software/tools/ansys/ansys-mechanical-apdl.md + - LS-DYNA: software/tools/ansys/ls-dyna.md + - Workbench: software/tools/ansys/workbench.md + - Setting License Preferences: software/tools/ansys/licensing.md + - Licensing and Available Versions: software/tools/ansys/setting-license-preferences.md + - EasyBuild: software/tools/easybuild.md + - Singularity Container: + - Singularity: software/tools/singularity.md + - Singularity on IT4Innovations: software/tools/singularity-it4i.md + - Spack: software/tools/spack.md + - Virtualization: software/tools/virtualization.md + - Visualisation: + - GPI-2: software/viz/gpi2.md + - OpenFOAM: software/viz/openfoam.md + - ParaView: software/viz/paraview.md - PBS Pro Documentation: pbspro.md -# - Testing: -# - Colors: colors.md + - API Documentation: apiv1.md extra: - #logo: img/logo2.png repo_icon: gitlab palette: primary: 'grey' @@ -217,12 +191,11 @@ markdown_extensions: - codehilite - markdown.extensions.admonition: - pymdownx.arithmatex: - - markdown.extensions.toc: + - markdown.extensions.toc(permalink=true): slugify: !!python/name:pymdownx.slugs.uslugify - permalink: î…— - pymdownx.snippets: base_path: docs.it4i/snippets google_analytics: - 'UA-90498826-1' - - 'auto' + - 'auto' \ No newline at end of file diff --git a/pathcheck.sh b/pathcheck.sh new file mode 100644 index 0000000000000000000000000000000000000000..932b0fb9c118eaa05616f85b7b095786fb9939bd --- 
/dev/null
+++ b/pathcheck.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# This script checks relative links within the repository only; it does not check external pages.
+
+for file in "$@"; do
+  # extract relative link targets, skipping anchors, external URLs, and mail links
+  check=$(grep -Po "\[.*?\]\([^ ]*?\)" "$file" | grep -v "#" | grep -vE "http|@|www|ftp|none" | sed 's/\[.*\]//g' | sed 's/[()]//g' | sed 's/\/$/.md/g')
+  if [ -n "$check" ]; then
+    wrong=0
+    for line in $check; do
+      pathtocheck=$(dirname "$file")/$line
+      if [ -f "$pathtocheck" ]; then
+        : # link target exists
+      else
+        if [ "$wrong" -eq 0 ]; then
+          printf '\n+++++ %s +++++\n\n' "$file"
+        fi
+        wrong=1
+        echo "wrong link in $pathtocheck"
+      fi
+    done
+  fi
+done
+echo ""
diff --git a/scripts/get_cvs.sh b/scripts/get_cvs.sh
index 2726c1a49dfdbcaa71c748a07894c8f1a9b676d3..703af83ef4e0e0e28942f250927db5e0517ab735 100755
--- a/scripts/get_cvs.sh
+++ b/scripts/get_cvs.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
-curl -s https://code.it4i.cz/hrb33/modules-anselm/raw/master/anselm.csv -o modules-anselm.csv
-curl -s https://code.it4i.cz/hrb33/modules-salomon/raw/master/salomon.csv -o modules-salomon.csv
-curl -s https://code.it4i.cz/hrb33/modules-salomon/raw/master/salomon-uv.csv -o modules-salomon-uv.csv
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/anselm.csv -o modules-anselm.csv
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/salomon.csv -o modules-salomon.csv
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/uv2000.csv -o modules-salomon-uv.csv
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/phi.csv -o modules-salomon-phi.csv
diff --git a/scripts/get_modules.sh b/scripts/get_modules.sh
index b356329cdd2d1cd45497e965c8646456e1f249bf..bade433dacc2632b0fb12cad3e606f5340edd70f 100755
--- a/scripts/get_modules.sh
+++ b/scripts/get_modules.sh
@@ -1,7 +1,9 @@
 #!/bin/bash
-curl -s https://code.it4i.cz/hrb33/modules-anselm/raw/master/anselm.md -o docs.it4i/modules-anselm.md
-curl -s https://code.it4i.cz/hrb33/modules-salomon/raw/master/salomon.md -o docs.it4i/modules-salomon.md
-curl -s https://code.it4i.cz/hrb33/modules-salomon/raw/master/salomon-uv.md -o docs.it4i/modules-salomon-uv.md
-curl -s https://code.it4i.cz/hrb33/modules-anselm/raw/master/anselm.csv -o scripts/modules-anselm.csv
-curl -s https://code.it4i.cz/hrb33/modules-salomon/raw/master/salomon.csv -o scripts/modules-salomon.csv
-curl -s https://code.it4i.cz/hrb33/modules-salomon/raw/master/salomon-uv.csv -o scripts/modules-salomon-uv.csv
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/anselm.md -o docs.it4i/modules-anselm.md
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/salomon.md -o docs.it4i/modules-salomon.md
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/uv2000.md -o docs.it4i/modules-salomon-uv.md
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/phi.md -o docs.it4i/modules-salomon-phi.md
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/anselm.csv -o scripts/modules-anselm.csv
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/salomon.csv -o scripts/modules-salomon.csv
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/uv2000.csv -o scripts/modules-salomon-uv.csv
+curl -s https://code.it4i.cz/sccs/it4i-modules/raw/master/phi.csv -o scripts/modules-salomon-phi.csv
diff --git a/scripts/modules-json.py b/scripts/modules-json.py
index f5abe70efbc797b62ec0f642bc11d29351e0ab6c..57474110d6e3271443fc895a48a1283569aeb427 100755
--- a/scripts/modules-json.py
+++ b/scripts/modules-json.py
@@ -9,7 +9,7 @@ def get_data(filename):
     '''function to read the data from the input csv file to use in the analysis'''
     reader = []  # Just in case the file open fails
     with open(filename, 'rb') as f:
-        reader = csv.reader(f,delimiter=',')
+        reader = csv.reader(f,delimiter=',')  # returns all the data from the csv file in list form
     #f.close() # May need to close the file when done
     return list(reader) # only return the reader when you have finished.
@@ -63,5 +63,5 @@ packages = {}
 for m in sorted(software.items(), key=lambda i: i[0].lower()):
     packages[m[0]]=sorted(m[1], key=LooseVersion)[len(m[1])-1]
-data = {'total': len(packages), 'projects': packages }
+data = {'total': len(packages), 'projects': packages }
 print json.dumps(data)
diff --git a/scripts/modules-matrix.py b/scripts/modules-matrix.py
index eab86f22b7953ab637699f35c7b441bb7038c0d6..d84745504d38a8225cce36fe834bbfea56d2fa41 100755
--- a/scripts/modules-matrix.py
+++ b/scripts/modules-matrix.py
@@ -8,7 +8,7 @@ def get_data(filename):
     '''function to read the data from the input csv file to use in the analysis'''
     reader = []  # Just in case the file open fails
     with open(filename, 'rb') as f:
-        reader = csv.reader(f,delimiter=',')
+        reader = csv.reader(f,delimiter=',')  # returns all the data from the csv file in list form
     #f.close() # May need to close the file when done
     return list(reader) # only return the reader when you have finished.