...
 
site/
scripts/*.csv
venv/
......@@ -2,79 +2,74 @@ stages:
- test
- build
- deploy
- after_test
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
docs:
stage: test
image: it4innovations/docker-mdcheck:latest
allow_failure: true
script:
# not functional, unnecessary, it also picks up console output
#two spaces:
# stage: test
# image: davidhrbac/docker-mdcheck:latest
# allow_failure: true
# before_script:
# - echo "== Files having more than one space between two characters =="
# - find docs.it4i/ -name '*.md' ! -path "docs.it4i/software*" -exec grep -nr "[[:alpha:]] [[:alpha:]]" -l {} +
# script:
# - find docs.it4i/ -name '*.md' ! -path "docs.it4i/software*" -exec grep -nr "[[:alpha:]] [[:alpha:]]" -l {} +
- mdl -r ~MD013,~MD010,~MD014,~MD024,~MD026,~MD029,~MD033,~MD036,~MD037,~MD046 *.md docs.it4i # BUGS
capitalize:
stage: test
# allow_failure: true
image: it4innovations/docker-mkdocscheck:latest
script:
- find mkdocs.yml docs.it4i/ \( -name '*.md' -o -name '*.yml' \) -print0 | xargs -0 -n1 scripts/titlemd_test.py
spell check:
stage: test
image: davidhrbac/docker-npmcheck:latest
allow_failure: true
script:
#- npm i markdown-spellcheck -g
- mdspell '**/*.md' '!docs.it4i/module*.md' '!docs.it4i/anselm-cluster-documentation/software/omics-master/overview.md' '!docs.it4i/downtimes_history.md' -rns --en-us
ext_links:
stage: test
image: it4innovations/docker-mdcheck:latest
allow_failure: true
after_script:
# remove JSON results
- rm *.json
script:
#- find docs.it4i/ -name '*.md' -exec grep --color -l http {} + | xargs awesome_bot -t 10
- find docs.it4i/ -name '*.md' -exec grep --color -l http {} + | xargs awesome_bot -t 10 --allow-dupe --allow-redirect
#- |
# COMMIT_MESSAGE=$(git log -1 --pretty=%B | grep "Merge branch 'hot_fix' into 'master'" )
# if [[ ${COMMIT_MESSAGE} == *hot_fix* ]]; then
# echo "Skipping ci build"
# exit 0
# else
# find docs.it4i/ -name '*.md' ! -name 'modules-*.md' -exec grep --color -l http {} + | xargs awesome_bot -t 10 --allow-dupe --allow-redirect
# fi
only:
- master
404s:
stage: after_test
image: it4innovations/docker-mkdocscheck:latest
before_script:
- wget -V
- echo https://docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
- wget --spider -e robots=off -o wget.log -r -p https://docs.it4i.cz/devel/$CI_BUILD_REF_NAME/ || true
script:
- cat wget.log | awk '/^Found [0-9]+ broken link[s]?.$/,/FINISHED/ { rc=-1; print $0 }; END { exit rc }'
mkdocs:
stage: build
image: it4innovations/docker-mkdocscheck:latest
cache:
paths:
- .cache/pip
- venv/
before_script:
- python -V # Print out python version for debugging
- virtualenv venv
- source venv/bin/activate
- pip install -r requirements.txt
script:
- mkdocs -V
#- apt-get update
#- apt-get -y install git
# add version to footer
- bash scripts/add_version.sh
# get modules list from clusters
- bash scripts/get_modules.sh
# generate site_url
- (if [ "${CI_BUILD_REF_NAME}" != 'master' ]; then sed -i "s/\(site_url.*$\)/\1devel\/$CI_BUILD_REF_NAME\//" mkdocs.yml;fi);
# generate URL for code link
- sed -i "s/master/$CI_BUILD_REF_NAME/g" material/partials/toc.html
# regenerate modules matrix
- python scripts/modules-matrix.py > docs.it4i/modules-matrix.md
- python scripts/modules-json.py > docs.it4i/modules-matrix.json
- curl -f0 https://code.it4i.cz/sccs/scs-api-public/raw/master/scs_api.server_public.md -o docs.it4i/apiv1.md
# build pages
- mkdocs build
# compress search_index.json
#- bash scripts/clean_json.sh site/mkdocs/search_index.json
# replace broken links in 404.html
- sed -i 's,href="" title=",href="/" title=",g' site/404.html
- cp site/404.html site/403.html
......@@ -86,19 +81,10 @@ mkdocs:
- site
expire_in: 1 week
## unnecessary in the documentation
#shellcheck:
# stage: test
# image: davidhrbac/docker-shellcheck:latest
# allow_failure: true
# script:
# - which shellcheck || apt-get update && apt-get install -y shellcheck
# - find . -name *.sh -not -path "./docs.it4i/*" -not -path "./site/*" -exec shellcheck {} +
deploy to stage:
environment: stage
stage: deploy
image: it4innovations/docker-mkdocscheck:latest
before_script:
# install ssh-agent
- 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
......@@ -111,7 +97,6 @@ deploy to stage:
# WARNING: use only in docker container, if you use it with shell you will overwrite your user's ssh config
- mkdir -p ~/.ssh
- echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
- useradd -lM nginx
script:
- chown nginx:nginx site -R
- rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
......@@ -121,7 +106,7 @@ deploy to stage:
deploy to production:
environment: production
stage: deploy
image: it4innovations/docker-mkdocscheck:latest
before_script:
# install ssh-agent
- 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
......@@ -134,7 +119,6 @@ deploy to production:
# WARNING: use only in docker container, if you use it with shell you will overwrite your user's ssh config
- mkdir -p ~/.ssh
- echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
- useradd -lM nginx
script:
- chown nginx:nginx site -R
- rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/site/
......
# User documentation
This project contains IT4Innovations user documentation source.
## Development
### Install
```console
$ sudo apt install libpython-dev
$ virtualenv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
```
### Package upgrade with pip
```console
$ pip list -o
$ pip install --upgrade package
$ pip freeze | sed '/pkg-resources==/d' > requirements.txt
```
## Environments
......@@ -12,11 +31,6 @@ This is project contain IT4Innovation user documentation source.
* [http://facelessuser.github.io/pymdown-extensions/](http://facelessuser.github.io/pymdown-extensions/)
* [http://squidfunk.github.io/mkdocs-material/](http://squidfunk.github.io/mkdocs-material/)
## Rules
* [spellcheck](https://github.com/lukeapage/node-markdown-spellcheck)
* [SI units](http://physics.nist.gov/cuu/Units/checklist.html)
```
fair-share
InfiniBand
......@@ -41,33 +55,3 @@ $$
```
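To run the spell check locally before pushing, you can mirror the `spell check` CI job (the package name, glob patterns, and flags below are taken from `.gitlab-ci.yml`; adjust the exclusions as needed):
```console
$ npm i markdown-spellcheck -g
$ mdspell '**/*.md' '!docs.it4i/module*.md' '!docs.it4i/anselm-cluster-documentation/software/omics-master/overview.md' '!docs.it4i/downtimes_history.md' -rns --en-us
```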
To enable MathJax on a page, add the line `---8<--- "mathjax.md"` at the end of the file.
## Development Environment
### MkDocs
Documentation pages are built with [MkDocs](http://www.mkdocs.org/), [MkDocs at GitHub](https://github.com/mkdocs/mkdocs/). You need to install MkDocs locally so that you can build the pages and run the development web server.
```bash
pip install mkdocs pygments pymdown-extensions
```
You can build the pages with `mkdocs build`.
```bash
‹hrb33-toshiba› 11:47 $ mkdocs build
INFO - Cleaning site directory
INFO - Building documentation to directory: /home/hrb33/Dokumenty/dev/it4i/docs.it4i.git/site
```
You can start local server with `mkdocs serve`.
```bash
✔ ~/Dokumenty/dev/it4i/docs.it4i.git [readme L|✚ 1…6⚑ 1]
‹hrb33-toshiba› 11:47 $ mkdocs serve
INFO - Building documentation...
INFO - Cleaning site directory
[I 170124 11:47:27 server:283] Serving on http://127.0.0.1:8000
[I 170124 11:47:27 handlers:60] Start watching changes
[I 170124 11:47:27 handlers:62] Start detecting changes
```
# Compute Nodes
## Node Configuration
Anselm is a cluster of x86-64 Intel based nodes built with Bull Extreme Computing bullx technology. The cluster contains four types of compute nodes.
### Compute Nodes Without Accelerators
* 180 nodes
* 2880 cores in total
......@@ -14,7 +14,7 @@ Anselm is cluster of x86-64 Intel based nodes built on Bull Extreme Computing bu
* bullx B510 blade servers
* cn[1-180]
### Compute Nodes With a GPU Accelerator
* 23 nodes
* 368 cores in total
......@@ -25,7 +25,7 @@ Anselm is cluster of x86-64 Intel based nodes built on Bull Extreme Computing bu
* bullx B515 blade servers
* cn[181-203]
### Compute Nodes With a MIC Accelerator
* 4 nodes
* 64 cores in total
......@@ -42,26 +42,26 @@ Anselm is cluster of x86-64 Intel based nodes built on Bull Extreme Computing bu
* 32 cores in total
* 2 Intel Sandy Bridge E5-2665, 8-core, 2.4GHz processors per node
* 512 GB of physical memory per node
* two 300GB SAS 3,5” 15krpm HDD (RAID1) per node
* two 100GB SLC SSD per node
* bullx R423-E3 servers
* cn[208-209]
![](../img/bullxB510.png)
**Anselm bullx B510 servers**
### Compute Node Summary
| Node type | Count | Range | Memory | Cores | Queues |
| ---------------------------- | ----- | ----------- | ------ | ----------- | -------------------------------------- |
| Nodes without an accelerator | 180 | cn[1-180] | 64GB | 16 @ 2.4GHz | qexp, qprod, qlong, qfree, qprace, qatlas |
| Nodes with a GPU accelerator | 23 | cn[181-203] | 96GB | 16 @ 2.3GHz | qnvidia, qexp |
| Nodes with a MIC accelerator | 4 | cn[204-207] | 96GB | 16 @ 2.3GHz | qmic, qexp |
| Fat compute nodes | 2 | cn[208-209] | 512GB | 16 @ 2.4GHz | qfat, qexp |
## Processor Architecture
Anselm is equipped with Intel Sandy Bridge processors Intel Xeon E5-2665 (nodes without accelerators and fat nodes) and Intel Xeon E5-2470 (nodes with accelerators). The processors support Advanced Vector Extensions (AVX) 256-bit instruction set.
### Intel Sandy Bridge E5-2665 Processor
......@@ -83,7 +83,7 @@ Anselm is equipped with Intel Sandy Bridge processors Intel Xeon E5-2665 (nodes
* L3: 20 MB per processor
* memory bandwidth at the level of the processor: 38.4 GB/s
Nodes equipped with the Intel Xeon E5-2665 CPU have the PBS resource attribute cpu_freq = 24 set; nodes equipped with the Intel Xeon E5-2470 CPU have cpu_freq = 23 set.
```console
$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16:cpu_freq=24 -I
......@@ -97,9 +97,11 @@ Intel Turbo Boost Technology is used by default, you can disable it for all nod
$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I
```
## Memory Architecture
The cluster contains three types of compute nodes.
### Compute Nodes Without Accelerators
* 2 sockets
* Memory Controllers are integrated into processors.
......@@ -109,7 +111,7 @@ $ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I
* Data rate support: up to 1600MT/s
* Populated memory: 8 x 8 GB DDR3 DIMM 1600 MHz
### Compute Nodes With a GPU or MIC Accelerator
* 2 sockets
* Memory Controllers are integrated into processors.
......@@ -119,7 +121,7 @@ $ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I
* Data rate support: up to 1600MT/s
* Populated memory: 6 x 16 GB DDR3 DIMM 1600 MHz
### Fat Compute Nodes
* 2 sockets
* Memory Controllers are integrated into processors.
......
# Environment and Modules
## Environment Customization
After logging in, you may want to configure the environment. Write your preferred path definitions, aliases, functions, and module loads in the .bashrc file:
```console
$ cat ./bashrc
# ./bashrc
# Source global definitions
if [ -f /etc/bashrc ]; then
. /etc/bashrc
fi
# User specific aliases and functions
alias qs='qstat -a'
module load PrgEnv-gnu
# Display information to standard output - only in interactive ssh session
if [ -n "$SSH_TTY" ]
then
module list # Display loaded modules
fi
```
!!! note
Do not run commands outputting to standard output (echo, module list, etc.) in .bashrc for non-interactive SSH sessions. It breaks fundamental functionality (scp, PBS) of your account! Consider using the SSH session interactivity test for such commands, as shown in the previous example.
## Application Modules
In order to configure your shell for running a particular application on Anselm, we use the Module package interface.
!!! note
The modules set up the application paths, library paths, and environment variables for running a particular application.
We also have a second modules repository, created using a tool called EasyBuild. On the Salomon cluster, all modules are built by this tool. If you want to use software from this repository, follow the instructions in the [Application Modules Path Expansion](environment-and-modules/#application-modules-path-expansion) section.
The modules may be loaded, unloaded, and switched according to momentary needs.
To check the available modules, use:
```console
$ ml av
```
To load a module, for example the octave module, use:
```console
$ ml octave
```
Loading the octave module will set up the paths and environment variables of your active shell so that you are ready to run the Octave software.
To check the loaded modules, use:
```console
$ ml
```
To unload a module, for example the octave module, use:
```console
$ ml -octave
```
The following modules set up the development environment:
* PrgEnv-gnu sets up the GNU development environment in conjunction with the bullx MPI library
* PrgEnv-intel sets up the INTEL development environment in conjunction with the Intel MPI library
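For example, to switch your shell to the GNU development environment and verify the loaded modules (module names taken from the list above):
```console
$ ml PrgEnv-gnu
$ ml
```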
## Application Modules Path Expansion
All application modules on the Anselm cluster (and beyond) will be built using a tool called [EasyBuild](http://hpcugent.github.io/easybuild/ "EasyBuild").
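A minimal sketch of the path expansion, assuming the EasyBuild module tree is installed under `/apps/easybuild/modules/all` (this path is an assumption and may differ on the cluster):
```console
# add the assumed EasyBuild module repository to the module search path
$ export MODULEPATH=${MODULEPATH}:/apps/easybuild/modules/all
```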
This command expands your module search path. You can also add it to the .bashrc file to expand the path permanently. Afterwards, you can use the same commands to list/add/remove modules as described above.
# Hardware Overview
The Anselm cluster consists of 209 computational nodes named cn[1-209] of which 180 are regular compute nodes, 23 are GPU Kepler K20 accelerated nodes, 4 are MIC Xeon Phi 5110P accelerated nodes, and 2 are fat nodes. Each node is a powerful x86-64 computer, equipped with 16 cores (two eight-core Intel Sandy Bridge processors), at least 64 GB of RAM, and a local hard drive. User access to the Anselm cluster is provided by two login nodes login[1,2]. The nodes are interlinked through high speed InfiniBand and Ethernet networks. All nodes share a 320 TB /home disk for storage of user files. The 146 TB shared /scratch storage is available for scratch data.
The Fat nodes are equipped with a large amount (512 GB) of memory. Virtualization infrastructure provides resources to run long term servers and services in virtual mode. Fat nodes and virtual servers may access 45 TB of dedicated block storage. Accelerated nodes, fat nodes, and virtualization infrastructure are available [upon request][a] from a PI.
Schematic representation of the Anselm cluster. Each box represents a node (computer) or storage capacity:
......@@ -12,21 +12,21 @@ The cluster compute nodes cn[1-207] are organized within 13 chassis.
There are four types of compute nodes:
* 180 compute nodes without an accelerator
* 23 compute nodes with a GPU accelerator - an NVIDIA Tesla Kepler K20m
* 4 compute nodes with a MIC accelerator - an Intel Xeon Phi 5110P
* 2 fat nodes - equipped with 512 GB of RAM and two 100 GB SSD drives
[More about Compute nodes][1].
GPU and accelerated nodes are available upon request, see the [Resources Allocation Policy][2].
All of these nodes are interconnected through fast InfiniBand and Ethernet networks. [More about the Network][3].
Every chassis provides an InfiniBand switch, marked **isw**, connecting all nodes in the chassis, as well as connecting the chassis to the upper level switches.
All of the nodes share a 360 TB /home disk for storage of user files. The 146 TB shared /scratch storage is available for scratch data. These file systems are provided by the Lustre parallel file system. There is also local disk storage available on all compute nodes in /lscratch. [More about Storage][4].
User access to the Anselm cluster is provided by two login nodes login1, login2, and data mover node dm1. [More about accessing the cluster][5].
The parameters are summarized in the following tables:
......@@ -35,8 +35,8 @@ The parameters are summarized in the following tables:
| Primary purpose | High Performance Computing |
| Architecture of compute nodes | x86-64 |
| Operating system | Linux (CentOS) |
| [**Compute nodes**][1] | |
| Total | 209 |
| Processor cores | 16 (2 x 8 cores) |
| RAM | min. 64 GB, min. 4 GB per core |
| Local disk drive | yes - usually 500 GB |
......@@ -57,4 +57,12 @@ The parameters are summarized in the following tables:
| MIC accelerated | 2 x Intel Sandy Bridge E5-2470, 2.3 GHz | 96 GB | Intel Xeon Phi 5110P |
| Fat compute node | 2 x Intel Sandy Bridge E5-2665, 2.4 GHz | 512 GB | - |
For more details refer to [Compute nodes][1], [Storage][4], and [Network][3].
[1]: compute-nodes.md
[2]: ../general/resources-allocation-policy.md
[3]: network.md
[4]: storage.md
[5]: ../general/shell-and-data-access.md
[a]: https://support.it4i.cz/rt
# Introduction
Welcome to Anselm supercomputer cluster. The Anselm cluster consists of 209 compute nodes, totaling 3344 compute cores with 15 TB RAM, giving over 94 TFLOP/s theoretical peak performance. Each node is a powerful x86-64 computer, equipped with 16 cores, at least 64 GB of RAM, and a 500 GB hard disk drive. Nodes are interconnected through a fully non-blocking fat-tree InfiniBand network, and are equipped with Intel Sandy Bridge processors. A few nodes are also equipped with NVIDIA Kepler GPU or Intel Xeon Phi MIC accelerators. Read more in [Hardware Overview][1].
The cluster runs with an operating system which is compatible with the RedHat [Linux family][a]. We have installed a wide range of software packages targeted at different scientific domains. These packages are accessible via the [modules environment][2].
The user data shared file-system (HOME, 320 TB) and job data shared file-system (SCRATCH, 146 TB) are available to users.
The PBS Professional workload manager provides [computing resources allocations and job execution][3].
Read more on how to [apply for resources][4], [obtain login credentials][5] and [access the cluster][6].
[1]: hardware-overview.md
[2]: ../environment-and-modules.md
[3]: ../general/resources-allocation-policy.md
[4]: ../general/applying-for-resources.md
[5]: ../general/obtaining-login-credentials/obtaining-login-credentials.md
[6]: ../general/shell-and-data-access.md
[a]: http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg
# Network
All of the compute and login nodes of Anselm are interconnected through an [InfiniBand][a] QDR network and a Gigabit [Ethernet][b] network. Both networks may be used to transfer user data.
## InfiniBand Network
All of the compute and login nodes of Anselm are interconnected through a high-bandwidth, low-latency [InfiniBand][a] QDR network (IB 4 x QDR, 40 Gbps). The network topology is a fully non-blocking fat-tree.
The compute nodes may be accessed via the InfiniBand network using the ib0 network interface, in the address range 10.2.1.1-209. MPI may be used to establish a native InfiniBand connection among the nodes.
!!! note
The network provides **2170 MB/s** transfer rates via the TCP connection (single stream) and up to **3600 MB/s** via the native InfiniBand protocol.
The Fat tree topology ensures that peak transfer rates are achieved between any two nodes, independent of network traffic exchanged among other nodes concurrently.
......@@ -19,6 +19,8 @@ The compute nodes may be accessed via the regular Gigabit Ethernet network inter
## Example
In this example, we access the node cn110 through the InfiniBand network via the ib0 interface, then from cn110 to cn108 through the Ethernet network.
```console
$ qsub -q qexp -l select=4:ncpus=16 -N Name0 ./myjob
$ qstat -n -u username
......@@ -32,4 +34,5 @@ $ ssh 10.2.1.110
$ ssh 10.1.1.108
```
[a]: http://en.wikipedia.org/wiki/InfiniBand
[b]: http://en.wikipedia.org/wiki/Ethernet
# Remote Visualization Service
## Introduction
The goal of this service is to provide users with GPU-accelerated use of OpenGL applications, especially for pre- and post-processing work, where not only GPU performance is needed, but also fast access to the shared file systems of the cluster and a reasonable amount of RAM.
The service is based on the integration of the open source tools VirtualGL and TurboVNC with the cluster's job scheduler, PBS Professional.
Currently, two compute nodes are dedicated to this service, with the following configuration for each node:
| [**Visualization node configuration**](compute-nodes/) | |
| ------------------------------------------------------ | --------------------------------------- |
| CPU | 2 x Intel Sandy Bridge E5-2670, 2.6 GHz |
| Processor cores | 16 (2 x 8 cores) |
| RAM | 64 GB, min. 4 GB per core |
| GPU | NVIDIA Quadro 4000, 2 GB RAM |
| Local disk drive | yes - 500 GB |
| Compute network | InfiniBand QDR |
## Schematic Overview
![rem_vis_scheme](../img/scheme.png "rem_vis_scheme")
![rem_vis_legend](../img/legend.png "rem_vis_legend")
## How to Use the Service
### Setup and Start Your Own TurboVNC Server
TurboVNC is designed and implemented for cooperation with VirtualGL and is available for free for all major platforms. For more information and downloads, refer to: <http://sourceforge.net/projects/turbovnc/>
**Always use TurboVNC on both sides** (server and client); **don't mix TurboVNC with other VNC implementations** (TightVNC, TigerVNC, ...), as the VNC protocol implementation may differ slightly and diminish your user experience by introducing picture artifacts, etc.
The procedure is:
#### 1. Connect to a Login Node
Please [follow the documentation](shell-and-data-access/).
#### 2. Run Your Own Instance of TurboVNC Server
For OpenGL acceleration, a **24-bit color depth must be used**. Otherwise, only the geometry (desktop size) definition is needed.
!!! hint
On the first VNC server run, you need to define a password.
This example defines a desktop with dimensions of 1200x700 pixels and a 24-bit color depth.
```console
$ module load turbovnc/1.2.2
$ vncserver -geometry 1200x700 -depth 24
Desktop 'TurboVNC: login2:1 (username)' started on display login2:1
Starting applications specified in /home/username/.vnc/xstartup.turbovnc
Log file is /home/username/.vnc/login2:1.log
```
#### 3. Remember Which Display Number Your VNC Server Runs (You Will Need It in the Future to Stop the Server)
```console
$ vncserver -list
TurboVNC server sessions:
X DISPLAY # PROCESS ID
:1 23269
```
In this example the VNC server runs on display **:1**.
#### 4. Remember the Exact Login Node, Where Your VNC Server Runs
```console
$ uname -n
login2
```
In this example the VNC server runs on **login2**.
#### 5. Remember on Which TCP Port Your Own VNC Server Is Running
To get the port, look into the log file of your VNC server.
```console
$ grep -E "VNC.*port" /home/username/.vnc/login2:1.log
20/02/2015 14:46:41 Listening for VNC connections on TCP port 5901
```
In this example the VNC server listens on TCP port **5901**.
#### 6. Connect to the Login Node Where Your VNC Server Runs With SSH to Tunnel Your VNC Session
Tunnel the TCP port on which your VNC server is listening.
```console
$ ssh login2.anselm.it4i.cz -L 5901:localhost:5901
```
If you use Windows and PuTTY, refer to the port forwarding setup in the documentation:
[x-window-and-vnc#section-12](../general/accessing-the-clusters/graphical-user-interface/x-window-system/)
#### 7. If You Don't Have TurboVNC Installed on Your Workstation
Get it from: <http://sourceforge.net/projects/turbovnc/>
#### 8. Run TurboVNC Viewer From Your Workstation
Mind that you should connect through the SSH tunneled port. In this example it is 5901 on your workstation (localhost).
```console
$ vncviewer localhost:5901
```
If you use the Windows version of the TurboVNC Viewer, just run the Viewer and use the address **localhost:5901**.
#### 9. Proceed to the Chapter "Access the Visualization Node"
Now you should have a working TurboVNC session connected to your workstation.
#### 10. After You End Your Visualization Session
Don't forget to correctly shut down your own VNC server on the login node!
```console
$ vncserver -kill :1
```
### Access the Visualization Node
**To access the node, use the dedicated PBS Professional scheduler queue qviz**. The queue has the following properties:
| queue | active project | project resources | nodes | min ncpus | priority | authorization | walltime |
| ---------------------------- | -------------- | ----------------- | ----- | --------- | -------- | ------------- | ---------------- |
| **qviz** Visualization queue | yes | none required | 2 | 4 | 150 | no | 1 hour / 8 hours |
Currently, when accessing the node, each user gets 4 cores of a CPU allocated, thus approximately 16 GB of RAM and 1/4 of the GPU capacity.
!!! note
If more GPU power or RAM is required, it is recommended to allocate one whole node per user, so that all 16 cores, the whole RAM, and the whole GPU are exclusive. This is currently also the maximum allowed allocation per user. One hour of work is allocated by default; the user may ask for 2 hours maximum.
To access the visualization node, follow these steps:
#### 1. In Your VNC Session, Open a Terminal and Allocate a Node Using PBSPro qsub Command
This step is necessary to allow you to proceed with next steps.
```console
$ qsub -I -q qviz -A PROJECT_ID
```
In this example the default values for CPU cores and usage time are used.
```console
$ qsub -I -q qviz -A PROJECT_ID -l select=1:ncpus=16 -l walltime=02:00:00
```
Substitute **PROJECT_ID** with the assigned project identification string.
In this example a whole node for 2 hours is requested.
If there are free resources for your request, you will have a shell running on an assigned node. Please remember the name of the node.
```console
$ uname -n
srv8
```
In this example the visualization session was assigned to node **srv8**.
#### 2. In Your VNC Session Open Another Terminal (Keep the One With Interactive PBSPro Job Open)
Set up the VirtualGL connection to the node which PBSPro allocated for your job.
```console
$ vglconnect srv8
```
You will be connected through the created VirtualGL tunnel to the visualization node, where you will have a shell.
#### 3. Load the VirtualGL Module
```console
$ module load virtualgl/2.4
```
#### 4. Run Your Desired OpenGL Accelerated Application Using the VirtualGL Script "vglrun"
```console
$ vglrun glxgears
```
If you want to run an OpenGL application which is available through modules, you first need to load the respective module. E.g., to run the **Mentat** OpenGL application from the **MARC** software package, use:
```console
$ module load marc/2013.1
$ vglrun mentat
```
#### 5. After You End Your Work With the OpenGL Application
Just log out from the visualization node, exit both opened terminals, and end your VNC server session as described above.
## Tips and Tricks
If you want to increase the responsiveness of the visualization, adjust your TurboVNC client settings in this way:
![rem_vis_settings](../img/turbovncclientsetting.png "rem_vis_settings")
To get an idea of how the settings affect the resulting picture quality, three levels of "JPEG image quality" are demonstrated:
**JPEG image quality = 30**
![rem_vis_q3](../img/quality3.png "rem_vis_q3")
**JPEG image quality = 15**
![rem_vis_q2](../img/quality2.png "rem_vis_q2")
**JPEG image quality = 10**
![rem_vis_q1](../img/quality1.png "rem_vis_q1")
# Resource Allocation and Job Execution
To run a [job](job-submission-and-execution/), [computational resources](resources-allocation-policy/) for this particular job must be allocated. This is done via the PBS Pro job workload manager software, which efficiently distributes workloads across the supercomputer. Extensive information about PBS Pro can be found in the [official documentation here](../pbspro/), especially in the PBS Pro User's Guide.
## Resources Allocation Policy
The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and the resources available to the Project. [The Fair-share](job-priority/) at Anselm ensures that individual users may consume approximately equal amounts of resources per week. The resources are accessible via several queues for queueing the jobs. The queues provide prioritized and exclusive access to the computational resources. The following queues are available to Anselm users:
* **qexp**, the Express queue
* **qprod**, the Production queue
* **qlong**, the Long queue
* **qnvidia**, **qmic**, **qfat**, the Dedicated queues
* **qfree**, the Free resource utilization queue
!!! note
Check the queue status at <https://extranet.it4i.cz/anselm/>
Read more on the [Resource Allocation Policy](resources-allocation-policy/) page.
## Job Submission and Execution
!!! note
Use the **qsub** command to submit your jobs.
The qsub command submits the job into the queue and creates a request to the PBS Job manager for the allocation of the specified resources. The **smallest allocation unit is an entire node, 16 cores**, with the exception of the qexp queue. The resources will be allocated when available, subject to allocation policies and constraints. **After the resources are allocated, the jobscript or interactive shell is executed on the first of the allocated nodes.**
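For illustration, a basic production job submission may look as follows (PROJECT_ID and the jobscript name are placeholders):
```console
$ qsub -A PROJECT_ID -q qprod -l select=2:ncpus=16 -l walltime=24:00:00 ./myjob
```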
Read more on the [Job submission and execution](job-submission-and-execution/) page.
## Capacity Computing
!!! note
Use Job arrays when running a huge number of jobs.
Use GNU Parallel and/or Job arrays when running (many) single-core jobs.
In many cases, it is useful to submit a huge number (100+) of computational jobs into the PBS queue system. A huge number of (small) jobs is one of the most effective ways to execute embarrassingly parallel calculations, achieving the best runtime, throughput, and computer utilization. In this chapter, we discuss the recommended way to run a huge number of jobs, including **ways to run a huge number of single-core jobs**.
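As a sketch (the project, jobscript name, and sub-job range are placeholders), a PBS job array of 100 sub-jobs can be submitted with the -J option:
```console
$ qsub -A PROJECT_ID -q qprod -J 1-100 ./jobscript.sh
```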
Read more on the [Capacity computing](capacity-computing/) page.
# Resources Allocation Policy
## Job Queue Policies
The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and the resources available to the Project. The Fair-share at Anselm ensures that individual users may consume approximately equal amounts of resources per week. Detailed information can be found in the [Job scheduling](job-priority/) section. The resources are accessible via several queues for queueing the jobs. The queues provide prioritized and exclusive access to the computational resources. The following table provides the queue partitioning overview:
!!! note
Check the queue status at <https://extranet.it4i.cz/anselm/>
| queue | active project | project resources | nodes | min ncpus | priority | authorization | walltime |
| ------------------- | -------------- | ----------------- | ---------------------------------------------------- | --------- | -------- | ------------- | -------- |
| qexp | no | none required | 2 reserved, 31 total including MIC, GPU | 1 | 150 | no | 1 h |
| qprod | yes | 0 | 178 nodes w/o accelerator | 16 | 0 | no | 24/48 h |
| qlong | yes | 0 | 60 nodes w/o accelerator | 16 | 0 | no | 72/144 h |
| qnvidia, qmic | yes | 0 | 23 nvidia nodes, 4 mic nodes | 16 | 200 | yes | 24/48 h |
| qfat | yes | 0 | 2 fat nodes | 16 | 200 | yes | 24/144 h |
| qfree | yes | none required | 178 w/o accelerator | 16 | -1024 | no | 12 h |
!!! note
**The qfree queue is not free of charge**. [Normal accounting](#resources-accounting-policy) applies. However, it allows for utilization of free resources once a Project has exhausted all of its allocated computational resources. This does not apply to Director's Discretion projects (DD projects) by default. Usage of qfree after exhaustion of a DD project's computational resources is allowed upon request for this queue.
**The qexp queue is equipped with nodes that do not all have the same CPU clock speed.** Should you need the very same CPU speed, you have to select the proper nodes during the PBS job submission.
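For example, to request a qexp node with the 2.4 GHz CPUs, the cpu_freq resource attribute described in the Compute Nodes section can be used:
```console
$ qsub -q qexp -l select=1:ncpus=16:cpu_freq=24 -I
```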
* **qexp**, the Express queue: This queue is dedicated to testing and running very small jobs. It is not required to specify a project to enter the qexp. There are always 2 nodes reserved for this queue (w/o accelerators); a maximum of 8 nodes is available via the qexp for a particular user, from a pool of nodes containing Nvidia accelerated nodes (cn181-203) and MIC accelerated nodes (cn204-207). This also enables testing and tuning of accelerated code. The nodes may be allocated on a per-core basis. No special authorization is required to use it. The maximum runtime in qexp is 1 hour.
* **qprod**, the Production queue: This queue is intended for normal production runs. It is required that an active project with nonzero remaining resources is specified to enter the qprod. All nodes may be accessed via the qprod queue, except the reserved ones. 178 nodes without accelerators are included. Full nodes, 16 cores per node, are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qprod is 48 hours.
* **qlong**, the Long queue: This queue is intended for long production runs. It is required that an active project with nonzero remaining resources is specified to enter the qlong. Only 60 nodes without accelerators may be accessed via the qlong queue. Full nodes, 16 cores per node, are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qlong is 144 hours (three times the standard qprod time, 3 x 48 h).
* **qnvidia**, **qmic**, **qfat**, the Dedicated queues: The qnvidia queue is dedicated to accessing the Nvidia accelerated nodes, qmic to accessing the MIC nodes, and qfat the Fat nodes. It is required that an active project with nonzero remaining resources is specified to enter these queues. 23 nvidia, 4 mic, and 2 fat nodes are included. Full nodes, 16 cores per node, are allocated. The queues run with very high priority; the jobs will be scheduled ahead of the jobs coming from the qexp queue. A PI needs to explicitly ask [support](https://support.it4i.cz/rt/) for authorization to enter the dedicated queues for all users associated with her/his Project.
* **qfree**, the Free resource queue: The qfree queue is intended for utilization of free resources, after a Project has exhausted all of its allocated computational resources (this does not apply to DD projects by default; DD projects have to request permission to use qfree after exhaustion of their computational resources). It is required that an active project is specified to enter the queue; however, no remaining resources are required. Consumed resources will be accounted to the Project. Only 178 nodes without accelerators may be accessed from this queue. Full nodes, 16 cores per node, are allocated. The queue runs with very low priority and no special authorization is required to use it. The maximum runtime in qfree is 12 hours.
## Queue Notes
The job wall clock time defaults to **half the maximum time**, see table above. Longer wall time limits can be [set manually, see examples](job-submission-and-execution/).
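For example, to request the full 48-hour qprod limit instead of the default (PROJECT_ID and the jobscript name are placeholders):
```console
$ qsub -A PROJECT_ID -q qprod -l select=1:ncpus=16 -l walltime=48:00:00 ./myjob
```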
Jobs that exceed the reserved wall clock time (Req'd Time) get killed automatically. The wall clock time limit can be changed for queued jobs (state Q) using the qalter command; however, it cannot be changed for a running job (state R).
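A sketch of extending the wall time of a queued job (the job ID is a placeholder):
```console
$ qalter -l walltime=12:00:00 JOB_ID
```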
Anselm users may check current queue configuration at <https://extranet.it4i.cz/anselm/queues>.
## Queue Status
!!! tip
Check the status of jobs, queues and compute nodes at <https://extranet.it4i.cz/anselm/>
![rspbs web interface](../img/rsweb.png)
Display the queue status on Anselm:
```console
$ qstat -q
```
The PBS allocation overview may also be obtained using the rspbs command.
```console
$ rspbs
Usage: rspbs [options]
Options:
--version show program's version number and exit
-h, --help show this help message and exit
--get-node-ncpu-chart
Print chart of allocated ncpus per node
--summary Print summary
--get-server-details Print server
--get-queues Print queues
--get-queues-details Print queues details
--get-reservations Print reservations
--get-reservations-details
Print reservations details
--get-nodes Print nodes of PBS complex
--get-nodeset Print nodeset of PBS complex
--get-nodes-details Print nodes details
--get-jobs Print jobs
--get-jobs-details Print jobs details
--get-jobs-check-params
Print jobid, job state, session_id, user, nodes
--get-users Print users of jobs
--get-allocated-nodes
Print allocated nodes of jobs
--get-allocated-nodeset
Print allocated nodeset of jobs
--get-node-users Print node users
--get-node-jobs Print node jobs
--get-node-ncpus Print number of ncpus per node
--get-node-allocated-ncpus
Print number of allocated ncpus per node
--get-node-qlist Print node qlist
--get-node-ibswitch Print node ibswitch
--get-user-nodes Print user nodes
--get-user-nodeset Print user nodeset
--get-user-jobs Print user jobs
--get-user-jobc Print number of jobs per user
--get-user-nodec Print number of allocated nodes per user
--get-user-ncpus Print number of allocated ncpus per user
--get-qlist-nodes Print qlist nodes
--get-qlist-nodeset Print qlist nodeset
--get-ibswitch-nodes Print ibswitch nodes
--get-ibswitch-nodeset
Print ibswitch nodeset
--state=STATE Only for given job state
--jobid=JOBID Only for given job ID
--user=USER Only for given user
--node=NODE Only for given node
--nodestate=NODESTATE
Only for given node state (affects only --get-node*
--get-qlist-* --get-ibswitch-* actions)
--incl-finished Include finished jobs
```
---8<--- "resource_accounting.md"
---8<--- "mathjax.md"
# Accessing the Cluster
## Shell Access
The Anselm cluster is accessed by SSH protocol via login nodes login1 and login2 at address anselm.it4i.cz. The login nodes may be addressed specifically, by prepending the login node name to the address.
| Login address | Port | Protocol | Login node |
| --------------------- | ---- | -------- | -------------------------------------------- |
| anselm.it4i.cz | 22 | ssh | round-robin DNS record for login1 and login2 |
| login1.anselm.it4i.cz | 22 | ssh | login1 |
| login2.anselm.it4i.cz | 22 | ssh | login2 |
The authentication is by the [private key](../general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys/)
!!! note
Please verify SSH fingerprints during the first logon. They are identical on all login nodes:
md5:
29:b3:f4:64:b0:73:f5:6f:a7:85:0f:e0:0d:be:76:bf (DSA)
d4:6f:5c:18:f4:3f:70:ef:bc:fc:cc:2b:fd:13:36:b7 (RSA)
sha256:
LX2034TYy6Lf0Q7Zf3zOIZuFlG09DaSGROGBz6LBUy4 (DSA)
+DcED3GDoA9piuyvQOho+ltNvwB9SJSYXbB639hbejY (RSA)
Private key authentication:
On **Linux** or **Mac**, use
```console
$ ssh -i /path/to/id_rsa username@anselm.it4i.cz
```
If you see the warning message "UNPROTECTED PRIVATE KEY FILE!", use this command to set more restrictive permissions on the private key file.
```console
$ chmod 600 /path/to/id_rsa
```
On **Windows**, use [PuTTY ssh client](../general/accessing-the-clusters/shell-access-and-data-transfer/putty.md).
After logging in, you will see the command prompt:
```console
_
/\ | |
/ \ _ __ ___ ___| |_ __ ___
/ /\ \ | '_ \/ __|/ _ \ | '_ ` _ \
/ ____ \| | | \__ \ __/ | | | | | |
/_/ \_\_| |_|___/\___|_|_| |_| |_|
http://www.it4i.cz/?lang=en
Last login: Tue Jul 9 15:57:38 2013 from your-host.example.com
[username@login2.anselm ~]$
```
An example of a successful login to the cluster is shown above.
!!! note
The environment is **not** shared between login nodes, except for [shared filesystems](storage/#shared-filesystems).
## Data Transfer
Data in and out of the system may be transferred by the [scp](http://en.wikipedia.org/wiki/Secure_copy) and sftp protocols. (Not available yet.) In case large volumes of data are transferred, use dedicated data mover node dm1.anselm.it4i.cz for increased performance.
| Address | Port | Protocol |
| --------------------- | ---- | --------- |
| anselm.it4i.cz | 22 | scp, sftp |
| login1.anselm.it4i.cz | 22 | scp, sftp |
| login2.anselm.it4i.cz | 22 | scp, sftp |
| dm1.anselm.it4i.cz | 22 | scp, sftp |
The authentication is by the [private key](../general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys/)
!!! note
Data transfer rates up to **160MB/s** can be achieved with scp or sftp.
1TB may be transferred in 1:50h.
To achieve 160 MB/s transfer rates, the end user must be connected by a 10G line all the way to IT4Innovations and use a computer with a fast processor for the transfer. Using a Gigabit Ethernet connection, up to 110 MB/s may be expected. A fast cipher (aes128-ctr) should be used.
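For example, to force the aes128-ctr cipher and use the dedicated data mover node (the file and directory names are placeholders):
```console
$ scp -c aes128-ctr -i /path/to/id_rsa my-local-file username@dm1.anselm.it4i.cz:directory/
```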
!!! note
If you experience degraded data transfer performance, consult your local network provider.
On Linux or Mac, use an scp or sftp client to transfer the data to Anselm:
```console
$ scp -i /path/to/id_rsa my-local-file username@anselm.it4i.cz:directory/file
```
```console
$ scp -i /path/to/id_rsa -r my-local-dir username@anselm.it4i.cz:directory
```
or
```console
$ sftp -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz
```
A very convenient way to transfer files in and out of Anselm is via the FUSE filesystem [sshfs](http://linux.die.net/man/1/sshfs):
```console
$ sshfs -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz:. mountpoint
```
Using sshfs, the user's Anselm home directory will be mounted on your local computer, just like an external disk.
Learn more about ssh, scp, and sshfs by reading the manpages:
```console
$ man ssh
$ man scp
$ man sshfs
```
On Windows, use [WinSCP client](http://winscp.net/eng/download.php) to transfer the data. The [win-sshfs client](http://code.google.com/p/win-sshfs/) provides a way to mount the Anselm filesystems directly as an external disc.
More information about the shared file systems is available [here](storage/).
## Connection Restrictions
Outgoing connections from Anselm Cluster login nodes to the outside world are restricted to the following ports:
| Port | Protocol |
| ---- | -------- |
| 22 | ssh |
| 80 | http |
| 443 | https |
| 9418 | git |
!!! note
Please use **ssh port forwarding** and proxy servers to connect from Anselm to all other remote ports.
Outgoing connections from Anselm Cluster compute nodes are restricted to the internal network. Direct connections from compute nodes to the outside world are cut.
## Port Forwarding
### Port Forwarding From Login Nodes
!!! note
Port forwarding allows an application running on Anselm to connect to an arbitrary remote host and port.
It works by tunneling the connection from Anselm back to the user's workstation and forwarding from the workstation to the remote host.
Pick some unused port on Anselm login node (for example 6000) and establish the port forwarding:
```console
$ ssh -R 6000:remote.host.com:1234 anselm.it4i.cz
```
In this example, we establish port forwarding between port 6000 on Anselm and port 1234 on remote.host.com. By accessing localhost:6000 on Anselm, an application will see the response of remote.host.com:1234. The traffic will run via the user's local workstation.
Port forwarding may be done **using PuTTY** as well. On the PuTTY Configuration screen, load your Anselm configuration first. Then go to Connection->SSH->Tunnels to set up the port forwarding. Click the Remote radio button. Insert 6000 into the Source port textbox. Insert remote.host.com:1234. Click the Add button, then Open.
Port forwarding may be established directly to the remote host. However, this requires that the user has SSH access to remote.host.com:
```console
$ ssh -L 6000:localhost:1234 remote.host.com
```
!!! note
Port number 6000 is chosen as an example only. Pick any free port.
### Port Forwarding From Compute Nodes
Remote port forwarding from compute nodes allows applications running on the compute nodes to access hosts outside Anselm Cluster.
First, establish the remote port forwarding from the login node, as [described above](#port-forwarding-from-login-nodes).
Second, invoke port forwarding from the compute node to the login node. Insert the following line into your jobscript or interactive shell:
```console
$ ssh -TN -f -L 6000:localhost:6000 login1
```
In this example, we assume that port forwarding from login1:6000 to remote.host.com:1234 has been established beforehand. By accessing localhost:6000, an application running on a compute node will see response of remote.host.com:1234
### Using Proxy Servers
Port forwarding is static; each single port is mapped to a particular port on a remote host. A connection to another remote host requires a new forward.
!!! note
Applications with built-in proxy support experience unlimited access to remote hosts via a single proxy server.
To establish a local proxy server on your workstation, install and run SOCKS proxy server software. On Linux, the sshd daemon provides the functionality. To establish a SOCKS proxy server listening on port 1080, run:
```console
$ ssh -D 1080 localhost
```
On Windows, install and run the free, open source [Sock Puppet](http://sockspuppet.com/) server.
Once the proxy server is running, establish ssh port forwarding from Anselm to the proxy server, port 1080, exactly as [described above](#port-forwarding-from-login-nodes).
```console
$ ssh -R 6000:localhost:1080 anselm.it4i.cz
```
Now, configure the application's proxy settings to **localhost:6000**. Use port forwarding to access the [proxy server from compute nodes](#port-forwarding-from-compute-nodes) as well.
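Combining the steps above, an application on a compute node can reach the workstation proxy through the login node; a sketch to run from the jobscript or interactive shell, assuming the remote forward from login1:6000 to the workstation's port 1080 is already in place:
```console
$ ssh -TN -f -L 6000:localhost:6000 login1
```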
## Graphical User Interface
* The [X Window system](../general/accessing-the-clusters/graphical-user-interface/x-window-system/) is a principal way to get GUI access to the clusters.
* The [Virtual Network Computing](../general/accessing-the-clusters/graphical-user-interface/vnc/) is a graphical [desktop sharing](http://en.wikipedia.org/wiki/Desktop_sharing) system that uses the [Remote Frame Buffer protocol](http://en.wikipedia.org/wiki/RFB_protocol) to remotely control another [computer](http://en.wikipedia.org/wiki/Computer).
## VPN Access
* Access to IT4Innovations internal resources via [VPN](../general/accessing-the-clusters/vpn-access/).
# Compute Nodes
Barbora is a cluster of x86-64 Intel based nodes built with BullSequana Computing technology. The cluster contains three types of compute nodes.
## Compute Nodes Without Accelerators
* 192 nodes
* 6912 cores in total
* 2x Intel Cascade Lake 6240, 18-core, 2.6 GHz processors per node
* 192 GB DDR4 2933MT/s of physical memory per node (12x 16 GB)
* BullSequana X1120 blade servers
* 2995.2 GFLOP/s per compute node
* 1x 1 GB Ethernet
* 1x HDR100 IB port
* 3 compute nodes per X1120 blade server
* cn[1-192]
![](img/BullSequanaX1120.png)
## Compute Nodes With a GPU Accelerator
* 8 nodes
* 192 cores in total
* two Intel Skylake Gold 6126, 12-core, 2.6 GHz processors per node
* 192 GB DDR4 2933MT/s with ECC of physical memory per node (12x 16 GB)
* 4x GPU accelerator NVIDIA Tesla V100-SXM2 per node
* Bullsequana X410-E5 NVLink-V blade servers
* 1996.8 GFLOP/s per compute node
* GPU-to-GPU All-to-All NVLINK 2.0, GPU-Direct
* 1 GB Ethernet
* 2x HDR100 IB ports
* cn[194-200]
![](img/BullSequanaX410E5GPUNVLink.jpg)
## Fat Compute Node
* 1x BullSequana X808 server
* 128 cores in total
* 8 Intel Skylake 8153, 16-core, 2.0 GHz, 125W
* 6144 GiB DDR4 2667MT/s of physical memory per node (96x 64 GB)
* 2x HDR100 IB port
* 8192 GFLOP/s
* cn[201]
![](img/BullSequanaX808.jpg)
## Compute Node Summary
| Node type | Count | Range | Memory | Cores | Queues |
| ---------------------------- | ----- | ----------- | ------ | ----------- | -------------------------- |
| Nodes without an accelerator | 189 | cn[1-189] | 192GB | 36 @ 2.6 GHz | qexp, qprod, qlong, qfree |
| Nodes with a GPU accelerator | 8 | cn[190-197] | 192GB | 24 @ 2.6 GHz | qnvidia |
| Fat compute nodes | 1 | cn[198] | 6144GiB | 128 @ 2.0 GHz | qfat |
## Processor Architecture
Barbora is equipped with Intel Cascade Lake processors Intel Xeon 6240 (nodes without accelerators), Intel Skylake Gold 6126 (nodes with accelerators) and Intel Skylake Platinum 8153.
### Intel [Cascade Lake 6240][d]
Cascade Lake core is largely identical to that of [Skylake's][a]. For in-depth detail of the Skylake core/pipeline see [Skylake (client) § Pipeline][b].
Xeon Gold 6240 is a 64-bit 18-core x86 multi-socket high performance server microprocessor set to be introduced by Intel in late 2018. This chip supports up to 4-way multiprocessing. The Gold 6240, which is based on the Cascade Lake microarchitecture and is manufactured on a 14 nm process, sports 2 AVX-512 FMA units as well as three Ultra Path Interconnect links. This microprocessor, which operates at 2.6 GHz with a TDP of ? W and a turbo boost frequency of up to 3.9 GHz, supports up to ? GiB of hexa-channel DDR4-2666 ECC memory.
* **Family**: Xeon Gold
* **Cores**: 18
* **Threads**: 36
* **L1I Cache**: 576 KiB, 18x32 KiB, 8-way set associative
* **L1D Cache**: 576 KiB, 18x32 KiB, 8-way set associative, write-back
* **L2 Cache**: 18 MiB, 18x1 MiB, 16-way set associative, write-back
* **L3 Cache**: 24.75 MiB, 18x1.375 MiB, 11-way set associative, write-back
* **Instructions**: x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA3, F16C, BMI, BMI2, VT-x, VT-d, TXT, TSX, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVE, SGX, MPX, AVX-512 (New instructions for [Vector Neural Network Instructions][c])
* **Frequency**: 2.6 GHz
* **Max turbo**: 3.9 GHz
* **Process**: 14 nm
* **TDP**: 140+ W
### Intel [Skylake Gold 6126][e]
Xeon Gold 6126 is a 64-bit dodeca-core x86 multi-socket high performance server microprocessor introduced by Intel in mid-2017. This chip supports up to 4-way multiprocessing. The Gold 6126, which is based on the server configuration of the Skylake microarchitecture and is manufactured on a 14 nm+ process, sports 2 AVX-512 FMA units as well as three Ultra Path Interconnect links. This microprocessor, which operates at 2.6 GHz with a TDP of 125 W and a turbo boost frequency of up to 3.7 GHz, supports up to 768 GiB of hexa-channel DDR4-2666 ECC memory.
* **Family**: Xeon Gold
* **Cores**: 12
* **Threads**: 24
* **L1I Cache**: 384 KiB, 12x32 KiB, 8-way set associative
* **L1D Cache**: 384 KiB, 12x32 KiB, 8-way set associative, write-back
* **L2 Cache**: 12 MiB, 12x1 MiB, 16-way set associative, write-back
* **L3 Cache**: 19.25 MiB, 14x1.375 MiB, 11-way set associative, write-back
* **Instructions**: x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA3, F16C, BMI, BMI2, VT-x, VT-d, TXT, TSX, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVE, SGX, MPX, AVX-512
* **Frequency**: 2.6 GHz
* **Max turbo**: 3.7 GHz
* **Process**: 14 nm
* **TDP**: 125 W
### Intel [Skylake Platinum 8153][f]
Xeon Platinum 8153 is a 64-bit 16-core x86 multi-socket highest performance server microprocessor introduced by Intel in mid-2017. This chip supports up to 8-way multiprocessing. The Platinum 8153, which is based on the server configuration of the Skylake microarchitecture and is manufactured on a 14 nm+ process, sports 2 AVX-512 FMA units as well as three Ultra Path Interconnect links. This microprocessor, which operates at 2 GHz with a TDP of 125 W and a turbo boost frequency of up to 2.8 GHz, supports up to 768 GiB of hexa-channel DDR4-2666 ECC memory.
* **Family**: Xeon Platinum
* **Cores**: 16
* **Threads**: 32
* **L1I Cache**: 512 KiB, 16x32 KiB, 8-way set associative
* **L1D Cache**: 512 KiB, 16x32 KiB, 8-way set associative, write-back
* **L2 Cache**: 16 MiB, 16x1 MiB, 16-way set associative, write-back
* **L3 Cache**: 22 MiB, 16x1.375 MiB, 11-way set associative, write-back
* **Instructions**: x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA3, F16C, BMI, BMI2, VT-x, VT-d, TXT, TSX, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVE, SGX, MPX, AVX-512
* **Frequency**: 2.0 GHz
* **Max turbo**: 2.8 GHz
* **Process**: 14 nm
* **TDP**: 125 W
## GPU Accelerator
Barbora is equipped with [NVIDIA Tesla V100-SXM2][g] accelerator.
![](img/gpu-v100.png)
| NVIDIA Tesla V100-SXM2 | |
| --- | --- |
| GPU Architecture | NVIDIA Volta |
| NVIDIA Tensor Cores | 640 |
| NVIDIA CUDA® Cores | 5 120 |
| Double-Precision Performance | 7.8 TFLOP/s |
| Single-Precision Performance | 15.7 TFLOP/s |
| Tensor Performance | 125 TFLOP/s |
| GPU Memory | 16 GB HBM2 |
| Memory Bandwidth | 900 GB/sec |
| ECC | Yes |
| Interconnect Bandwidth | 300 GB/sec |
| System Interface | NVIDIA NVLink |
| Form Factor | SXM2 |
| Max Power Consumption | 300 W |
| Thermal Solution | Passive |
| Compute APIs | CUDA, DirectCompute, OpenCL™, OpenACC |
[a]: https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(server)#Core
[b]: https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Pipeline
[c]: https://en.wikichip.org/wiki/x86/avx512vnni
[d]: https://en.wikichip.org/wiki/intel/xeon_gold/6240
[e]: https://en.wikichip.org/wiki/intel/xeon_gold/6126
[f]: https://en.wikichip.org/wiki/intel/xeon_platinum/8153
[g]: https://images.nvidia.com/content/technologies/volta/pdf/tesla-volta-v100-datasheet-letter-fnl-web.pdf
# Hardware Overview
The Barbora cluster consists of 201 computational nodes named **cn[1-201]**, of which 192 are regular compute nodes, 8 are GPU Tesla V100 accelerated nodes, and 1 is a fat node. Each node is a powerful x86-64 computer, equipped with 36/24/128 cores (18-core Intel Cascade Lake 6240 / 12-core Intel Skylake Gold 6126 / 16-core Intel Skylake 8153) and at least 192 GB of RAM. User access to the Barbora cluster is provided by two login nodes **login[1,2]**. The nodes are interlinked through high speed InfiniBand and Ethernet networks.
The Fat nodes are equipped with a large amount (6144 GB) of memory. Virtualization infrastructure provides resources to run long term servers and services in virtual mode. Accelerated nodes, fat nodes, and virtualization infrastructure are available [upon request][a] from a PI.
**There are three types of compute nodes:**
* 192 compute nodes without an accelerator
* 8 compute nodes with a GPU accelerator - 4x NVIDIA Tesla V100-SXM2
* 1 fat node - equipped with 6144 GB of RAM
[More about Compute nodes][1].
GPU and accelerated nodes are available upon request, see the [Resources Allocation Policy][2].