
Compare revisions

361 files changed: +4056 −0

Files

.gitignore

0 → 100644
+3 −0
site/
scripts/*.csv
venv/

.gitlab-ci.yml

0 → 100644
+141 −0
stages:
  - test
  - build
  - deploy
  - after_test

variables:
    PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"

docs:
  stage: test
  image: davidhrbac/docker-mdcheck:latest
  allow_failure: true
  script:
  - mdl -r ~MD024,~MD013,~MD033,~MD014,~MD026,~MD037,~MD036,~MD010,~MD029 *.md docs.it4i # BUGS

capitalize:
  stage: test
  image: davidhrbac/docker-mkdocscheck:latest
  # allow_failure: true
  script:
  - find mkdocs.yml docs.it4i/ \( -name '*.md' -o -name '*.yml' \) -print0 | xargs -0 -n1 scripts/titlemd_test.py

    #spell check:
    #stage: test
    #image: davidhrbac/docker-npmcheck:latest
    #allow_failure: true
    #script:
    #- npm i markdown-spellcheck -g
    #- mdspell '**/*.md' '!docs.it4i/module*.md' -rns --en-us

ext_links:
  stage: test
  image: davidhrbac/docker-mdcheck:latest
  allow_failure: true
  after_script:
  # remove JSON results
  - rm *.json
  script:
  - find docs.it4i/ -name '*.md' -exec grep --color -l http {} + | xargs awesome_bot -t 10 --allow-dupe --allow-redirect
  only:
  - master

404s:
  stage: after_test
  image: davidhrbac/docker-mkdocscheck:latest
  script:
  - wget -V
  - echo https://docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
  - wget --spider -e robots=off -o wget.log -r -p https://docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
  after_script:
  - sed -n '/^Found .* broken links.$/,$p' wget.log

mkdocs:
  stage: build
  image: davidhrbac/docker-mkdocscheck:latest
  cache:
    paths:
      - .cache/pip
      - venv/
  before_script:
  - python -V               # Print out python version for debugging
  - pip install virtualenv
  - virtualenv venv
  - source venv/bin/activate
  - pip install -r requirements.txt
  script:
  - mkdocs -V
    # add version to footer
  - bash scripts/add_version.sh
    # get modules list from clusters
  - bash scripts/get_modules.sh
    # generate site_url
  - (if [ "${CI_BUILD_REF_NAME}" != 'master' ]; then sed -i "s/\(site_url.*$\)/\1devel\/$CI_BUILD_REF_NAME\//" mkdocs.yml;fi);
    # generate URL for code link
  - sed -i "s/master/$CI_BUILD_REF_NAME/g" material-new/partials/toc.html
    # regenerate modules matrix
  - python scripts/modules-matrix.py > docs.it4i/modules-matrix.md
  - python scripts/modules-json.py > docs.it4i/modules-matrix.json
  - curl -f0 https://scs-test.it4i.cz/devel/apidocs/master/scs_api.server_public.md -o docs.it4i/apiv1.md
    # build pages
  - mkdocs build
    # compress search_index.json
    #- bash scripts/clean_json.sh site/mkdocs/search_index.json
    # replace broken links in 404.html
  - sed -i 's,href="" title=",href="/" title=",g' site/404.html
  - cp site/404.html site/403.html
  - sed -i 's/404 - Not found/403 - Forbidden/g' site/403.html
    # compress sitemap
  - gzip < site/sitemap.xml > site/sitemap.xml.gz
  artifacts:
    paths:
    - site
    expire_in: 1 week

deploy to stage:
  environment: stage
  stage: deploy
  image: davidhrbac/docker-mkdocscheck:latest
  before_script:
  # install ssh-agent
  - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
  - 'which rsync || ( apt-get update -y && apt-get install rsync -y )'
  # run ssh-agent
  - eval $(ssh-agent -s)
  # add ssh key stored in SSH_PRIVATE_KEY variable to the agent store
  - ssh-add <(echo "$SSH_PRIVATE_KEY")
  # disable host key checking (NOTE: makes you susceptible to man-in-the-middle attacks)
  # WARNING: use only in docker container, if you use it with shell you will overwrite your user's ssh config
  - mkdir -p ~/.ssh
  - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
  - useradd -lM nginx
  script:
  - chown nginx:nginx site -R
  - rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
  only:
  - branches@sccs/docs.it4i.cz

deploy to production:
  environment: production
  stage: deploy
  image: davidhrbac/docker-mkdocscheck:latest
  before_script:
  # install ssh-agent
  - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
  - 'which rsync || ( apt-get update -y && apt-get install rsync -y )'
  # run ssh-agent
  - eval $(ssh-agent -s)
  # add ssh key stored in SSH_PRIVATE_KEY variable to the agent store
  - ssh-add <(echo "$SSH_PRIVATE_KEY")
  # disable host key checking (NOTE: makes you susceptible to man-in-the-middle attacks)
  # WARNING: use only in docker container, if you use it with shell you will overwrite your user's ssh config
  - mkdir -p ~/.ssh
  - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
  - useradd -lM nginx
  script:
  - chown nginx:nginx site -R
  - rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/site/
  only:
  - master@sccs/docs.it4i.cz
  when: manual

.spelling

0 → 100644
+797 −0
CAE
CUBE
GPU
GSL
LMGC90
LS-DYNA
MAPDL
GPI-2
COM
.ssh
Anselm
IT4I
IT4Innovations
PBS
Salomon
TurboVNC
VNC
DDR3
DIMM
InfiniBand
CUDA
ORCA
COMSOL
API
GNU
CUDA
NVIDIA
LiveLink
MATLAB
Allinea
LLNL
Vampir
Doxygen
VTune
TotalView
Valgrind
ParaView
OpenFOAM
MAX_FAIRSHARE
MPI4Py
MPICH2
PETSc
Trilinos
FFTW
HDF5
BiERapp
AVX
AVX2
JRE
JDK
QEMU
VMware
VirtualBox
NUMA
SMP
BLAS
LAPACK
FFTW3
Dongarra
OpenCL
cuBLAS
CESNET
Jihlava
NVIDIA
Xeon
ANSYS
CentOS
RHEL
DDR4
DIMMs
GDDR5
EasyBuild
e.g.
MPICH
MVAPICH2
OpenBLAS
ScaLAPACK
PAPI
SGI
UV2000
400GB
Mellanox
RedHat
ssh.du1.cesnet.cz
ssh.du2.cesnet.cz
ssh.du3.cesnet.cz
DECI
supercomputing
AnyConnect
X11
backfilling
backfilled
SCP
Lustre
QDR
TFLOP
ncpus
myjob
pernode
mpiprocs
ompthreads
qprace
runtime
SVS
ppn
Multiphysics
aeroacoustics
turbomachinery
CFD
LS-DYNA
APDL
MAPDL
multiphysics
AUTODYN
RSM
Molpro
initio
parallelization
NWChem
SCF
ISV
profiler
Pthreads
profilers
OTF
PAPI
PCM
uncore
pre-processing
prepend
CXX
prepended
POMP2
Memcheck
unaddressable
OTF2
GPI-2
GASPI
GPI
MKL
IPP
TBB
GSL
Omics
VNC
Scalasca
IFORT
interprocedural
IDB
cloop
qcow
qcow2
vmdk
vdi
virtio
paravirtualized
Gbit
tap0
UDP
TCP
preload
qfat
Rmpi
DCT
datasets
dataset
preconditioners
partitioners
PARDISO
PaStiX
SuiteSparse
SuperLU
ExodusII
NetCDF
ParMETIS
multigrid
HYPRE
SPAI
Epetra
EpetraExt
Tpetra
64-bit
Belos
GMRES
Amesos
IFPACK
preconditioner
Teuchos
Makefiles
SAXPY
NVCC
VCF
HGMD
HUMSAVAR
ClinVar
indels
CIBERER
exomes
tmp
SSHFS
RSYNC
unmount
Cygwin
CygwinX
RFB
TightVNC
TigerVNC
GUIs
XLaunch
UTF-8
numpad
PuTTYgen
OpenSSH
IE11
x86
r21u01n577
7120P
interprocessor
IPN
toolchains
toolchain
APIs
easyblocks
GM200
GeForce
GTX
IRUs
ASIC
backplane
ICEX
IRU
PFLOP
T950B
ifconfig
inet
addr
checkbox
appfile
programmatically
http
https
filesystem
phono3py
HDF
splitted
automize
llvm
PGI
GUPC
BUPC
IBV
Aislinn
nondeterminism
stdout
stderr
i.e.
pthreads
uninitialised
broadcasted
ITAC
hotspots
Bioinformatics
semiempirical
DFT
polyfill
ES6
HTML5Rocks
minifiers
CommonJS
PhantomJS
bundlers
Browserify
versioning
isflowing
ispaused
NPM
sublicense
Streams2
Streams3
blogpost
GPG
mississippi
Uint8Arrays
Uint8Array
endianness
styleguide
noop
MkDocs
 - docs.it4i/anselm-cluster-documentation/environment-and-modules.md
MODULEPATH
bashrc
PrgEnv-gnu
bullx
MPI
PrgEnv-intel
EasyBuild
 - docs.it4i/anselm-cluster-documentation/capacity-computing.md
capacity.zip
README
 - docs.it4i/anselm-cluster-documentation/compute-nodes.md
DIMMs
 - docs.it4i/anselm-cluster-documentation/hardware-overview.md
cn
K20
Xeon
x86-64
Virtualization
virtualization
NVIDIA
5110P
SSD
lscratch
login1
login2
dm1
Rpeak
LINPACK
Rmax
E5-2665
E5-2470
P5110
isw
 - docs.it4i/anselm-cluster-documentation/introduction.md
RedHat
 - docs.it4i/anselm-cluster-documentation/job-priority.md
walltime
qexp
_List.fairshare
_time
_FAIRSHARE
1E6
 - docs.it4i/anselm-cluster-documentation/job-submission-and-execution.md
15209.srv11
qsub
15210.srv11
pwd
cn17.bullx
cn108.bullx
cn109.bullx
cn110.bullx
pdsh
hostname
SCRDIR
mkdir
mpiexec
qprod
Jobscript
jobscript
cn108
cn109
cn110
Name0
cn17
_NODEFILE
_O
_WORKDIR
mympiprog.x
_JOBID
myprog.x
openmpi
 - docs.it4i/anselm-cluster-documentation/network.md
ib0
 - docs.it4i/anselm-cluster-documentation/prace.md
PRACE
qfree
it4ifree
it4i.portal.clients
prace
1h
 - docs.it4i/anselm-cluster-documentation/shell-and-data-access.md
VPN
 - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-cfx.md
ANSYS
CFX
cfx.pbs
_r
ane3fl
 - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.md
mapdl.pbs
_dy
 - docs.it4i/anselm-cluster-documentation/software/ansys/ls-dyna.md
HPC
lsdyna.pbs
 - docs.it4i/anselm-cluster-documentation/software/chemistry/molpro.md
OpenMP
 - docs.it4i/anselm-cluster-documentation/software/compilers.md
Fortran
 - docs.it4i/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.md
E5-2600
 - docs.it4i/anselm-cluster-documentation/software/debuggers/score-p.md
Makefile
 - docs.it4i/anselm-cluster-documentation/software/gpi2.md
gcc
cn79
helloworld
_gpi.c
ibverbs
gaspi
_logger
 - docs.it4i/anselm-cluster-documentation/software/intel-suite/intel-compilers.md
Haswell
CPUs
ipo
O3
vec
xAVX
omp
simd
ivdep
pragmas
openmp
xCORE-AVX2
axCORE-AVX2
 - docs.it4i/anselm-cluster-documentation/software/kvirtualization.md
rc.local
runlevel
RDP
DHCP
DNS
SMB
VDE
smb.conf
TMPDIR
run.bat.
slirp
NATs
 - docs.it4i/anselm-cluster-documentation/software/mpi/mpi4py-mpi-for-python.md
NumPy
 - docs.it4i/anselm-cluster-documentation/software/numerical-languages/matlab_1314.md
mpiLibConf.m
matlabcode.m
output.out
matlabcodefile
sched
_feature
 - docs.it4i/anselm-cluster-documentation/software/numerical-languages/matlab.md
UV2000
maxNumCompThreads
SalomonPBSPro
 - docs.it4i/anselm-cluster-documentation/software/numerical-languages/octave.md
_THREADS
_NUM
 - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/trilinos.md
CMake-aware
Makefile.export
_PACKAGE
_CXX
_COMPILER
_INCLUDE
_DIRS
_LIBRARY
 - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.md
ansysdyna.pbs
 - docs.it4i/anselm-cluster-documentation/software/ansys/ansys.md
svsfem.cz
_
 - docs.it4i/anselm-cluster-documentation/software/debuggers/valgrind.md
libmpiwrap-amd64-linux
O0
valgrind
malloc
_PRELOAD
 - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.md
cn204
_LIBS
MAGMAROOT
_magma
_server
_anselm
_from
_mic.sh
_dgetrf
_mic
_03.pdf
 - docs.it4i/anselm-cluster-documentation/software/paraview.md
cn77
localhost
v4.0.1
 - docs.it4i/anselm-cluster-documentation/storage.md
ssh.du1.cesnet.cz
Plzen
ssh.du2.cesnet.cz
ssh.du3.cesnet.cz
tier1
_home
_cache
_tape
 - docs.it4i/salomon/environment-and-modules.md
icc
ictce
ifort
imkl
intel
gompi
goolf
BLACS
iompi
iccifort
 - docs.it4i/salomon/hardware-overview.md
HW
E5-4627v2
 - docs.it4i/salomon/job-submission-and-execution.md
15209.isrv5
r21u01n577
r21u02n578
r21u03n579
r21u04n580
qsub
15210.isrv5
pwd
r2i5n6.ib0.smc.salomon.it4i.cz
r4i6n13.ib0.smc.salomon.it4i.cz
r4i7n2.ib0.smc.salomon.it4i.cz
pdsh
r2i5n6
r4i6n13
r4i7n
r4i7n2
r4i7n0
SCRDIR
myjob
mkdir
mympiprog.x
mpiexec
myprog.x
r4i7n0.ib0.smc.salomon.it4i.cz
 - docs.it4i/salomon/7d-enhanced-hypercube.md
cns1
cns576
r1i0n0
r4i7n17
cns577
cns1008
r37u31n1008
7D
 - docs.it4i/anselm-cluster-documentation/resources-allocation-policy.md
qsub
it4ifree
it4i.portal.clients
x86
x64
 - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-fluent.md
anslic
_admin
 - docs.it4i/anselm-cluster-documentation/software/chemistry/nwchem.md
_DIR
 - docs.it4i/anselm-cluster-documentation/software/comsol-multiphysics.md
EDU
comsol
_matlab.pbs
_job.m
mphstart
 - docs.it4i/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.md
perf-report
perf
txt
html
mympiprog
_32p
 - docs.it4i/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.md
Hotspots
 - docs.it4i/anselm-cluster-documentation/software/debuggers/scalasca.md
scorep
 - docs.it4i/anselm-cluster-documentation/software/isv_licenses.md
edu
ansys
_features
_state.txt
f1
matlab
acfd
_ansys
_acfd
_aa
_comsol
HEATTRANSFER
_HEATTRANSFER
COMSOLBATCH
_COMSOLBATCH
STRUCTURALMECHANICS
_STRUCTURALMECHANICS
_matlab
_Toolbox
_Image
_Distrib
_Comp
_Engine
_Acquisition
pmode
matlabpool
 - docs.it4i/anselm-cluster-documentation/software/mpi/mpi.md
mpirun
BLAS1
FFT
KMP
_AFFINITY
GOMP
_CPU
bullxmpi-1
mpich2
 - docs.it4i/anselm-cluster-documentation/software/mpi/Running_OpenMPI.md
bysocket
bycore
 - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/fftw.md
gcc3.3.3
pthread
fftw3
lfftw3
_threads-lfftw3
_omp
icc3.3.3
FFTW2
gcc2.1.5
fftw2
lfftw
_threads
icc2.1.5
fftw-mpi3
_mpi
fftw3-mpi
fftw2-mpi
IntelMPI
 - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/gsl.md
dwt.c
mkl
lgsl
 - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/hdf5.md
icc
hdf5
_INC
_SHLIB
_CPP
_LIB
_F90
gcc49
 - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/petsc.md
_Dist
 - docs.it4i/anselm-cluster-documentation/software/nvidia-cuda.md
lcublas
 - docs.it4i/anselm-cluster-documentation/software/operating-system.md
6.x
 - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.md
startxwin
cygwin64binXWin.exe
tcp
 - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.md
Xming
XWin.exe.
 - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.md
_rsa.ppk
 - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.md
_keys
organization.example.com
_rsa
 - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md
vpnui.exe
 - docs.it4i/salomon/ib-single-plane-topology.md
36-port
Mcell.pdf
r21-r38
nodes.pdf
 - docs.it4i/salomon/introduction.md
E5-2680v3
 - docs.it4i/salomon/network.md
r4i1n0
r4i1n1
r4i1n2
r4i1n3
ip
 - docs.it4i/salomon/software/ansys/setting-license-preferences.md
ansys161
 - docs.it4i/salomon/software/ansys/workbench.md
mpifile.txt
solvehandlers.xml
 - docs.it4i/salomon/software/chemistry/phono3py.md
vasprun.xml
disp-XXXXX
disp
_fc3.yaml
ir
_grid
_points.yaml
gofree-cond1
 - docs.it4i/salomon/software/compilers.md
HPF
 - docs.it4i/salomon/software/comsol/licensing-and-available-versions.md
ver
 - docs.it4i/salomon/software/debuggers/aislinn.md
test.cpp
 - docs.it4i/salomon/software/debuggers/intel-vtune-amplifier.md
vtune
_update1
 - docs.it4i/salomon/software/debuggers/valgrind.md
EBROOTVALGRIND
 - docs.it4i/salomon/software/intel-suite/intel-advisor.md
O2
 - docs.it4i/salomon/software/intel-suite/intel-compilers.md
UV1
 - docs.it4i/salomon/software/numerical-languages/octave.md
octcode.m
mkoctfile
 - docs.it4i/software/orca.md
pdf
 - node_modules/es6-promise/README.md
rsvp.js
es6-promise
es6-promise-min
Node.js
testem
 - node_modules/spawn-sync/lib/json-buffer/README.md
node.js
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/doc/wg-meetings/2015-01-30.md
WG
domenic
mikeal
io.js
sam
calvin
whatwg
compat
mathias
isaac
chris
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/core-util-is/README.md
core-util-is
v0.12.
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/isarray/README.md
isarray
Gruber
julian
juliangruber.com
NONINFRINGEMENT
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/process-nextick-args/license.md
Metcalf
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/process-nextick-args/readme.md
process-nextick-args
process.nextTick
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/string_decoder/README.md
_decoder.js
Joyent
joyent
repo
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/util-deprecate/History.md
kumavis
jsdocs
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/util-deprecate/README.md
util-deprecate
Rajlich
 - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/README.md
v7.0.0
userland
chrisdickinson
christopher.s.dickinson
gmail.com
9554F04D7259F04124DE6B476D5A82AC7E37093B
calvinmetcalf
calvin.metcalf
F3EF5F62A87FC27A22E643F714CE4FF5015AA242
Vagg
rvagg
vagg.org
DD8F2338BAE7501E3DD5AC78C273792F7D83545D
sonewman
newmansam
outlook.com
Buus
mafintosh
mathiasbuus
Denicola
domenic.me
Matteo
Collina
mcollina
matteo.collina
3ABC01543F22DD2239285CDD818674489FBC127E
 - node_modules/spawn-sync/node_modules/concat-stream/readme.md
concat-stream
concat
cb
 - node_modules/spawn-sync/node_modules/os-shim/README.md
0.10.x
os.tmpdir
os.endianness
os.EOL
os.platform
os.arch
0.4.x
Aparicio
Adesis
Netlife
S.L
 - node_modules/spawn-sync/node_modules/try-thread-sleep/node_modules/thread-sleep/README.md
node-pre-gyp
npm
 - node_modules/spawn-sync/README.md
iojs

README.md

0 → 100644
+49 −0
# User documentation

This project contains IT4Innovations user documentation source.

## Development

### Install

```console
$ sudo apt install libpython-dev
$ virtualenv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
```

## Environments

* [https://docs.it4i.cz](https://docs.it4i.cz) - master branch
* [https://docs.it4i.cz/devel/$BRANCH_NAME](https://docs.it4i.cz/devel/$BRANCH_NAME) - maps the branches, available only with VPN access

## URLs

* [http://facelessuser.github.io/pymdown-extensions/](http://facelessuser.github.io/pymdown-extensions/)
* [http://squidfunk.github.io/mkdocs-material/](http://squidfunk.github.io/mkdocs-material/)

```
fair-share
InfiniBand
RedHat
CentOS
Mellanox
```

## Mathematical Formulae

### Formulas are made with:

* [https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/](https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/)
* [https://www.mathjax.org/](https://www.mathjax.org/)

You can add a formula to a page like this:

```
$$
MAX\_FAIRSHARE * ( 1 - \frac{usage_{Project}}{usage_{Total}} )
$$
```

To enable MathJax on a page, add the line ```---8<--- "mathjax.md"``` at the end of the file.
docs.it4i/anselm-cluster-documentation/capacity-computing.md

+326 −0
# Capacity Computing

## Introduction

In many cases, it is useful to submit a huge number (>100) of computational jobs into the PBS queue system. A huge number of (small) jobs is one of the most effective ways to execute embarrassingly parallel calculations, achieving the best runtime, throughput, and computer utilization.

However, executing a huge number of jobs via the PBS queue may strain the system. This strain may result in slow response to commands, inefficient scheduling, and overall degradation of performance and user experience for all users. For this reason, the number of jobs is **limited to 100 per user, 1000 per job array**.

!!! note
    Please follow one of the procedures below if you wish to schedule more than 100 jobs at a time.

* Use [Job arrays][1] when running a huge number of [multithreaded][2] (bound to one node only) or multinode (multithreaded across several nodes) jobs
* Use [GNU parallel][3] when running single-core jobs
* Combine [GNU parallel with Job arrays][4] when running a huge number of single-core jobs

## Policy

1. A user is allowed to submit at most 100 jobs. Each job may be [a job array][1].
1. The array size is at most 1000 subjobs.

## Job Arrays

!!! note
    A huge number of jobs may easily be submitted and managed as a job array.

A job array is a compact representation of many jobs, called subjobs. The subjobs share the same job script, and have the same values for all attributes and resources, with the following exceptions:

* each subjob has a unique index, $PBS_ARRAY_INDEX
* job identifiers of subjobs differ only by their indices
* the state of subjobs can differ (R, Q, etc.)

All subjobs within a job array have the same scheduling priority and schedule as independent jobs. An entire job array is submitted through a single qsub command and may be managed by qdel, qalter, qhold, qrls, and qsig commands as a single job.
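
For instance, the whole array can be held, released, or altered with a single command (the job ID 12345[].dm2 below is illustrative, matching the examples later in this section):

```console
$ qhold 12345[].dm2                        # hold the queued subjobs of the array
$ qrls 12345[].dm2                         # release them again
$ qalter -l walltime=04:00:00 12345[].dm2  # change the walltime of the entire array
```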

### Shared Jobscript

All subjobs in a job array use the very same, single jobscript. Each subjob runs its own instance of the jobscript. The instances execute different work controlled by the $PBS_ARRAY_INDEX variable.

Example:

Assume we have 900 input files with names beginning with "file" (e.g. file001, ..., file900). Assume we would like to use each of these input files with the program executable myprog.x, each as a separate job.

First, we create a tasklist file (or subjobs list), listing all tasks (subjobs) - all input files in our example:

```console
$ find . -name 'file*' > tasklist
```

Then we create the jobscript:

```bash
#!/bin/bash
#PBS -A PROJECT_ID
#PBS -q qprod
#PBS -l select=1:ncpus=16,walltime=02:00:00

# change to local scratch directory
SCR=/lscratch/$PBS_JOBID
mkdir -p $SCR ; cd $SCR || exit

# get individual tasks from tasklist with index from PBS JOB ARRAY
TASK=$(sed -n "${PBS_ARRAY_INDEX}p" $PBS_O_WORKDIR/tasklist)

# copy input file and executable to scratch
cp $PBS_O_WORKDIR/$TASK input ; cp $PBS_O_WORKDIR/myprog.x .

# execute the calculation
./myprog.x < input > output

# copy output file to submit directory
cp output $PBS_O_WORKDIR/$TASK.out
```

In this example, the submit directory holds the 900 input files, the executable myprog.x, and the jobscript file. As an input for each run, we take the filename of the input file from the created tasklist file. We copy the input file to the local scratch directory /lscratch/$PBS_JOBID, execute myprog.x, and copy the output file back to the submit directory, under the $TASK.out name. The myprog.x runs on one node only and must use threads to run in parallel. Be aware that if myprog.x **is not multithreaded**, then all the **jobs run as single-threaded programs in a sequential** manner. Due to the allocation of the whole node, the accounted time is equal to the usage of the whole node, while only 1/16 of the node is used!

If you need to run a huge number of parallel multicore jobs (i.e. multinode, multithreaded, e.g. MPI-enabled), a job array approach should still be used. The main difference compared to the previous single-node example is that the local scratch directory must not be used (it is not shared between nodes) and MPI or another technique for parallel multinode processing has to be used properly; a sketch of such a jobscript follows.
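
A minimal sketch of such a multinode subjob script, assuming an MPI-enabled executable mympiprog.x and a job directory on the shared /scratch filesystem (the executable name, paths, and module name are illustrative):

```bash
#!/bin/bash
#PBS -A PROJECT_ID
#PBS -q qprod
#PBS -l select=2:ncpus=16:mpiprocs=16,walltime=02:00:00

# change to a job-specific directory on the shared scratch (not the node-local /lscratch)
SCR=/scratch/$USER/$PBS_JOBID
mkdir -p $SCR ; cd $SCR || exit

# get the individual task from the tasklist using the PBS job array index
TASK=$(sed -n "${PBS_ARRAY_INDEX}p" $PBS_O_WORKDIR/tasklist)

# copy input file and executable to scratch
cp $PBS_O_WORKDIR/$TASK input ; cp $PBS_O_WORKDIR/mympiprog.x .

# execute the MPI calculation across all allocated nodes (module name is illustrative)
module load openmpi
mpiexec ./mympiprog.x < input > output

# copy output file back to the submit directory
cp output $PBS_O_WORKDIR/$TASK.out
```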

### Submit the Job Array

To submit the job array, use the qsub -J command. The 900 jobs of the [example above][5] may be submitted like this:

```console
$ qsub -N JOBNAME -J 1-900 jobscript
12345[].dm2
```

In this example, we submit a job array of 900 subjobs. Each subjob will run on one full node and is assumed to take less than 2 hours (note the #PBS directives at the beginning of the jobscript file; don't forget to set your valid PROJECT_ID and desired queue).

Sometimes for testing purposes, you may need to submit a one-element only array. This is not allowed by PBSPro, but there's a workaround:

```console
$ qsub -N JOBNAME -J 9-10:2 jobscript
```

This will only choose the lower index (9 in this example) for submitting/running your job.

### Manage the Job Array

Check status of the job array using the qstat command.

```console
$ qstat -a 12345[].dm2

dm2:
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
12345[].dm2     user2    qprod    xx          13516   1 16    --  00:50 B 00:02
```

When the status is B, it means that some subjobs are already running.
Check the status of the first 100 subjobs using the qstat command.

```console
$ qstat -a 12345[1-100].dm2

dm2:
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
12345[1].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:02
12345[2].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:02
12345[3].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:01
12345[4].dm2    user2    qprod    xx          13516   1 16    --  00:50 Q   --
     .             .        .      .             .    .   .     .    .   .    .
     ,             .        .      .             .    .   .     .    .   .    .
12345[100].dm2 user2    qprod    xx          13516   1 16    --  00:50 Q   --
```

Delete the entire job array. Running subjobs will be killed; queued subjobs will be deleted.

```console
$ qdel 12345[].dm2
```

Deleting large job arrays may take a while.
Display status information for all user's jobs, job arrays, and subjobs.

```console
$ qstat -u $USER -t
```

Display status information for all user's subjobs.

```console
$ qstat -u $USER -tJ
```

Read more on job arrays in the [PBSPro Users guide][6].

## GNU Parallel

!!! note
    Use GNU parallel to run many single core tasks on one node.

GNU parallel is a shell tool for executing jobs in parallel using one or more computers. A job can be a single command or a small script that has to be run for each of the lines in the input. GNU parallel is most useful when running single core jobs via the queue system on Anselm.

For more information and examples see the parallel man page:

```console
$ module add parallel
$ man parallel
```

### GNU Parallel Jobscript

The GNU parallel shell executes multiple instances of the jobscript using all cores on the node. The instances execute different work, controlled by the $PARALLEL_SEQ variable.

Example:

Assume we have 101 input files with names beginning with "file" (e.g. file001, ..., file101). Assume we would like to use each of these input files with the program executable myprog.x, each as a separate single-core job. We call these single-core jobs tasks.

First, we create a tasklist file, listing all tasks - all input files in our example:

```console
$ find . -name 'file*' > tasklist
```

Then we create a jobscript:

```bash
#!/bin/bash
#PBS -A PROJECT_ID
#PBS -q qprod
#PBS -l select=1:ncpus=16,walltime=02:00:00

[ -z "$PARALLEL_SEQ" ] &&
{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/tasklist $0 ; }

# change to local scratch directory
SCR=/lscratch/$PBS_JOBID/$PARALLEL_SEQ
mkdir -p $SCR ; cd $SCR || exit

# get individual task from tasklist
TASK=$1

# copy input file and executable to scratch
cp $PBS_O_WORKDIR/$TASK input

# execute the calculation
cat input > output

# copy output file to submit directory
cp output $PBS_O_WORKDIR/$TASK.out
```

In this example, tasks from the tasklist are executed via GNU parallel. The jobscript executes multiple instances of itself in parallel, on all cores of the node. Once an instance of the jobscript is finished, a new instance starts until all entries in the tasklist are processed. The currently processed entry of the tasklist may be retrieved via the $1 variable. The variable $TASK expands to one of the input filenames from the tasklist. We copy the input file to local scratch, execute myprog.x, and copy the output file back to the submit directory, under the $TASK.out name.

### Submit the Job

To submit the job, use the qsub command. The 101 task job of the [example above][7] may be submitted as follows:

```console
$ qsub -N JOBNAME jobscript
12345.dm2
```

In this example, we submit a job of 101 tasks. 16 input files will be processed in parallel. The 101 tasks on 16 cores are assumed to complete in less than 2 hours.

!!! hint
    Use #PBS directives at the beginning of the jobscript file; don't forget to set your valid PROJECT_ID and desired queue.

## Job Arrays and GNU Parallel

!!! note
    Combine the Job arrays and GNU parallel for the best throughput of single core jobs

While job arrays are able to utilize all available computational nodes, the GNU parallel can be used to efficiently run multiple single-core jobs on a single node. The two approaches may be combined to utilize all available (current and future) resources to execute single core jobs.

!!! note
    Every subjob in an array runs GNU parallel to utilize all cores on the node

### GNU Parallel, Shared jobscript

A combined approach, very similar to job arrays, can be taken. A job array is submitted to the queuing system. The subjobs run GNU parallel. The GNU parallel shell executes multiple instances of the jobscript using all of the cores on the node. The instances execute different work, controlled by the $PBS_ARRAY_INDEX and $PARALLEL_SEQ variables.

Example:

Assume we have 992 input files with names beginning with "file" (e.g. file001, ..., file992). Assume we would like to use each of these input files with the program executable myprog.x, each as a separate single-core job. We call these single-core jobs tasks.

First, we create a tasklist file, listing all tasks - all input files in our example:

```console
$ find . -name 'file*' > tasklist
```

Next we create a file, controlling how many tasks will be executed in one subjob:

```console
$ seq 32 > numtasks
```

Then we create a jobscript:

```bash
#!/bin/bash
#PBS -A PROJECT_ID
#PBS -q qprod
#PBS -l select=1:ncpus=16,walltime=02:00:00

[ -z "$PARALLEL_SEQ" ] &&
{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/numtasks $0 ; }

# change to local scratch directory
SCR=/lscratch/$PBS_JOBID/$PARALLEL_SEQ
mkdir -p $SCR ; cd $SCR || exit

# get individual task from tasklist with index from PBS JOB ARRAY and index from Parallel
IDX=$(($PBS_ARRAY_INDEX + $PARALLEL_SEQ - 1))
TASK=$(sed -n "${IDX}p" $PBS_O_WORKDIR/tasklist)
[ -z "$TASK" ] && exit

# copy input file and executable to scratch
cp $PBS_O_WORKDIR/$TASK input

# execute the calculation
cat input > output

# copy output file to submit directory
cp output $PBS_O_WORKDIR/$TASK.out
```

In this example, the jobscript executes in multiple instances in parallel, on all cores of a computing node. The variable $TASK expands to one of the input filenames from the tasklist. We copy the input file to local scratch memory, execute the myprog.x and copy the output file back to the submit directory, under the $TASK.out name.  The numtasks file controls how many tasks will be run per subjob. Once a task is finished, a new task starts, until the number of tasks in the numtasks file is reached.

!!! note
    Select subjob walltime and number of tasks per subjob carefully

When deciding on these values, keep in mind the following guiding rules (a worked example follows below):

1. Let n = N/16. The inequality (n+1) \* T < W should hold, where N is the number of tasks per subjob, T is the expected single-task walltime, and W is the subjob walltime. A short subjob walltime improves scheduling and job throughput.
1. The number of tasks per subjob should be a multiple of 16.
1. These rules are valid only when all tasks have similar task walltimes T.
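
A worked example under these rules (the numbers are illustrative): with N = 32 tasks per subjob, n = 32/16 = 2; if each task takes about T = 20 minutes, the subjob walltime W must exceed (2+1) \* 20 min = 60 min, so requesting a walltime of 1 hour 30 minutes leaves a comfortable margin.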

### Submit the Job Array (-J)

To submit the job array, use the qsub -J command. The 992 task job of the [example above][8] may be submitted like this:

```console
$ qsub -N JOBNAME -J 1-992:32 jobscript
12345[].dm2
```

In this example, we submit a job array of 31 subjobs. Note the -J 1-992:**32**; this step size must match the number written to the numtasks file. Each subjob will run on one full node and process 16 input files in parallel, 32 in total per subjob. Every subjob is assumed to complete in less than 2 hours.

!!! hint
    Use #PBS directives at the beginning of the jobscript file; don't forget to set your valid PROJECT_ID and desired queue.

## Examples

Download the examples in [capacity.zip][9], illustrating the ways listed above to run a huge number of jobs. We recommend trying out the examples before using this approach for running production jobs.

Unzip the archive in an empty directory on Anselm and follow the instructions in the README file:

```console
$ unzip capacity.zip
$ cat README
```

[1]: #job-arrays
[2]: #shared-jobscript-on-one-node
[3]: #gnu-parallel
[4]: #job-arrays-and-gnu-parallel
[5]: #array_example
[6]: ../pbspro.md
[7]: #gp_example
[8]: #combined_example
[9]: capacity.zip
docs.it4i/anselm-cluster-documentation/compute-nodes.md

+132 −0
# Compute Nodes

## Node Configuration

Anselm is a cluster of x86-64 Intel based nodes built with Bull Extreme Computing bullx technology. The cluster contains four types of compute nodes.

### Compute Nodes Without Accelerators

* 180 nodes
* 2880 cores in total
* two Intel Sandy Bridge E5-2665, 8-core, 2.4GHz processors per node
* 64 GB of physical memory per node
* one 500 GB SATA 2.5” 7.2 krpm HDD per node
* bullx B510 blade servers
* cn[1-180]

### Compute Nodes With a GPU Accelerator

* 23 nodes
* 368 cores in total
* two Intel Sandy Bridge E5-2470, 8-core, 2.3GHz processors per node
* 96 GB of physical memory per node
* one 500 GB SATA 2.5” 7.2 krpm HDD per node
* GPU accelerator 1x NVIDIA Tesla Kepler K20m per node
* bullx B515 blade servers
* cn[181-203]

### Compute Nodes With a MIC Accelerator

* 4 nodes
* 64 cores in total
* two Intel Sandy Bridge E5-2470, 8-core, 2.3GHz processors per node
* 96 GB of physical memory per node
* one 500 GB SATA 2.5” 7.2 krpm HDD per node
* MIC accelerator 1x Intel Phi 5110P per node
* bullx B515 blade servers
* cn[204-207]

### Fat Compute Nodes

* 2 nodes
* 32 cores in total
* 2 Intel Sandy Bridge E5-2665, 8-core, 2.4GHz processors per node
* 512 GB of physical memory per node
* two 300 GB SAS 3.5” 15 krpm HDDs (RAID1) per node
* two 100GB SLC SSD per node
* bullx R423-E3 servers
* cn[208-209]

![](../img/bullxB510.png)
**Anselm bullx B510 servers**

### Compute Node Summary

| Node type                    | Count | Range       | Memory | Cores       | Queues                                    |
| ---------------------------- | ----- | ----------- | ------ | ----------- | --------------------------------------    |
| Nodes without an accelerator | 180   | cn[1-180]   | 64GB   | 16 @ 2.4GHz | qexp, qprod, qlong, qfree, qprace, qatlas |
| Nodes with a GPU accelerator | 23    | cn[181-203] | 96GB   | 16 @ 2.3GHz | qnvidia, qexp                             |
| Nodes with a MIC accelerator | 4     | cn[204-207] | 96GB   | 16 @ 2.3GHz | qmic, qexp                                |
| Fat compute nodes            | 2     | cn[208-209] | 512GB  | 16 @ 2.4GHz | qfat, qexp                                |

## Processor Architecture

Anselm is equipped with Intel Sandy Bridge processors: Intel Xeon E5-2665 (nodes without accelerators and fat nodes) and Intel Xeon E5-2470 (nodes with accelerators). The processors support the Advanced Vector Extensions (AVX) 256-bit instruction set.

### Intel Sandy Bridge E5-2665 Processor

* eight-core
* speed: 2.4 GHz, up to 3.1 GHz using Turbo Boost Technology
* peak performance:  19.2 GFLOP/s per core
* caches:
  * L2: 256 KB per core
  * L3: 20 MB per processor
* memory bandwidth at the level of the processor: 51.2 GB/s

### Intel Sandy Bridge E5-2470 Processor

* eight-core
* speed: 2.3 GHz, up to 3.1 GHz using Turbo Boost Technology
* peak performance:  18.4 GFLOP/s per core
* caches:
  * L2: 256 KB per core
  * L3: 20 MB per processor
* memory bandwidth at the level of the processor: 38.4 GB/s
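
The per-core peak figures above follow from the AVX capability of the Sandy Bridge core, which can complete one 4-wide double-precision addition and one 4-wide double-precision multiplication per cycle, i.e. 8 FLOPs per cycle: 2.4 GHz x 8 = 19.2 GFLOP/s for the E5-2665 and 2.3 GHz x 8 = 18.4 GFLOP/s for the E5-2470.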

Nodes equipped with the Intel Xeon E5-2665 CPU have the PBS resource attribute cpu_freq = 24 set; nodes equipped with the Intel Xeon E5-2470 CPU have cpu_freq = 23 set.

```console
$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16:cpu_freq=24 -I
```

In this example, we allocate 4 nodes, 16 cores at 2.4 GHz per node.

Intel Turbo Boost Technology is used by default; you can disable it for all nodes of a job by using the resource attribute cpu_turbo_boost:

```console
$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I
```

## Memory Architecture

In terms of memory configuration, the cluster contains three types of compute nodes.

### Compute Nodes Without Accelerators

* 2 sockets
* Memory Controllers are integrated into processors.
  * 8 DDR3 DIMMs per node
  * 4 DDR3 DIMMs per CPU
  * 1 DDR3 DIMM per channel
  * Data rate support: up to 1600MT/s
* Populated memory: 8 x 8 GB DDR3 DIMM 1600 MHz

### Compute Nodes With a GPU or MIC Accelerator

* 2 sockets
* Memory Controllers are integrated into processors.
  * 6 DDR3 DIMMs per node
  * 3 DDR3 DIMMs per CPU
  * 1 DDR3 DIMM per channel
  * Data rate support: up to 1600MT/s
* Populated memory: 6 x 16 GB DDR3 DIMM 1600 MHz

### Fat Compute Nodes

* 2 sockets
* Memory Controllers are integrated into processors.
  * 16 DDR3 DIMMs per node
  * 8 DDR3 DIMMs per CPU
  * 2 DDR3 DIMMs per channel
  * Data rate support: up to 1600MT/s
* Populated memory: 16 x 32 GB DDR3 DIMM 1600 MHz
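
The populated memory listed above matches the per-node memory in the Node Configuration section: 8 x 8 GB = 64 GB for nodes without accelerators, 6 x 16 GB = 96 GB for GPU- and MIC-accelerated nodes, and 16 x 32 GB = 512 GB for the fat nodes.
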
docs.it4i/anselm-cluster-documentation/hardware-overview.md

+68 −0
# Hardware Overview

The Anselm cluster consists of 209 computational nodes named cn[1-209] of which 180 are regular compute nodes, 23 are GPU Kepler K20 accelerated nodes, 4 are MIC Xeon Phi 5110P accelerated nodes, and 2 are fat nodes. Each node is a powerful x86-64 computer, equipped with 16 cores (two eight-core Intel Sandy Bridge processors), at least 64 GB of RAM, and a local hard drive. User access to the Anselm cluster is provided by two login nodes login[1,2]. The nodes are interlinked through high speed InfiniBand and Ethernet networks. All nodes share a 320 TB /home disk for storage of user files. The 146 TB shared /scratch storage is available for scratch data.

The Fat nodes are equipped with a large amount (512 GB) of memory. Virtualization infrastructure provides resources to run long term servers and services in virtual mode. Fat nodes and virtual servers may access 45 TB of dedicated block storage. Accelerated nodes, fat nodes, and virtualization infrastructure are available [upon request][a] from a PI.

Schematic representation of the Anselm cluster. Each box represents a node (computer) or storage capacity:

![](../img/Anselm-Schematic-Representation.png)

The cluster compute nodes cn[1-207] are organized within 13 chassis.

There are four types of compute nodes:

* 180 compute nodes without an accelerator
* 23 compute nodes with a GPU accelerator - an NVIDIA Tesla Kepler K20m
* 4 compute nodes with a MIC accelerator - an Intel Xeon Phi 5110P
* 2 fat nodes - equipped with 512 GB of RAM and two 100 GB SSD drives

[More about Compute nodes][1].

GPU and accelerated nodes are available upon request, see the [Resources Allocation Policy][2].

All of these nodes are interconnected through fast InfiniBand and Ethernet networks.  [More about the Network][3].
Every chassis provides an InfiniBand switch, marked **isw**, connecting all nodes in the chassis, as well as connecting the chassis to the upper level switches.

All of the nodes share a 320 TB /home disk for storage of user files. The 146 TB shared /scratch storage is available for scratch data. These file systems are provided by the Lustre parallel file system. There is also local disk storage available on all compute nodes in /lscratch. [More about Storage][4].

User access to the Anselm cluster is provided by two login nodes login1, login2, and data mover node dm1. [More about accessing the cluster][5].

The parameters are summarized in the following tables:

| **In general**                              |                                              |
| ------------------------------------------- | -------------------------------------------- |
| Primary purpose                             | High Performance Computing                   |
| Architecture of compute nodes               | x86-64                                       |
| Operating system                            | Linux (CentOS)                               |
| [**Compute nodes**][1]                      |                                              |
| Total                                       | 209                                          |
| Processor cores                             | 16 (2 x 8 cores)                             |
| RAM                                         | min. 64 GB, min. 4 GB per core               |
| Local disk drive                            | yes - usually 500 GB                         |
| Compute network                             | InfiniBand QDR, fully non-blocking, fat-tree |
| w/o accelerator                             | 180, cn[1-180]                               |
| GPU accelerated                             | 23, cn[181-203]                              |
| MIC accelerated                             | 4, cn[204-207]                               |
| Fat compute nodes                           | 2, cn[208-209]                               |
| **In total**                                |                                              |
| Total theoretical peak performance  (Rpeak) | 94 TFLOP/s                                   |
| Total max. LINPACK performance  (Rmax)      | 73 TFLOP/s                                   |
| Total amount of RAM                         | 15.136 TB                                    |

| Node             | Processor                               | Memory | Accelerator          |
| ---------------- | --------------------------------------- | ------ | -------------------- |
| w/o accelerator  | 2 x Intel Sandy Bridge E5-2665, 2.4 GHz | 64 GB  | -                    |
| GPU accelerated  | 2 x Intel Sandy Bridge E5-2470, 2.3 GHz | 96 GB  | NVIDIA Kepler K20m   |
| MIC accelerated  | 2 x Intel Sandy Bridge E5-2470, 2.3 GHz | 96 GB  | Intel Xeon Phi 5110P |
| Fat compute node | 2 x Intel Sandy Bridge E5-2665, 2.4 GHz | 512 GB | -                    |

For more details refer to [Compute nodes][1], [Storage][4], and [Network][3].

[1]: compute-nodes.md
[2]: resources-allocation-policy.md
[3]: network.md
[4]: storage.md
[5]: shell-and-data-access.md

[a]: https://support.it4i.cz/rt
docs.it4i/anselm-cluster-documentation/introduction.md

+20 −0
# Introduction

Welcome to Anselm supercomputer cluster. The Anselm cluster consists of 209 compute nodes, totalling 3344 compute cores with 15 TB RAM, giving over 94 TFLOP/s theoretical peak performance. Each node is a powerful x86-64 computer, equipped with 16 cores, at least 64 GB of RAM, and a 500 GB hard disk drive. Nodes are interconnected through a fully non-blocking fat-tree InfiniBand network, and are equipped with Intel Sandy Bridge processors. A few nodes are also equipped with NVIDIA Kepler GPU or Intel Xeon Phi MIC accelerators. Read more in [Hardware Overview][1].

The cluster runs with an operating system which is compatible with the RedHat [Linux family][a]. We have installed a wide range of software packages targeted at different scientific domains. These packages are accessible via the [modules environment][2].
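
For example, listing the available modules and loading one from a login node looks like this (the module name is illustrative):

```console
$ module avail
$ module load intel
```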

The user data shared file-system (HOME, 320 TB) and job data shared file-system (SCRATCH, 146 TB) are available to users.

The PBS Professional workload manager provides [computing resources allocations and job execution][3].

Read more on how to [apply for resources][4], [obtain login credentials][5] and [access the cluster][6].

[1]: hardware-overview.md
[2]: ../environment-and-modules.md
[3]: resources-allocation-policy.md
[4]: ../general/applying-for-resources.md
[5]: ../general/obtaining-login-credentials/obtaining-login-credentials.md
[6]: shell-and-data-access.md

[a]: http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg
docs.it4i/anselm-cluster-documentation/job-priority.md

+75 −0
# Job Scheduling

## Job Execution Priority

The scheduler gives each job an execution priority and then uses this job execution priority to select which job(s) to run.

Job execution priority on Anselm is determined by these job properties (in order of importance):

1. queue priority
1. fair-share priority
1. eligible time

### Queue Priority

Queue priority is the priority of the queue in which the job is waiting prior to execution.

Queue priority has the biggest impact on job execution priority. The execution priority of jobs in higher priority queues is always greater than the execution priority of jobs in lower priority queues. Other properties of jobs used for determining the job execution priority (fair-share priority, eligible time) cannot compete with queue priority.

Queue priorities can be seen [here][a].

### Fair-Share Priority

Fair-share priority is a priority calculated on the basis of recent usage of resources. It is calculated per project; all members of a project share the same fair-share priority. Projects with higher recent usage have a lower fair-share priority than projects with lower or no recent usage.

Fair-share priority is used for ranking jobs with equal queue priority.

Fair-share priority is calculated as

---8<--- "fairshare_formula.md"

where MAX_FAIRSHARE has value 1E6,
usage<sub>Project</sub> is accumulated usage by all members of a selected project,
usage<sub>Total</sub> is total usage by all users, across all projects.

Usage counts allocated core-hours (`ncpus x walltime`). Usage decays, halving at intervals of 168 hours (one week).
Jobs queued in the queue qexp are not used to calculate the project's usage.
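
As an illustration (with made-up numbers): if the members of a project have recently accumulated usage<sub>Project</sub> = 200,000 core-hours while the recent usage by all users is usage<sub>Total</sub> = 1,000,000 core-hours, the project's fair-share priority is 1E6 x (1 - 200000/1000000) = 800,000. If the project then stops computing while other projects continue, its accumulated usage decays (halving each week) and its fair-share priority gradually recovers.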

!!! note
    Calculated usage and fair-share priority can be seen [here][b].

The calculated fair-share priority can also be seen in the Resource_List.fairshare attribute of a job.

### Eligible Time

Eligible time is the amount (in seconds) of eligible time a job accrues while waiting to run. Jobs with higher eligible time gain higher priority.

Eligible time has the least impact on execution priority. Eligible time is used for sorting jobs with equal queue priority and fair-share priority. It is very, very difficult for eligible time to compete with fair-share priority.

Eligible time can be seen in the eligible_time attribute of a job.

### Formula

Job execution priority (job sort formula) is calculated as:

---8<--- "job_sort_formula.md"

### Job Backfilling

The Anselm cluster uses job backfilling.

Backfilling means fitting smaller jobs around the higher-priority jobs that the scheduler is going to run next, in such a way that the higher-priority jobs are not delayed. Backfilling allows us to keep resources from becoming idle when the top job (the job with the highest execution priority) cannot run.

The scheduler makes a list of jobs to run in order of execution priority. The scheduler looks for smaller jobs that can fit into the usage gaps around the highest-priority jobs in the list. The scheduler looks in the prioritized list of jobs and chooses the highest-priority smaller jobs that fit. Filler jobs are run only if they will not delay the start time of top jobs.

This means that jobs with lower execution priority can be run before jobs with higher execution priority.

!!! note
    It is **very beneficial to specify the walltime** when submitting jobs.

Specifying more accurate walltime enables better scheduling, better execution times, and better resource usage. Jobs with suitable (small) walltime can be backfilled - and overtake job(s) with a higher priority.
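
For instance, a short job submitted with an explicit 30-minute walltime (all values are illustrative) is a good candidate for backfilling:

```console
$ qsub -A PROJECT_ID -q qprod -l select=2:ncpus=16,walltime=00:30:00 ./myjob
```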

---8<--- "mathjax.md"

[a]: https://extranet.it4i.cz/anselm/queues
[b]: https://extranet.it4i.cz/anselm/projects
docs.it4i/anselm-cluster-documentation/job-submission-and-execution.md
# Job Submission and Execution

## Job Submission

When allocating computational resources for the job, specify:

1. a suitable queue for your job (the default is qprod)
1. the number of computational nodes required
1. the number of cores per node required
1. the maximum wall time allocated to your calculation; note that jobs exceeding the maximum wall time will be killed
1. your Project ID
1. a Jobscript or interactive switch

!!! note
    Use the **qsub** command to submit your job to a queue for allocation of computational resources.

Submit the job using the qsub command:

```console
$ qsub -A Project_ID -q queue -l select=x:ncpus=y,walltime=[[hh:]mm:]ss[.ms] jobscript
```

The qsub command submits the job to the queue, i.e. the qsub command creates a request to the PBS Job manager for allocation of specified resources. The resources will be allocated when available, subject to the above described policies and constraints. **After the resources are allocated, the jobscript or interactive shell is executed on the first of the allocated nodes.**

!!! note
    The PBS nodes statement (qsub -l nodes=nodespec) is not supported on the Anselm cluster.

### Job Submission Examples

```console
$ qsub -A OPEN-0-0 -q qprod -l select=64:ncpus=16,walltime=03:00:00 ./myjob
```

In this example, we allocate 64 nodes, 16 cores per node, for 3 hours. We allocate these resources via the qprod queue, consumed resources will be accounted to the Project identified by Project ID OPEN-0-0. The jobscript 'myjob' will be executed on the first node in the allocation.

```console
$ qsub -q qexp -l select=4:ncpus=16 -I
```

In this example, we allocate 4 nodes, 16 cores per node, for 1 hour. We allocate these resources via the qexp queue. The resources will be available interactively.

```console
$ qsub -A OPEN-0-0 -q qnvidia -l select=10:ncpus=16 ./myjob
```

In this example, we allocate 10 NVIDIA-accelerated nodes, 16 cores per node, for 24 hours. We allocate these resources via the qnvidia queue. The jobscript 'myjob' will be executed on the first node in the allocation.

```console
$ qsub -A OPEN-0-0 -q qfree -l select=10:ncpus=16 ./myjob
```

In this example, we allocate 10 nodes, 16 cores per node, for 12 hours. We allocate these resources via the qfree queue. It is not required that the project OPEN-0-0 has any available resources left. Consumed resources are still accounted for. The jobscript myjob will be executed on the first node in the allocation.

All qsub options may be [saved directly into the jobscript][1]. In such cases, it is not necessary to specify any options for qsub.

```console
$ qsub ./myjob
```
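
For example, a jobscript carrying its own directives (mirroring the first submission example above; the job name is illustrative) might begin like this:

```bash
#!/bin/bash
#PBS -A OPEN-0-0
#PBS -q qprod
#PBS -l select=64:ncpus=16,walltime=03:00:00
#PBS -N myjob

# the commands of the calculation itself follow here
```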

By default, the PBS batch system sends an e-mail only when the job is aborted. Disabling mail events completely can be done as follows:

```console
$ qsub -m n
```

## Advanced Job Placement

### Placement by Name

Specific nodes may be allocated via PBS:

```console
$ qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=16:host=cn171+1:ncpus=16:host=cn172 -I
```

In this example, we allocate nodes cn171 and cn172, all 16 cores per node, for 24 hours.  Consumed resources will be accounted to the Project identified by Project ID OPEN-0-0. The resources will be available interactively.

### Placement by CPU Type

Nodes equipped with an Intel Xeon E5-2665 CPU have a base clock frequency of 2.4 GHz; nodes equipped with an Intel Xeon E5-2470 CPU have a base frequency of 2.3 GHz (see the Compute Nodes section for details). Nodes may be selected via the PBS resource attribute cpu_freq.

| CPU Type           | base freq. | Nodes                  | cpu_freq attribute |
| ------------------ | ---------- | ---------------------- | ------------------ |
| Intel Xeon E5-2665 | 2.4GHz     | cn[1-180], cn[208-209] | 24                 |
| Intel Xeon E5-2470 | 2.3GHz     | cn[181-207]            | 23                 |

```console
$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16:cpu_freq=24 -I
```

In this example, we allocate 4 nodes, 16 cores per node, selecting only the nodes with Intel Xeon E5-2665 CPU.

### Placement by IB Switch

Groups of computational nodes are connected to chassis-integrated InfiniBand switches. These switches form the leaf switch layer of the [InfiniBand network][2] fat tree topology. Nodes sharing the leaf switch can communicate most efficiently. Sharing the same switch prevents hops in the network and facilitates unbiased, highly efficient network communication.

Nodes sharing the same switch may be selected via the PBS resource attribute ibswitch. Values of this attribute are iswXX, where XX is the switch number. The node-switch mapping can be seen in the [Hardware Overview][3] section.

We recommend allocating compute nodes to a single switch when best possible computational network performance is required to run the job efficiently:

```console
$ qsub -A OPEN-0-0 -q qprod -l select=18:ncpus=16:ibswitch=isw11 ./myjob
```

In this example, we request all of the 18 nodes sharing the isw11 switch for 24 hours. A full chassis will be allocated.

## Advanced Job Handling

### Selecting Turbo Boost Off

Intel Turbo Boost Technology is on by default. We strongly recommend keeping the default.

If necessary (such as in the case of benchmarking) you can disable the Turbo for all nodes of the job by using the PBS resource attribute cpu_turbo_boost:

```console
$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I
```

More information about Intel Turbo Boost can be found in the TurboBoost section.

### Advanced Examples

In the following example, we select an allocation for benchmarking a very special and demanding MPI program. We request Turbo off, and 2 full chassis of compute nodes (nodes sharing the same IB switches) for 30 minutes:

```console
$ qsub -A OPEN-0-0 -q qprod
    -l select=18:ncpus=16:ibswitch=isw10:mpiprocs=1:ompthreads=16+18:ncpus=16:ibswitch=isw20:mpiprocs=16:ompthreads=1
    -l cpu_turbo_boost=0,walltime=00:30:00
    -N Benchmark ./mybenchmark
```

The MPI processes will be distributed differently on the nodes connected to the two switches. On the isw10 nodes, we will run 1 MPI process per node with 16 threads per process, on isw20 nodes we will run 16 plain MPI processes.

Although this example is somewhat artificial, it demonstrates the flexibility of the qsub command options.

## Job Management

!!! note
    Check status of your jobs using the **qstat** and **check-pbs-jobs** commands

```console
$ qstat -a
$ qstat -a -u username
$ qstat -an -u username
$ qstat -f 12345.srv11
```

Example:

```console
$ qstat -a

srv11:
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
16287.srv11     user1    qlong    job1         6183   4 64    --  144:0 R 38:25
16468.srv11     user1    qlong    job2         8060   4 64    --  144:0 R 17:44
16547.srv11     user2    qprod    job3x       13516   2 32    --  48:00 R 00:58
```

In this example, user1 and user2 are running jobs named job1, job2, and job3x. The jobs job1 and job2 are using 4 nodes, 16 cores per node each. job1 has already run for 38 hours and 25 minutes, and job2 for 17 hours and 44 minutes. job1 has already consumed `64 x 38.42 = 2458.9` core hours, and job3x has already consumed `32 x 0.97 = 31.0` core hours. These consumed core hours will be accounted for on the respective project accounts, regardless of whether the allocated cores were actually used for computations.

The following commands allow you to check the status of your jobs using the check-pbs-jobs command, check for the presence of your PBS jobs' processes on the execution hosts, display the load and processes, display the job standard and error output, and continuously display (tail -f) the job standard or error output:

```console
$ check-pbs-jobs --check-all
$ check-pbs-jobs --print-load --print-processes
$ check-pbs-jobs --print-job-out --print-job-err
$ check-pbs-jobs --jobid JOBID --check-all --print-all
$ check-pbs-jobs --jobid JOBID --tailf-job-out
```

Examples:

```console
$ check-pbs-jobs --check-all
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Check session id: OK
Check processes
cn164: OK
cn165: No process
```

In this example we see that job 35141.dm2 is not currently running any processes on the allocated node cn165, which may indicate an execution error.

```console
$ check-pbs-jobs --print-load --print-processes
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Print load
cn164: LOAD: 16.01, 16.01, 16.00
cn165: LOAD:  0.01,  0.00,  0.01
Print processes
       %CPU CMD
cn164:  0.0 -bash
cn164:  0.0 /bin/bash /var/spool/PBS/mom_priv/jobs/35141.dm2.SC
cn164: 99.7 run-task
...
```

In this example, we see that job 35141.dm2 is currently running the process run-task on node cn164, using only one thread, while node cn165 is idle, which may indicate an execution error.

```console
$ check-pbs-jobs --jobid 35141.dm2 --print-job-out
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Print job standard output:
======================== Job start  ==========================
Started at    : Fri Aug 30 02:47:53 CEST 2013
Script name   : script
Run loop 1
Run loop 2
Run loop 3
```

In this example, we see the actual output (some iteration loops) of the job 35141.dm2.
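To follow the job output continuously while the job runs, the --tailf-job-out option listed above may be used in the same way; the job ID is illustrative:

```console
$ check-pbs-jobs --jobid 35141.dm2 --tailf-job-out
```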

!!! note
    Manage your queued or running jobs, using the **qhold**, **qrls**, **qdel**, **qsig** or **qalter** commands

You may release your allocation at any time, using the qdel command:

```console
$ qdel 12345.srv11
```

You may kill a running job by force, using the qsig command:

```console
$ qsig -s 9 12345.srv11
```

Learn more by reading the PBS man page:

```console
$ man pbs_professional
```

## Job Execution

### Jobscript

!!! note
    Prepare the jobscript to run batch jobs in the PBS queue system

The jobscript is a user-made script controlling a sequence of commands for executing the calculation. It is often written in bash, though other scripting languages may be used as well. The jobscript is supplied to the PBS **qsub** command as an argument and is executed by the PBS Professional workload manager.
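As a minimal illustration, a jobscript may be as simple as the following sketch; the module and program names are placeholders:

```bash
#!/bin/bash

# change to the directory where qsub was invoked
cd $PBS_O_WORKDIR

# load the required modules (placeholder)
ml OpenMPI

# execute the calculation (placeholder executable)
mpirun ./myprog.x
```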

!!! note
    The jobscript or interactive shell is executed on the first of the allocated nodes.

```console
$ qsub -q qexp -l select=4:ncpus=16 -N Name0 ./myjob
$ qstat -n -u username

srv11:
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
15209.srv11     username qexp     Name0        5530   4 64    --  01:00 R 00:00
   cn17/0*16+cn108/0*16+cn109/0*16+cn110/0*16
```

In this example, the nodes cn17, cn108, cn109, and cn110 were allocated for 1 hour via the qexp queue. The jobscript myjob will be executed on the node cn17, while the nodes cn108, cn109, and cn110 are available for use as well.

The jobscript or interactive shell is executed in the home directory by default:

```console
$ qsub -q qexp -l select=4:ncpus=16 -I
qsub: waiting for job 15210.srv11 to start
qsub: job 15210.srv11 ready

$ pwd
/home/username
```

In this example, 4 nodes were allocated interactively for 1 hour via the qexp queue. The interactive shell is executed in the home directory.

!!! note
    All nodes within the allocation may be accessed via ssh.  Unallocated nodes are not accessible to the user.

The allocated nodes are accessible via ssh from login nodes. The nodes may access each other via ssh as well.
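For example, from the first allocated node (or from a login node) you may log in directly to another node of the same allocation; the node name is illustrative:

```console
$ ssh cn108
```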

Calculations on allocated nodes may be executed remotely via MPI, ssh, pdsh, or clush. You may find out which nodes belong to the allocation by reading the $PBS_NODEFILE file:

```console
qsub -q qexp -l select=4:ncpus=16 -I
qsub: waiting for job 15210.srv11 to start
qsub: job 15210.srv11 ready

$ pwd
/home/username

$ sort -u $PBS_NODEFILE
cn17.bullx
cn108.bullx
cn109.bullx
cn110.bullx

$ pdsh -w cn17,cn[108-110] hostname
cn17: cn17
cn108: cn108
cn109: cn109
cn110: cn110
```

In this example, the hostname program is executed via pdsh from the interactive shell. The execution runs on all four allocated nodes. The same result would be achieved if pdsh were called from any of the allocated nodes or from the login nodes.
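The clush utility mentioned above accepts a similar node list; assuming clush is available in your environment, an equivalent command might look like this:

```console
$ clush -w cn17,cn[108-110] hostname
```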

### Example Jobscript for MPI Calculation

!!! note
    Production jobs must use the /scratch directory for I/O

The recommended way to run production jobs is to change to the /scratch directory early in the jobscript, copy all inputs to /scratch, execute the calculations and copy outputs to the home directory.

```bash
#!/bin/bash

# change to scratch directory, exit on failure
SCRDIR=/scratch/$USER/myjob
mkdir -p $SCRDIR
cd $SCRDIR || exit

# copy input file to scratch
cp $PBS_O_WORKDIR/input .
cp $PBS_O_WORKDIR/mympiprog.x .

# load the MPI module
ml OpenMPI

# execute the calculation
mpirun -pernode ./mympiprog.x

# copy output file to home
cp output $PBS_O_WORKDIR/.

#exit
exit
```

In this example, a directory in /home holds the input file input and the executable mympiprog.x. We create the directory myjob on the /scratch filesystem, copy the input and executable files from the /home directory where the qsub was invoked ($PBS_O_WORKDIR) to /scratch, execute the MPI program mympiprog.x, and copy the output file back to the /home directory. mympiprog.x is executed as one process per node, on all allocated nodes.

!!! note
    Consider preloading inputs and executables onto the [shared scratch][4] storage before the calculation starts.

In some cases, it may be impractical to copy the inputs to the scratch storage and the outputs to the home directory. This is especially true when very large input and output files are expected, or when the files should be reused by a subsequent calculation. In such cases, it is the user's responsibility to preload the input files on the shared /scratch storage before the job submission, and to retrieve the outputs manually after all calculations are finished.
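A minimal sketch of such a workflow, assuming the /scratch/$USER/myjob directory used in the examples and an illustrative jobscript name:

```console
$ mkdir -p /scratch/$USER/myjob
$ cp input mympiprog.x /scratch/$USER/myjob/    # preload inputs and executable before submission
$ qsub myjobscript                              # run the calculation on /scratch
$ cp /scratch/$USER/myjob/output .              # retrieve the outputs after the job finishes
```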

!!! note
    Store the qsub options within the jobscript. Use **mpiprocs** and **ompthreads** qsub options to control the MPI job execution.

### Example Jobscript for MPI Calculation With Preloaded Inputs

Example jobscript for an MPI job with preloaded inputs and executables, options for qsub are stored within the script:

```bash
#!/bin/bash
#PBS -q qprod
#PBS -N MYJOB
#PBS -l select=100:ncpus=16:mpiprocs=1:ompthreads=16
#PBS -A OPEN-0-0

# change to scratch directory, exit on failure
SCRDIR=/scratch/$USER/myjob
cd $SCRDIR || exit

# load the MPI module
ml OpenMPI

# execute the calculation
mpirun ./mympiprog.x

#exit
exit
```

In this example, input and executable files are assumed to be preloaded manually in the /scratch/$USER/myjob directory. Note the **mpiprocs** and **ompthreads** qsub options controlling the behavior of the MPI execution. mympiprog.x is executed as one process per node, on all 100 allocated nodes. If mympiprog.x implements OpenMP threads, it will run 16 threads per node.
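For comparison, a pure MPI variant of the same allocation (16 MPI processes per node, no OpenMP threading) could be requested along these lines; this select statement is a sketch, not part of the original example:

```bash
#PBS -l select=100:ncpus=16:mpiprocs=16:ompthreads=1
```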

More information can be found in the [Running OpenMPI][5] and [Running MPICH2][6] sections.

### Example Jobscript for Single Node Calculation

!!! note
    The local scratch directory is often useful for single node jobs. Data in the local scratch directory will be deleted immediately after the job ends.

Example jobscript for a single node calculation, using [local scratch][4] storage on the node:

```bash
#!/bin/bash

# change to local scratch directory
cd /lscratch/$PBS_JOBID || exit

# copy input file to scratch
cp $PBS_O_WORKDIR/input .
cp $PBS_O_WORKDIR/myprog.x .

# execute the calculation
./myprog.x

# copy output file to home
cp output $PBS_O_WORKDIR/.

#exit
exit
```

In this example, a directory in /home holds the input file input and the executable myprog.x. We copy the input and executable files from the home directory where the qsub was invoked ($PBS_O_WORKDIR) to the local scratch directory /lscratch/$PBS_JOBID, execute myprog.x, and copy the output file back to the /home directory. myprog.x runs on one node only and may use threads.

### Other Jobscript Examples

Further jobscript examples may be found in the software section and the [Capacity computing][7] section.

[1]: #example-jobscript-for-mpi-calculation-with-preloaded-inputs
[2]: network.md
[3]: hardware-overview.md
[4]: storage.md
[5]: ../software/mpi/running_openmpi.md
[6]: ../software/mpi/running-mpich2.md
[7]: capacity-computing.md
# Network

All of the compute and login nodes of Anselm are interconnected through an [InfiniBand][a] QDR network and a Gigabit [Ethernet][b] network. Both networks may be used to transfer user data.

## InfiniBand Network

All of the compute and login nodes of Anselm are interconnected through a high-bandwidth, low-latency [InfiniBand][a] QDR network (IB 4 x QDR, 40 Gbps). The network topology is a fully non-blocking fat-tree.

The compute nodes may be accessed via the InfiniBand network using the ib0 network interface, in the address range 10.2.1.1-209. MPI may be used to establish native InfiniBand connections among the nodes.

!!! note
    The network provides **2170 MB/s** transfer rates via the TCP connection (single stream) and up to **3600 MB/s** via the native InfiniBand protocol.

The fat-tree topology ensures that peak transfer rates are achieved between any two nodes, independent of network traffic exchanged among other nodes concurrently.

## Ethernet Network

The compute nodes may be accessed via the regular Gigabit Ethernet network interface eth0, in address range 10.1.1.1-209, or by using aliases cn1-cn209. The network provides **114 MB/s** transfer rates via the TCP connection.

## Example

In this example, we access the node cn110 through the InfiniBand network via the ib0 interface, then from cn110 to cn108 through the Ethernet network.

```console
$ qsub -q qexp -l select=4:ncpus=16 -N Name0 ./myjob
$ qstat -n -u username
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
15209.srv11     username qexp     Name0        5530   4 64    --  01:00 R 00:00
   cn17/0*16+cn108/0*16+cn109/0*16+cn110/0*16

$ ssh 10.2.1.110
$ ssh 10.1.1.108
```

[a]: http://en.wikipedia.org/wiki/InfiniBand
[b]: http://en.wikipedia.org/wiki/Ethernet
# Resources Allocation Policy

## Job Queue Policies

The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and the resources available to the Project. The Fair-share system of Anselm ensures that individual users may consume approximately equal amounts of resources per week. Detailed information can be found in the [Job scheduling][1] section. The resources are accessible via several queues for queueing the jobs. The queues provide prioritized and exclusive access to the computational resources. The following table provides the queue partitioning overview:

!!! note
    Check the queue status at <https://extranet.it4i.cz/anselm/>

| queue               | active project | project resources    | nodes                                                | min ncpus | priority | authorization | walltime |
| ------------------- | -------------- | -------------------- | ---------------------------------------------------- | --------- | -------- | ------------- | -------- |
| qexp                | no             | none required        | 209 nodes                                            | 1         | 150      | no            | 1 h      |
| qprod               | yes            | > 0                  | 180 nodes w/o accelerator                            | 16        | 0        | no            | 24/48 h  |
| qlong               | yes            | > 0                  | 180 nodes w/o accelerator                            | 16        | 0        | no            | 72/144 h |
| qnvidia, qmic       | yes            | > 0                  | 23 nvidia nodes, 4 mic nodes                         | 16        | 200      | yes           | 24/48 h  |
| qfat                | yes            | > 0                  | 2 fat nodes                                          | 16        | 200      | yes           | 24/144 h |
| qfree               | yes            | < 120% of allocation | 180 w/o accelerator                                  | 16        | -1024    | no            | 12 h     |

!!! note
    **The qfree queue is not free of charge**. [Normal accounting][2] applies. However, it allows for utilization of free resources, once a project has exhausted all its allocated computational resources. This does not apply to Director's Discretion projects (DD projects) by default. Usage of qfree after exhaustion of DD projects' computational resources is allowed after a request for this queue.

**The qexp queue is equipped with nodes which do not have exactly the same CPU clock speed.** Should you need the nodes to have exactly the same CPU speed, you have to select the proper nodes during the PBS job submission.

* **qexp**, the Express queue: This queue is dedicated to testing and running very small jobs. It is not required to specify a project to enter the qexp. There are always 2 nodes reserved for this queue (w/o accelerators), a maximum 8 nodes are available via the qexp for a particular user, from a pool of nodes containing Nvidia accelerated nodes (cn181-203), MIC accelerated nodes (cn204-207) and Fat nodes with 512GB of RAM (cn208-209). This enables us to test and tune accelerated code and code with higher RAM requirements. The nodes may be allocated on a per core basis. No special authorization is required to use qexp. The maximum runtime in qexp is 1 hour.
* **qprod**, the Production queue: This queue is intended for normal production runs. It is required that an active project with nonzero remaining resources is specified to enter the qprod. All nodes may be accessed via the qprod queue, except the reserved ones. 178 nodes without accelerators are included. Full nodes, 16 cores per node, are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qprod is 48 hours.
* **qlong**, the Long queue: This queue is intended for long production runs. It is required that an active project with nonzero remaining resources is specified to enter the qlong. Only 60 nodes without acceleration may be accessed via the qlong queue. Full nodes, 16 cores per node, are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qlong is 144 hours (three times that of the standard qprod time - 3 x 48 h).
* **qnvidia**, **qmic**, **qfat**, the Dedicated queues: The qnvidia queue is dedicated to accessing the Nvidia accelerated nodes, qmic to accessing the MIC nodes, and qfat the Fat nodes. It is required that an active project with nonzero remaining resources is specified to enter these queues. 23 nvidia, 4 mic, and 2 fat nodes are included. Full nodes, 16 cores per node, are allocated. The queues run with very high priority; the jobs will be scheduled ahead of jobs coming from the qexp queue. A PI needs to explicitly ask [support][a] for authorization to enter the dedicated queues for all users associated with her/his project.
* **qfree**, the Free resource queue: The qfree queue is intended for utilization of free resources, after a project has exhausted all of its allocated computational resources (this does not apply to DD projects by default; DD projects have to request permission to use qfree after exhaustion of their computational resources). It is required that an active project is specified to enter the queue. Consumed resources will be accounted to the Project. Access to the qfree queue is automatically removed if consumed resources exceed 120% of the resources allocated to the Project. Only 180 nodes without accelerators may be accessed from this queue. Full nodes, 16 cores per node, are allocated. The queue runs with very low priority and no special authorization is required to use it. The maximum runtime in qfree is 12 hours.

## Queue Notes

The job wall clock time defaults to **half the maximum time**, see the table above. Longer wall time limits can be  [set manually, see examples][3].

Jobs that exceed the reserved wall clock time (Req'd Time) get killed automatically. The wall clock time limit can be changed for queuing jobs (state Q) using the qalter command; however, it cannot be changed for a running job (state R).
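For instance, the limit of a queued job might be extended as follows (the job ID and the new limit are illustrative):

```console
$ qalter -l walltime=48:00:00 12345.srv11
```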

Anselm users may check the current queue configuration [here][b].

## Queue Status

!!! tip
    Check the status of jobs, queues and compute nodes [here][c].

![rspbs web interface](../img/rsweb.png)

Display the queue status on Anselm:

```console
$ qstat -q
```

The PBS allocation overview may also be obtained using the rspbs command:

```console
$ rspbs
Usage: rspbs [options]

Options:
  --version             show program's version number and exit
  -h, --help            show this help message and exit
  --get-node-ncpu-chart
                        Print chart of allocated ncpus per node
  --summary             Print summary
  --get-server-details Print server
  --get-queues          Print queues
  --get-queues-details Print queues details
  --get-reservations    Print reservations
  --get-reservations-details
                        Print reservations details
  --get-nodes           Print nodes of PBS complex
  --get-nodeset         Print nodeset of PBS complex
  --get-nodes-details   Print nodes details
  --get-jobs            Print jobs
  --get-jobs-details    Print jobs details
  --get-jobs-check-params
                        Print jobid, job state, session_id, user, nodes
  --get-users           Print users of jobs
  --get-allocated-nodes
                        Print allocated nodes of jobs
  --get-allocated-nodeset
                        Print allocated nodeset of jobs
  --get-node-users      Print node users
  --get-node-jobs       Print node jobs
  --get-node-ncpus      Print number of ncpus per node
  --get-node-allocated-ncpus
                        Print number of allocated ncpus per node
  --get-node-qlist      Print node qlist
  --get-node-ibswitch   Print node ibswitch
  --get-user-nodes      Print user nodes
  --get-user-nodeset    Print user nodeset
  --get-user-jobs       Print user jobs
  --get-user-jobc       Print number of jobs per user
  --get-user-nodec      Print number of allocated nodes per user
  --get-user-ncpus      Print number of allocated ncpus per user
  --get-qlist-nodes     Print qlist nodes
  --get-qlist-nodeset   Print qlist nodeset
  --get-ibswitch-nodes Print ibswitch nodes
  --get-ibswitch-nodeset
                        Print ibswitch nodeset
  --state=STATE         Only for given job state
  --jobid=JOBID         Only for given job ID
  --user=USER           Only for given user
  --node=NODE           Only for given node
  --nodestate=NODESTATE
                        Only for given node state (affects only --get-node*
                        --get-qlist-* --get-ibswitch-* actions)
  --incl-finished       Include finished jobs
```

---8<--- "resource_accounting.md"

---8<--- "mathjax.md"

[1]: job-priority.md
[2]: #resources-accounting-policy
[3]: job-submission-and-execution.md

[a]: https://support.it4i.cz/rt/
[b]: https://extranet.it4i.cz/anselm/queues
[c]: https://extranet.it4i.cz/anselm/
# Accessing the Cluster

## Shell Access

The Anselm cluster is accessed by SSH protocol via login nodes login1 and login2 at the address anselm.it4i.cz. The login nodes may be addressed specifically, by prepending the login node name to the address.

| Login address         | Port | Protocol | Login node                                   |
| --------------------- | ---- | -------- | -------------------------------------------- |
| anselm.it4i.cz        | 22   | ssh      | round-robin DNS record for login1 and login2 |
| login1.anselm.it4i.cz | 22   | ssh      | login1                                       |
| login2.anselm.it4i.cz | 22   | ssh      | login2                                       |

Authentication is available by [private key][1] only.

!!! note
    Please verify SSH fingerprints during the first logon. They are identical on all login nodes:

    md5:

    29:b3:f4:64:b0:73:f5:6f:a7:85:0f:e0:0d:be:76:bf (DSA)
    d4:6f:5c:18:f4:3f:70:ef:bc:fc:cc:2b:fd:13:36:b7 (RSA)

    sha256:

    LX2034TYy6Lf0Q7Zf3zOIZuFlG09DaSGROGBz6LBUy4 (DSA)
    +DcED3GDoA9piuyvQOho+ltNvwB9SJSYXbB639hbejY (RSA)

Private key authentication:

On **Linux** or **Mac**, use:

```console
$ ssh -i /path/to/id_rsa username@anselm.it4i.cz
```

If you see a warning message "UNPROTECTED PRIVATE KEY FILE!", use this command to set lower permissions to the private key file:

```console
$ chmod 600 /path/to/id_rsa
```

On **Windows**, use [PuTTY ssh client][2].

After logging in, you will see the command prompt:

```console
                                            _
                       /\                  | |
                      /  \   _ __  ___  ___| |_ __ ___
                     / /\ \ | '_ \/ __|/ _ \ | '_ ` _ \
                    / ____ \| | | \__ \  __/ | | | | | |
                   /_/    \_\_| |_|___/\___|_|_| |_| |_|


                        http://www.it4i.cz/?lang=en

Last login: Tue Jul 9 15:57:38 2013 from your-host.example.com
[username@login2.anselm ~]$
```

The above is an example of a successful login to the cluster.

!!! note
    The environment is **not** shared between login nodes, except for [shared filesystems][3].

## Data Transfer

Data in and out of the system may be transferred by the [scp][a] and sftp protocols. (Not available yet.) In case large volumes of data are transferred, use the dedicated data mover node dm1.anselm.it4i.cz for increased performance.

| Address               | Port | Protocol  |
| --------------------- | ---- | --------- |
| anselm.it4i.cz        | 22   | scp       |
| login1.anselm.it4i.cz | 22   | scp       |
| login2.anselm.it4i.cz | 22   | scp       |

Authentication is by [private key][1] only.

!!! note
    Data transfer rates of up to **160MB/s** can be achieved with scp or sftp.

    1TB may be transferred in 1:50h.

To achieve 160MB/s transfer rates, the end user must be connected by a 10G line all the way to IT4Innovations, and must use a computer with a fast processor for the transfer. When using a Gigabit Ethernet connection, transfer rates of up to 110MB/s may be expected. A fast cipher (aes128-ctr) should be used.
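For example, the cipher may be requested explicitly on the scp command line; the file name and target directory below are illustrative:

```console
$ scp -c aes128-ctr -i /path/to/id_rsa my-large-file username@anselm.it4i.cz:directory/
```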

!!! note
    If you experience degraded data transfer performance, consult your local network provider.

On Linux or Mac, use an scp or sftp client to transfer data to Anselm:

```console
$ scp -i /path/to/id_rsa my-local-file username@anselm.it4i.cz:directory/file
```

```console
$ scp -i /path/to/id_rsa -r my-local-dir username@anselm.it4i.cz:directory
```

or

```console
$ sftp -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz
```

A very convenient way to transfer files in and out of Anselm is via the fuse filesystem [sshfs][b].

```console
$ sshfs -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz:. mountpoint
```

Using sshfs, the user's Anselm home directory will be mounted on the local computer, just like an external disk.
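When the work is finished, the sshfs mount can be released again; on Linux this is typically done with fusermount (using the same mountpoint as above):

```console
$ fusermount -u mountpoint
```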

Learn more about ssh, scp, and sshfs by reading the man pages:

```console
$ man ssh
$ man scp
$ man sshfs
```

On Windows, use the [WinSCP client][c] to transfer the data. The [win-sshfs client][d] provides a way to mount the Anselm filesystems directly as an external disc.

More information about the shared file systems is available [here][4].

## Connection Restrictions

Outgoing connections, from Anselm Cluster login nodes to the outside world, are restricted to the following ports:

| Port | Protocol |
| ---- | -------- |
| 22   | ssh      |
| 80   | http     |
| 443  | https    |
| 9418 | git      |

!!! note
    Please use **ssh port forwarding** and proxy servers to connect from Anselm to all other remote ports.

Outgoing connections from Anselm Cluster compute nodes are restricted to the internal network. Direct connections from compute nodes to the outside world are cut.

## Port Forwarding

### Port Forwarding From Login Nodes

!!! note
    Port forwarding allows an application running on Anselm to connect to arbitrary remote hosts and ports.

It works by tunneling the connection from Anselm back to users' workstations and forwarding from the workstation to the remote host.

Pick some unused port on the Anselm login node  (for example 6000) and establish the port forwarding:

```console
$ ssh -R 6000:remote.host.com:1234 anselm.it4i.cz
```

In this example, we establish port forwarding between port 6000 on Anselm and port 1234 on remote.host.com. By accessing localhost:6000 on Anselm, an application will see the response of remote.host.com:1234. The traffic will run via the user's local workstation.
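For instance, assuming the remote service at remote.host.com:1234 speaks HTTP and wget is available, an application on Anselm could reach it through the forwarded port like this (a sketch):

```console
$ wget -qO- http://localhost:6000/
```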

Port forwarding may be done **using PuTTY** as well. On the PuTTY Configuration screen, load your Anselm configuration first. Then go to Connection->SSH->Tunnels to set up the port forwarding. Click the Remote radio button. Insert 6000 into the Source port textbox and remote.host.com:1234 into the Destination textbox. Click the Add button, then Open.

Port forwarding may be established directly to the remote host. However, this requires that the user has ssh access to remote.host.com:

```console
$ ssh -L 6000:localhost:1234 remote.host.com
```

!!! note
    Port number 6000 is chosen as an example only. Pick any free port.

### Port Forwarding From Compute Nodes

Remote port forwarding from compute nodes allows applications running on the compute nodes to access hosts outside the Anselm Cluster.

First, establish the remote port forwarding from the login node, as [described above][5].

Second, invoke port forwarding from the compute node to the login node. Insert the following line into your jobscript or interactive shell:

```console
$ ssh  -TN -f -L 6000:localhost:6000 login1
```

In this example, we assume that port forwarding from `login1:6000` to `remote.host.com:1234` has been established beforehand. By accessing `localhost:6000`, an application running on a compute node will see the response of `remote.host.com:1234`.

### Using Proxy Servers

Port forwarding is static, each single port is mapped to a particular port on a remote host. Connection to another remote host requires a new forward.

!!! note
    Applications with inbuilt proxy support experience unlimited access to remote hosts via a single proxy server.

To establish a local proxy server on your workstation, install and run SOCKS proxy server software. On Linux, the sshd daemon provides this functionality. To establish a SOCKS proxy server listening on port 1080, run:

```console
$ ssh -D 1080 localhost
```

On Windows, install and run the free, open source [Sock Puppet][e] server.

Once the proxy server is running, establish ssh port forwarding from Anselm to the proxy server, port 1080, exactly as [described above][5]:

```console
$ ssh -R 6000:localhost:1080 anselm.it4i.cz
```

Now, configure the applications proxy settings to **localhost:6000**. Use port forwarding to access the [proxy server from compute nodes][5] as well.

## Graphical User Interface

* The [X Window system][6] is the principal way to get GUI access to the clusters.
* [Virtual Network Computing][7] is a graphical [desktop sharing][f] system that uses the [Remote Frame Buffer protocol][g] to remotely control another [computer][h].

## VPN Access

* Access IT4Innovations internal resources via [VPN][8].

[1]: ../general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md
[2]: ../general/accessing-the-clusters/shell-access-and-data-transfer/putty.md
[3]: storage.md#shared-filesystems
[4]: storage.md
[5]: #port-forwarding-from-login-nodes
[6]: ../general/accessing-the-clusters/graphical-user-interface/x-window-system.md
[7]: ../general/accessing-the-clusters/graphical-user-interface/vnc.md
[8]: ../general/accessing-the-clusters/vpn-access.md

[a]: http://en.wikipedia.org/wiki/Secure_copy
[b]: http://linux.die.net/man/1/sshfs
[c]: http://winscp.net/eng/download.php
[d]: http://code.google.com/p/win-sshfs/
[e]: http://sockspuppet.com/
[f]: http://en.wikipedia.org/wiki/Desktop_sharing
[g]: http://en.wikipedia.org/wiki/RFB_protocol
[h]: http://en.wikipedia.org/wiki/Computer
# NVIDIA CUDA

Guide to NVIDIA CUDA Programming and GPU Usage

## CUDA Programming on Anselm

The default programming model for GPU accelerators on Anselm is Nvidia CUDA. To set up the environment for CUDA, use:

```console
$ ml av cuda
$ ml cuda        # or: ml CUDA
```

If the user code is hybrid and uses both CUDA and MPI, the MPI environment has to be set up as well. One way to do this is to use the PrgEnv-gnu module, which sets up the correct combination of the GNU compiler and an MPI library:

```console
$ ml PrgEnv-gnu
```

CUDA code can be compiled directly on the login1 or login2 nodes. The user does not have to use compute nodes with GPU accelerators for compilation. To compile CUDA source code, use the nvcc compiler:

```console
$ nvcc --version
```

The CUDA Toolkit comes with a large number of examples which can be a helpful reference to start with. To compile and test these examples, users should copy them to their home directory:

```console
$ cd ~
$ mkdir cuda-samples
$ cp -R /apps/nvidia/cuda/6.5.14/samples/* ~/cuda-samples/
```

To compile the examples, change directory to the particular example (here the example used is deviceQuery) and run make to start the compilation:

```console
$ cd ~/cuda-samples/1_Utilities/deviceQuery
$ make
```

To run the code, the user can use a PBS interactive session to get access to a node from the qnvidia queue (note: use your project name with the -A parameter in the qsub command) and execute the binary file:

```console
$ qsub -I -q qnvidia -A OPEN-0-0
$ ml cuda
$ ~/cuda-samples/1_Utilities/deviceQuery/deviceQuery
```

The expected output of the deviceQuery example executed on a node with a Tesla K20m is:

```console
    CUDA Device Query (Runtime API) version (CUDART static linking)

    Detected 1 CUDA Capable device(s)

    Device 0: "Tesla K20m"
    CUDA Driver Version / Runtime Version 5.0 / 5.0
    CUDA Capability Major/Minor version number: 3.5
    Total amount of global memory: 4800 MBytes (5032706048 bytes)
    (13) Multiprocessors x (192) CUDA Cores/MP: 2496 CUDA Cores
    GPU Clock rate: 706 MHz (0.71 GHz)
    Memory Clock rate: 2600 Mhz
    Memory Bus Width: 320-bit
    L2 Cache Size: 1310720 bytes
    Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65536), 3D=(4096,4096,4096)
    Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048
    Total amount of constant memory: 65536 bytes
    Total amount of shared memory per block: 49152 bytes
    Total number of registers available per block: 65536
    Warp size: 32
    Maximum number of threads per multiprocessor: 2048
    Maximum number of threads per block: 1024
    Maximum sizes of each dimension of a block: 1024 x 1024 x 64
    Maximum sizes of each dimension of a grid: 2147483647 x 65535 x 65535
    Maximum memory pitch: 2147483647 bytes
    Texture alignment: 512 bytes
    Concurrent copy and kernel execution: Yes with 2 copy engine(s)
    Run time limit on kernels: No
    Integrated GPU sharing Host Memory: No
    Support host page-locked memory mapping: Yes
    Alignment requirement for Surfaces: Yes
    Device has ECC support: Enabled
    Device supports Unified Addressing (UVA): Yes
    Device PCI Bus ID / PCI location ID: 2 / 0
    Compute Mode:
    < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >
    deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 5.0, CUDA Runtime Version = 5.0, NumDevs = 1, Device0 = Tesla K20m
```

### Code Example

In this section we provide a basic CUDA-based vector addition code example. You can directly copy and paste the code to test it:

```cpp
// test.cu  (create this file, e.g. with vim)

#define N (2048*2048)
#define THREADS_PER_BLOCK 512

#include <stdio.h>
#include <stdlib.h>

// GPU kernel function to add two vectors
__global__ void add_gpu( int *a, int *b, int *c, int n){
  int index = threadIdx.x + blockIdx.x * blockDim.x;
  if (index < n)
    c[index] = a[index] + b[index];
}

// CPU function to add two vectors
void add_cpu (int *a, int *b, int *c, int n) {
  for (int i=0; i < n; i++)
    c[i] = a[i] + b[i];
}

// CPU function to generate a vector of random integers
void random_ints (int *a, int n) {
  for (int i = 0; i < n; i++)
    a[i] = rand() % 10000; // random number between 0 and 9999
}

// CPU function to compare two vectors
int compare_ints( int *a, int *b, int n ){
  int pass = 0;
  for (int i = 0; i < n; i++){
    if (a[i] != b[i]) {
      printf("Value mismatch at location %d, values %d and %d\n", i, a[i], b[i]);
      pass = 1;
    }
  }
  if (pass == 0) printf ("Test passed\n"); else printf ("Test failed\n");
  return pass;
}

int main( void ) {

  int *a, *b, *c; // host copies of a, b, c
  int *dev_a, *dev_b, *dev_c; // device copies of a, b, c
  int size = N * sizeof( int ); // we need space for N integers

  // Allocate GPU/device copies of dev_a, dev_b, dev_c
  cudaMalloc( (void**)&dev_a, size );
  cudaMalloc( (void**)&dev_b, size );
  cudaMalloc( (void**)&dev_c, size );

  // Allocate CPU/host copies of a, b, c
  a = (int*)malloc( size );
  b = (int*)malloc( size );
  c = (int*)malloc( size );

  // Fill input vectors with random integer numbers
  random_ints( a, N );
  random_ints( b, N );

  // copy inputs to device
  cudaMemcpy( dev_a, a, size, cudaMemcpyHostToDevice );
  cudaMemcpy( dev_b, b, size, cudaMemcpyHostToDevice );

  // launch add_gpu() kernel with blocks and threads
  add_gpu<<< N/THREADS_PER_BLOCK, THREADS_PER_BLOCK >>>( dev_a, dev_b, dev_c, N );

  // copy device result back to host copy of c
  cudaMemcpy( c, dev_c, size, cudaMemcpyDeviceToHost );

  //Check the results with CPU implementation
  int *c_h; c_h = (int*)malloc( size );
  add_cpu (a, b, c_h, N);
  compare_ints(c, c_h, N);

  // Clean CPU memory allocations
  free( a ); free( b ); free( c ); free (c_h);

  // Clean GPU memory allocations
  cudaFree( dev_a );
  cudaFree( dev_b );
  cudaFree( dev_c );

  return 0;
}
```

This code can be compiled using the following command:

```console
$ nvcc test.cu -o test_cuda
```

To run the code, use an interactive PBS session to get access to one of the GPU accelerated nodes:

```console
$ qsub -I -q qnvidia -A OPEN-0-0
$ ml cuda
$ ./test_cuda
```

## CUDA Libraries

### cuBLAS

The NVIDIA CUDA Basic Linear Algebra Subroutines (cuBLAS) library is a GPU-accelerated version of the complete standard BLAS library with 152 standard BLAS routines. A basic description of the library together with basic performance comparisons with MKL can be found [here][a].

#### cuBLAS Example: SAXPY

The SAXPY function multiplies the vector x by the scalar alpha and adds it to the vector y, overwriting the latter vector with the result. A description of the cuBLAS function can be found in the [NVIDIA CUDA documentation][b]. The code can be pasted into a file and compiled without any modification.

```cpp
/* Includes, system */
#include <stdio.h>
#include <stdlib.h>

/* Includes, cuda */
#include <cuda_runtime.h>
#include <cublas_v2.h>

/* Vector size */
#define N  (32)

/* Host implementation of a simple version of saxpy */
void saxpy(int n, float alpha, const float *x, float *y)
{
    for (int i = 0; i < n; ++i)
    y[i] = alpha*x[i] + y[i];
}

/* Main */
int main(int argc, char **argv)
{
    float *h_X, *h_Y, *h_Y_ref;
    float *d_X = 0;
    float *d_Y = 0;

    const float alpha = 1.0f;
    int i;

    cublasHandle_t handle;

    /* Initialize CUBLAS */
    printf("simpleCUBLAS test running..n");
    cublasCreate(&handle);

    /* Allocate host memory for the matrices */
    h_X = (float *)malloc(N * sizeof(h_X[0]));
    h_Y = (float *)malloc(N * sizeof(h_Y[0]));
    h_Y_ref = (float *)malloc(N * sizeof(h_Y_ref[0]));

    /* Fill the matrices with test data */
    for (i = 0; i < N; i++)
    {
        h_X[i] = rand() / (float)RAND_MAX;
        h_Y[i] = rand() / (float)RAND_MAX;
        h_Y_ref[i] = h_Y[i];
    }

    /* Allocate device memory for the matrices */
    cudaMalloc((void **)&d_X, N * sizeof(d_X[0]));
    cudaMalloc((void **)&d_Y, N * sizeof(d_Y[0]));

    /* Initialize the device matrices with the host matrices */
    cublasSetVector(N, sizeof(h_X[0]), h_X, 1, d_X, 1);
    cublasSetVector(N, sizeof(h_Y[0]), h_Y, 1, d_Y, 1);

    /* Performs operation using plain C code */
    saxpy(N, alpha, h_X, h_Y_ref);

    /* Performs operation using cublas */
    cublasSaxpy(handle, N, &alpha, d_X, 1, d_Y, 1);

    /* Read the result back */
    cublasGetVector(N, sizeof(h_Y[0]), d_Y, 1, h_Y, 1);

    /* Check result against reference */
    for (i = 0; i < N; ++i)
        printf("CPU res = %f t GPU res = %f t diff = %f n", h_Y_ref[i], h_Y[i], h_Y_ref[i] - h_Y[i]);

    /* Memory clean up */
    free(h_X); free(h_Y); free(h_Y_ref);
    cudaFree(d_X); cudaFree(d_Y);

    /* Shutdown */
    cublasDestroy(handle);
}
```

!!! note
    cuBLAS has its own function for data transfers between CPU and GPU memory:
    - [cublasSetVector][c] - transfers data from CPU to GPU memory
    - [cublasGetVector][d] - transfers data from GPU to CPU memory

To compile the code using the NVCC compiler, the "-lcublas" compiler flag has to be specified:

```console
$ ml cuda
$ nvcc -lcublas test_cublas.cu -o test_cublas_nvcc
```

To compile the same code with GCC:

```console
$ ml cuda
$ gcc -std=c99 test_cublas.c -o test_cublas_gcc -lcublas -lcudart
```

To compile the same code with an Intel compiler:

```console
$ ml cuda
$ ml intel
$ icc -std=c99 test_cublas.c -o test_cublas_icc -lcublas -lcudart
```

[a]: https://developer.nvidia.com/cublas
[b]: http://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-axpy
[c]: http://docs.nvidia.com/cuda/cublas/index.html#cublassetvector
[d]: http://docs.nvidia.com/cuda/cublas/index.html#cublasgetvector

# API Placeholder

This page is being created automatically from the API source code.
# Environment and Modules

## Environment Customization

After logging in, you may want to configure the environment. Write your preferred path definitions, aliases, functions, and module loads in the .bashrc file:

```console
# .bashrc

# users compilation path
export MODULEPATH=${MODULEPATH}:/home/$USER/.local/easybuild/modules/all

# User specific aliases and functions
alias qs='qstat -a'

# load the default Intel compiler !!! not recommended !!!
ml intel

# Display information to standard output - only in interactive ssh session
if [ -n "$SSH_TTY" ]
then
 ml # Display loaded modules
fi
```

!!! note
    Do not run commands outputting to standard output (echo, module list, etc.) in .bashrc for non-interactive SSH sessions. It breaks fundamental functionality (SCP, PBS) of your account! Guard such commands with a check for SSH session interactivity, as shown in the previous example.

### Application Modules

In order to configure your shell for running a particular application on the clusters, we use the Modules package interface.

Application modules on clusters are built using [EasyBuild][1]. The modules are divided into the following structure:

```
 base: Default module class
 bio: Bioinformatics, biology and biomedical
 cae: Computer Aided Engineering (incl. CFD)
 chem: Chemistry, Computational Chemistry and Quantum Chemistry
 compiler: Compilers
 data: Data management & processing tools
 debugger: Debuggers
 devel: Development tools
 geo: Earth Sciences
 ide: Integrated Development Environments (e.g. editors)
 lang: Languages and programming aids
 lib: General purpose libraries
 math: High-level mathematical software
 mpi: MPI stacks
 numlib: Numerical Libraries
 perf: Performance tools
 phys: Physics and physical systems simulations
 system: System utilities (e.g. highly depending on system OS and hardware)
 toolchain: EasyBuild toolchains
 tools: General purpose tools
 vis: Visualization, plotting, documentation and typesetting
 OS: singularity image
 python: python packages
```

!!! note
    The modules set up the application paths, library paths, and environment variables needed for running a particular application.

The modules may be loaded, unloaded and switched, according to momentary needs. For details see [lmod][2].
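A few typical Lmod operations are sketched below; the GCC module name is only an illustration:

```console
$ ml av GCC        # list available versions of the module
$ ml GCC           # load the default version
$ ml               # list currently loaded modules
$ ml -GCC          # unload the module
$ ml purge         # unload all modules
```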

[1]: software/tools/easybuild.md
[2]: software/modules/lmod.md
# VNC

The **Virtual Network Computing** (**VNC**) is a graphical [desktop sharing][a] system that uses the [Remote Frame Buffer protocol (RFB)][b] to remotely control another [computer][c]. It transmits the [keyboard][d] and [mouse][e] events from one computer to another, relaying the graphical [screen][f] updates back in the other direction, over a [network][g].

VNC-based connections are usually faster (they require less network bandwidth) than [X11][1] applications forwarded directly through ssh.

The recommended clients are [TightVNC][h] or [TigerVNC][i] (free, open source, available for almost any platform).

In this chapter we show how to create an underlying ssh tunnel from your client machine to one of our login nodes, how to start your own VNC server on our login node, and finally how to connect to your VNC server via the encrypted ssh tunnel.

## Create VNC Password

!!! note
    Local VNC password should be set before the first login. Do use a strong password.

```console
[username@login2 ~]$ vncpasswd
Password:
Verify:
```

## Start Vncserver

!!! note
    To access VNC, a local vncserver must be started first, and a tunnel using SSH port forwarding must be established.

[See below][2] for the details on SSH tunnels.

You should start by **choosing your display number**.
To choose a free one, check the currently occupied display numbers and list them using the command:

```console
[username@login2 ~]$ ps aux | grep Xvnc | sed -rn 's/(\s) .*Xvnc (\:[0-9]+) .*/\1 \2/p'
username :79
username :60
.....
```

As you can see above, displays ":79" and ":60" are already occupied.
Generally, you can choose any display number freely, *except these occupied numbers*.
Also remember that the display number should be less than or equal to 99.
Based on this, **we have chosen display number 61**, which is used in the examples below.

!!! note
    Your situation may be different, so your choice of display number may also differ. **Please choose and use your own display number accordingly!**

Start your VNC server on the chosen display number (61):

```console
[username@login2 ~]$ vncserver :61 -geometry 1600x900 -depth 16

New 'login2:1 (username)' desktop is login2:1

Starting applications specified in /home/username/.vnc/xstartup
Log file is /home/username/.vnc/login2:1.log
```

Check whether the VNC server is running on the chosen display number (61):

```console
[username@login2 .vnc]$ vncserver -list

TigerVNC server sessions:

X DISPLAY #     PROCESS ID
:61              18437
```

Another way to check it:

```console
[username@login2 .vnc]$  ps aux | grep Xvnc | sed -rn 's/(\s) .*Xvnc (\:[0-9]+) .*/\1 \2/p'

username :61
username :102
```

!!! note
    The VNC server runs on port 59xx, where xx is the display number. So, you get your port number simply as 5900 + display number; in our example 5900 + 61 = 5961. Another example: for display number 102, the TCP port is 5900 + 102 = 6002, but be aware that TCP ports above 6000 are often used by X11. **Please calculate your own port number and use it instead of 5961 in the examples below!**

To access the VNC server, you have to create a tunnel between the login node (using TCP port 5961) and your machine (using a free TCP port; for simplicity the very same) in the next step. See the examples for [Linux/Mac OS][2] and [Windows][3].

!!! note
    The tunnel must point to the same login node where you launched the VNC server, e.g. login2. If you use just cluster-name.it4i.cz, the tunnel might point to a different node due to DNS round robin.

## Linux/Mac OS Example of Creating a Tunnel

At your machine, create the tunnel:

```console
local $  ssh -TN -f username@login2.cluster-name.it4i.cz -L 5961:localhost:5961
```

Issue the following command to check the tunnel is established (note the PID 2022 in the last column, you'll need it for closing the tunnel):

```console
local $ netstat -natp | grep 5961
(Not all processes could be identified, non-owned process info
 will not be shown, you would have to be root to see it all.)
tcp        0      0 127.0.0.1:5961          0.0.0.0:*               LISTEN      2022/ssh
tcp6       0      0 ::1:5961                :::*                    LISTEN      2022/ssh
```

Or on Mac OS use this command:

```console
local-mac $ lsof -n -i4TCP:5961 | grep LISTEN
ssh 75890 sta545 7u IPv4 0xfb062b5c15a56a3b 0t0 TCP 127.0.0.1:5961 (LISTEN)
```

Connect with the VNC client:

```console
local $ vncviewer 127.0.0.1:5961
```

In this example, we connect to VNC server on port 5961, via the ssh tunnel. The connection is encrypted and secured. The VNC server listening on port 5961 provides screen of 1600x900 pixels.

You have to destroy the SSH tunnel, which is still running in the background, after you finish your work. Use the following command (PID 2022 in this case, see the netstat command above):

```console
kill 2022
```

## Windows Example of Creating a Tunnel

Start the VNC server using the vncserver command as described above.

Search for the localhost and port number (in this case 127.0.0.1:5961).

```console
[username@login2 .vnc]$ netstat -tanp | grep Xvnc
(Not all processes could be identified, non-owned process info
 will not be shown, you would have to be root to see it all.)
tcp        0      0 127.0.0.1:5961              0.0.0.0:*                   LISTEN      24031/Xvnc
```

### PuTTY

On the PuTTY Configuration screen go to Connection->SSH->Tunnels to set up the tunnel.

Fill the Source port and Destination fields. **Do not forget to click the Add button**.

![](../../../img/putty-tunnel.png)

### WSL (Bash on Windows)

[Windows Subsystem for Linux][j] is another way to run Linux software in a Windows environment.

At your machine, create the tunnel:

```console
local $ ssh username@login2.cluster-name.it4i.cz -L 5961:localhost:5961
```

## Example of Starting VNC Client

Run the VNC client of your choice, select VNC server 127.0.0.1, port 5961 and connect using VNC password.

### TigerVNC Viewer

![](../../../img/vncviewer.png)

In this example, we connect to VNC server on port 5961, via the ssh tunnel, using TigerVNC viewer. The connection is encrypted and secured. The VNC server listening on port 5961 provides screen of 1600x900 pixels.

### TightVNC Viewer

Use your VNC password to log using TightVNC Viewer and start a Gnome Session on the login node.

![](../../../img/TightVNC_login.png)

## Gnome Session

You should see the following screen after a successful login.

![](../../../img/gnome_screen.png)

### Disable Your Gnome Session Screensaver

Open Screensaver preferences dialog:

![](../../../img/gdmscreensaver.png)

Uncheck both options below the slider:

![](../../../img/gdmdisablescreensaver.png)

### Kill Screensaver if Locked Screen

If the screen gets locked, you have to kill the screensaver. Do not forget to disable the screensaver afterwards.

```console
[username@login2 .vnc]$ ps aux | grep screen
username     1503 0.0 0.0 103244   892 pts/4    S+   14:37   0:00 grep screen
username     24316 0.0 0.0 270564 3528 ?        Ss   14:12   0:00 gnome-screensaver

[username@login2 .vnc]$ kill 24316
```

## Kill Vncserver After Finished Work

You should kill your VNC server using the command:

```console
[username@login2 .vnc]$  vncserver  -kill :61
Killing Xvnc process ID 7074
Xvnc process ID 7074 already killed
```

Or this way:

```console
[username@login2 .vnc]$  pkill vnc
```

!!! note
    Do not forget to also terminate the SSH tunnel, if it was used. See the end of [this section][2] for details.

## GUI Applications on Compute Nodes Over VNC

The very same methods as described above may be used to run GUI applications on compute nodes. However, for maximum performance, proceed with the following steps:

Open a Terminal (Applications -> System Tools -> Terminal). Run all the following commands in the terminal.

![](../../../img/gnome-terminal.png)

Allow incoming X11 graphics from the compute nodes at the login node:

```console
$ xhost +
```

Get an interactive session on a compute node (for more detailed info [look here][4]). Use the **-v DISPLAY** option to propagate the DISPLAY on the compute node. In this example, we want a complete node (16 cores in this example) from the production queue:

```console
$ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A PROJECT_ID -q qprod -l select=1:ncpus=16
```

Test that the DISPLAY redirection into your VNC session works by running an X11 application (e.g. xterm) on the assigned compute node:

```console
$ xterm
```

Example described above:

![](../../../img/gnome-compute-nodes-over-vnc.png)

[a]: http://en.wikipedia.org/wiki/Desktop_sharing
[b]: http://en.wikipedia.org/wiki/RFB_protocol
[c]: http://en.wikipedia.org/wiki/Computer
[d]: http://en.wikipedia.org/wiki/Computer_keyboard
[e]: http://en.wikipedia.org/wiki/Computer_mouse
[f]: http://en.wikipedia.org/wiki/Computer_screen
[g]: http://en.wikipedia.org/wiki/Computer_network
[h]: http://www.tightvnc.com
[i]: http://sourceforge.net/apps/mediawiki/tigervnc/index.php?title=Main_Page
[j]: http://docs.microsoft.com/en-us/windows/wsl

[1]: x-window-system.md
[2]: #linuxmac-os-example-of-creating-a-tunnel
[3]: #windows-example-of-creating-a-tunnel
[4]: ../../../anselm/job-submission-and-execution.md
# X Window System

The X Window system is a principal way to get GUI access to the clusters. The **X Window System** (commonly known as **X11**, based on its current major version being 11, or shortened to simply **X**, and sometimes informally **X-Windows**) is a computer software system and network [protocol][a] that provides a basis for [graphical user interfaces][b] (GUIs) and rich input device capability for [networked computers][c].

!!! tip
    The X display forwarding must be activated and the X server must be running on the client side.

## X Display

### Linux Example

In order to display the graphical user interface (GUI) of various software tools, you need to enable X display forwarding. On Linux and Mac, log in using the -X option of the ssh client:

```console
 local $ ssh -X username@cluster-name.it4i.cz
```

### PuTTY on Windows

On Windows, use the PuTTY client to enable X11 forwarding. In the PuTTY menu, go to Connection-SSH-X11 and mark the Enable X11 forwarding checkbox before logging in.

![](../../../img/cygwinX11forwarding.png)

Then log in as usual.

### WSL (Bash on Windows)

In order to display the graphical user interface (GUI) of various software tools, you need to enable X display forwarding. Log in using the -X option of the ssh client:

```console
 local $ ssh -X username@cluster-name.it4i.cz
```

!!! tip
    If you are getting the error message "cannot open display", try to export the DISPLAY variable before attempting to log in:

```console
 local $ export DISPLAY=localhost:0.0
```

## Verify the Forwarding

To verify the forwarding, type

```console
$ echo $DISPLAY
```

if you receive something like

```console
localhost:10.0
```

then the X11 forwarding is enabled.

## X Server

In order to display the graphical user interface (GUI) of various software tools, you need a running X server on your desktop computer. For Linux users, no action is required, as the X server is the default GUI environment on most Linux distributions. Mac and Windows users need to install and run the X server on their workstations.

### X Server on OS X

Mac OS users need to install [XQuartz server][d].

### X Server on Windows

There is a variety of X servers available for the Windows environment. The commercial Xwin32 is very stable and feature-rich. The Cygwin environment provides the fully featured open-source XWin X server. For simplicity, we recommend the open-source X server by the [Xming project][e]. For stability and full features, we recommend the [XWin][f] X server by Cygwin.

| How to use Xwin | How to use Xming |
|--- | --- |
| [Install Cygwin][g]. Find and execute XWin.exe to start the X server on the Windows desktop computer. [If not able to forward X11 using PuTTY to CygwinX][1] | Use Xlaunch to configure Xming. Run Xming to start the X server on the Windows desktop computer. |

Read more [here][h].

## Running GUI Enabled Applications

!!! note
    Make sure that X forwarding is activated and the X server is running.

Then launch the application as usual. Use the & to run the application in background.

```console
$ ml intel    # (idb and gvim not installed yet)
$ gvim &
```

```console
$ xterm
```

In this example, we activate the Intel programming environment tools, then start the graphical gvim editor.

## GUI Applications on Compute Nodes

Allocate the compute nodes using the -X option with the qsub command:

```console
$ qsub -q qexp -l select=2:ncpus=24 -X -I
```

In this example, we allocate 2 nodes via qexp queue, interactively. We request X11 forwarding with the -X option. It will be possible to run the GUI enabled applications directly on the first compute node.

**Better performance** is obtained by logging on the allocated compute node via ssh, using the -X option.

```console
$ ssh -X r24u35n680
```

In this example, we log in on the r24u35n680 compute node, with the X11 forwarding enabled.

## Gnome GUI Environment

The Gnome 2.28 GUI environment is available on the clusters. We recommend using a separate X server window for displaying the Gnome environment.

### Gnome on Linux and OS X

To run the remote Gnome session in a window on a Linux/OS X computer, you need to install Xephyr. The Ubuntu package is
xserver-xephyr; on OS X it is part of [XQuartz][i]. First, launch Xephyr on the local machine:

```console
local $ Xephyr -ac -screen 1024x768 -br -reset -terminate :1 &
```

This will open a new X window with size 1024 x 768 at DISPLAY :1. Next, ssh to the cluster with the DISPLAY environment variable set and launch gnome-session:

```console
local $ DISPLAY=:1.0 ssh -XC yourname@cluster-name.it4i.cz -i ~/.ssh/path_to_your_key
... cluster-name MOTD...
yourname@login1.cluster-name.it4i.cz $ gnome-session &
```

On older systems where Xephyr is not available, you may also try Xnest instead of Xephyr. Another option is to launch a new X server in a separate console, via:

```console
xinit /usr/bin/ssh -XT -i .ssh/path_to_your_key yourname@cluster-name.it4i.cz gnome-session -- :1 vt12
```

However, this method does not seem to work with recent Linux distributions, and you will need to manually source /etc/profile to properly set the environment variables for PBS.

### Gnome on Windows

Use XLaunch to start the Xming server or run the XWin.exe. Select the "One window" mode.

Log in to the cluster, using [PuTTY][2] or [Bash on Windows][3]. On the cluster, run the gnome-session command.

```console
$ gnome-session &
```

In this way, we run a remote gnome session on the cluster, displaying it in the local X server.

Use System - Log Out to close the gnome-session.

[1]: #if-no-able-to-forward-x11-using-putty-to-cygwinx
[2]: #putty-on-windows
[3]: #wsl-bash-on-windows

[a]: http://en.wikipedia.org/wiki/Protocol_%28computing%29
[b]: http://en.wikipedia.org/wiki/Graphical_user_interface
[c]: http://en.wikipedia.org/wiki/Computer_network
[d]: https://www.xquartz.org
[e]: http://sourceforge.net/projects/xming/
[f]: http://x.cygwin.com/
[g]: http://x.cygwin.com/
[h]: http://www.math.umn.edu/systems_guide/putty_xwin32.shtml
[i]: http://xquartz.macosforge.org/landing/
Original line number Original line Diff line number Diff line
# PuTTY (Windows)

## Windows PuTTY Installer

We recommend downloading "**A Windows installer for everything except PuTTYtel**" with **Pageant** (SSH authentication agent) and **PuTTYgen** (PuTTY key generator), which is available [here][a].

!!! note
    After installation you can proceed directly to private keys authentication using ["Putty"][1].

    "Change Password for Existing Private Key" is optional.

    "Generate a New Public/Private key pair" is intended for users without Public/Private key in the initial email containing login credentials.

    "Pageant" is optional.

## PuTTY - How to Connect to the IT4Innovations Cluster

* Run PuTTY
* Enter the Host Name and Save session fields with the login address and browse the Connection - SSH - Auth menu. The _Host Name_ input may be in the format **"username@clustername.it4i.cz"** so you don't have to type your login each time. In this example we will connect to the Salomon cluster using **"salomon.it4i.cz"**.

![](../../../img/PuTTY_host_Salomon.png)

* Category - Connection - SSH - Auth:
    * Select Attempt authentication using Pageant.
    * Select Allow agent forwarding.
    * Browse and select your [private key][2] file.

![](../../../img/PuTTY_keyV.png)

* Return to the Session page and save the selected configuration with the _Save_ button.

![](../../../img/PuTTY_save_Salomon.png)

* Now you can log in using the _Open_ button.

![](../../../img/PuTTY_open_Salomon.png)

* Enter your username if the _Host Name_ input is not in the format "username@salomon.it4i.cz".
* Enter the passphrase for the selected [private key][2] file if the Pageant **SSH authentication agent is not used**.

## Other PuTTY Settings

* Category - Window - Translation - Remote character set: select **UTF-8**.
* Category - Terminal - Features: select **Disable application keypad mode** (enables the numpad).
* Save your configuration on the Session page into Default Settings with the _Save_ button.

## Pageant SSH Agent

Pageant holds your private key in memory without needing to retype a passphrase on every login.

* Run Pageant.
* On the Pageant Key List, press _Add key_ and select your private key (id_rsa.ppk).
* Enter your passphrase.
* Now you have your private key in memory without needing to retype a passphrase on every login.

![](../../../img/PageantV.png)

## PuTTY Key Generator

PuTTYgen is the PuTTY key generator. You can load in an existing private key and change your passphrase or generate a new public/private key pair.

### Change Password for Existing Private Key

You can change the password of your SSH key with the PuTTY Key Generator. Make sure to back up the key.

* Load your [private key][2] file with the _Load_ button.
* Enter your current passphrase.
* Change the key passphrase.
* Confirm the key passphrase.
* Save your private key with the _Save private key_ button.

![](../../../img/PuttyKeygeneratorV.png)

### Generate a New Public/Private Key

You can generate an additional public/private key pair and insert the public key into the authorized_keys file for authentication with your own private key.

* Start with the _Generate_ button.

![](../../../img/PuttyKeygenerator_001V.png)

* Generate some randomness.

![](../../../img/PuttyKeygenerator_002V.png)

* Wait.

![](../../../img/PuttyKeygenerator_003V.png)

* Enter a _comment_ for your key using the format 'username@organization.example.com'.
    * Enter the key passphrase.
    * Confirm the key passphrase.
    * Save your new private key in `*.ppk` format with the _Save private key_ button.

![](../../../img/PuttyKeygenerator_004V.png)

* Save the public key with the _Save public key_ button.
    * You can copy the public key out of the ‘Public key for pasting into authorized_keys file’ box.

![](../../../img/PuttyKeygenerator_005V.png)

* Export the private key in the OpenSSH format "id_rsa" using Conversions - Export OpenSSH key.

![](../../../img/PuttyKeygenerator_006V.png)

* Now you can insert the additional public key into the authorized_keys file for authentication with your own private key.
    * You must log in using the ssh key received after registration. Then proceed to [How to add your own key][2].

[1]: #putty
[2]: ssh-keys.md

[a]: http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html
# OpenSSH Keys (UNIX)

## Key Management

After logging in, you can see the .ssh/ directory with SSH keys and the authorized_keys file:

```console
$ cd /home/username/
$ ls -la .ssh/
    total 24
    drwx------ 2 username username 4096 May 13 15:12 .
    drwxr-x--- 22 username username 4096 May 13 07:22 ..
    -rw-r--r-- 1 username username 392 May 21 2014 authorized_keys
    -rw------- 1 username username 1675 May 21 2014 id_rsa
    -rw------- 1 username username 1460 May 21 2014 id_rsa.ppk
    -rw-r--r-- 1 username username 392 May 21 2014 id_rsa.pub
```

!!! hint
    Private keys in the .ssh directory are without a passphrase and allow you to connect within the cluster.

## Access Privileges on .ssh Folder

* .ssh directory: `700 (drwx------)`
* Authorized_keys, known_hosts and public key (.pub file): `644 (-rw-r--r--)`
* Private key (id_rsa/id_rsa.ppk): `600 (-rw-------)`

```console
$ cd /home/username/
$ chmod 700 .ssh/
$ chmod 644 .ssh/authorized_keys
$ chmod 644 .ssh/id_rsa.pub
$ chmod 644 .ssh/known_hosts
$ chmod 600 .ssh/id_rsa
$ chmod 600 .ssh/id_rsa.ppk
```

## Private Key

!!! note
    The path to a private key is usually /home/username/.ssh/

A private key file in the `id_rsa` or `*.ppk` format is used to authenticate with the servers. The private key is kept on the local side and is used, for example, by the SSH agent Pageant (for Windows users). The private key should always be kept in a safe place.
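
If you use OpenSSH on Linux or OS X, the private key can be loaded into a local ssh-agent so that the passphrase does not have to be retyped for every connection. A minimal sketch, assuming the key is stored in the default location ~/.ssh/id_rsa:

```console
local $ eval $(ssh-agent)
local $ ssh-add ~/.ssh/id_rsa
```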

An example of private key format:

```console
    -----BEGIN RSA PRIVATE KEY-----
    MIIEpAIBAAKCAQEAqbo7jokygnBpG2wYa5NB45ns6+UKTNLMLHF0BO3zmRtKEElE
    aGqXfbYwvXlcuRb2d9/Y5dVpCZHV0kbY3NhtVOcEIe+1ROaiU9BEsUAhMNEvgiLV
    gSql4QvRO4BWPlM8+WAWXDp3oeoBh8glXyuh9teb8yq98fv1r1peYGRrW3/s4V+q
    O1SQ0XY2T7rWCYRLIP6rTMXArTI35v3WU513mn7nm1fJ7oN0QgVH5b0W9V1Kyc4l
    9vILHeMXxvz+i/5jTEfLOJpiRGYZYcaYrE4dIiHPl3IlbV7hlkK23Xb1US8QJr5G
    ADxp1VTkHjY+mKagEfxl1hQIb42JLHhKMEGqNQIDAQABAoIBAQCkypPuxZjL+vai
    UGa5dAWiRZ46P2yrwHPKpvEdpCdDPbLAc1K/CtdBkHZsUPxNHVV6eFWweW99giIY
    Av+mFWC58X8asBHQ7xkmxW0cqAZRzpkRAl9IBS9/fKjO28Fgy/p+suOi8oWbKIgJ
    3LMkX0nnT9oz1AkOfTNC6Tv+3SE7eTj1RPcMjur4W1Cd1N3EljLszdVk4tLxlXBS
    yl9NzVnJJbJR4t01l45VfFECgYEAno1WJSB/SwdZvS9GkfhvmZd3r4vyV9Bmo3dn
    XZAh8HRW13imOnpklDR4FRe98D9A7V3yh9h60Co4oAUd6N+Oc68/qnv/8O9efA+M
    /neI9ANYFo8F0+yFCp4Duj7zPV3aWlN/pd8TNzLqecqh10uZNMy8rAjCxybeZjWd
    DyhgywXhAoGBAN3BCazNefYpLbpBQzwes+f2oStvwOYKDqySWsYVXeVgUI+OWTVZ
    eZ26Y86E8MQO+q0TIxpwou+TEaUgOSqCX40Q37rGSl9K+rjnboJBYNCmwVp9bfyj
    kCLL/3g57nTSqhgHNa1xwemePvgNdn6FZteA8sXiCg5ZzaISqWAffek5AoGBAMPw
    V/vwQ96C8E3l1cH5cUbmBCCcfXM2GLv74bb1V3SvCiAKgOrZ8gEgUiQ0+TfcbAbe
    7MM20vRNQjaLTBpai/BTbmqM1Q+r1KNjq8k5bfTdAoGANgzlNM9omM10rd9WagL5
    yuJcal/03p048mtB4OI4Xr5ZJISHze8fK4jQ5veUT9Vu2Fy/w6QMsuRf+qWeCXR5
    RPC2H0JzkS+2uZp8BOHk1iDPqbxWXJE9I57CxBV9C/tfzo2IhtOOcuJ4LY+sw+y/
    ocKpJbdLTWrTLdqLHwicdn8OxeWot1mOukyK2l0UeDkY6H5pYPtHTpAZvRBd7ETL
    Zs2RP3KFFvho6aIDGrY0wee740/jWotx7fbxxKwPyDRsbH3+1Wx/eX2RND4OGdkH
    gejJEzpk/7y/P/hCad7bSDdHZwO+Z03HIRC0E8yQz+JYatrqckaRCtd7cXryTmTR
    FbvLJmECgYBDpfno2CzcFJCTdNBZFi34oJRiDb+HdESXepk58PcNcgK3R8PXf+au
    OqDBtZIuFv9U1WAg0gzGwt/0Y9u2c8m0nXziUS6AePxy5sBHs7g9C9WeZRz/nCWK
    +cHIm7XOwBEzDKz5f9eBqRGipm0skDZNKl8X/5QMTT5K3Eci2n+lTw==
    -----END RSA PRIVATE KEY-----
```

## Public Key

A public key file in the `*.pub` format is used to verify a digital signature. The public key is present on the remote side and allows access to the owner of the matching private key.

An example of public key format:

```console
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCpujuOiTKCcGkbbBhrk0Hjmezr5QpM0swscXQE7fOZG0oQSURoapd9tjC9eVy5FvZ339jl1WkJkdXSRtjc2G1U5wQh77VE5qJT0ESxQCEw0S+CItWBKqXhC9E7gFY+UyP5YBZcOneh6gGHyCVfK6H215vzKr3x+/WvWl5gZGtbf+zhX6o4RJDRdjZPutYJhEsg/qtMxcCtMjfm/dZTnXeafuebV8nug3RCBUflvRb1XUrJuiX28gsd4xfG/P6L/mNMR8s4kmJEZhlhxpj8Th0iIc+XciVtXuGWQrbddcVRLxAmvkYAPGnVVOQeNj69pqAR/GXaFAhvjYkseEowQao1 username@organization.example.com
```

## How to Add Your Own Key

First, generate a new key pair, consisting of your public and private key:

```console
local $ ssh-keygen -C 'username@organization.example.com' -f additional_key
```

!!! note
    Enter a **strong passphrase** to secure your private key.

You can insert an additional public key into the authorized_keys file for authentication with your own private key. Additional records in the authorized_keys file must be delimited by a new line. Users are not advised to remove the default public key from the authorized_keys file.

Example:

```console
$ cat additional_key.pub >> ~/.ssh/authorized_keys
```

In this example, we append the additional public key, stored in the file additional_key.pub, to the authorized_keys file. Next time we log in, we will be able to use the private additional_key key to log in.
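
For example, to use the new key explicitly when connecting (the key path and the cluster address are placeholders):

```console
local $ ssh -i ~/.ssh/additional_key yourname@cluster-name.it4i.cz
```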

## How to Remove Your Own Key

Removing your key from authorized_keys can be done simply by deleting the corresponding public key line, which can be identified by the comment at the end of the line (e.g. _username@organization.example.com_).
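
For example, assuming the key to be removed carries the comment _username@organization.example.com_, you may back up the file and delete the matching line with sed:

```console
$ cp ~/.ssh/authorized_keys ~/.ssh/authorized_keys.bak
$ sed -i '/username@organization.example.com/d' ~/.ssh/authorized_keys
```
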
# VPN Access

## Accessing IT4Innovations Internal Resources via VPN

To use resources and licenses located in the IT4Innovations local network, it is necessary to connect to this network via VPN. We use the Cisco AnyConnect Secure Mobility Client, which is supported on the following operating systems:

* Windows XP
* Windows Vista
* Windows 7
* Windows 8
* Linux
* MacOS

It is not possible to connect to the VPN from other operating systems.

## VPN Client Installation

You can install the VPN client from the web interface [here][a] after a successful login with your [IT4I credentials][1].

![](../../img/vpn_web_login.png)

Depending on your Java settings, after login the client either installs automatically or downloads an installation file for your operating system. For the automatic installation, it is necessary to allow the installation tool to start. If the automatic installation does not start, proceed with the manual installation described in the next steps.

![](../../img/vpn_web_login_2.png)
![](../../img/vpn_web_install_2.png)
![](../../img/copy_of_vpn_web_install_3.png)

After a successful installation, the VPN connection will be established and you can use the available resources from the IT4I network.

![](../../img/vpn_web_install_4.png)

If your Java settings do not allow automatic installation, you can download the installation file and install the VPN client manually.

![](../../img/vpn_web_download.png)

After you click the link, the download of the installation file will start.

![](../../img/vpn_web_download_2.png)

After a successful download, execute the installation file with administrator or root rights and install the VPN client manually.

## Working With VPN Client

You can use either the graphical user interface or the command line interface to run the VPN client on all supported operating systems. We suggest using the GUI.
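
For reference, a minimal sketch of the command line usage on Linux, assuming the client is installed in its default location /opt/cisco/anyconnect/bin:

```console
$ /opt/cisco/anyconnect/bin/vpn connect vpn.it4i.cz
$ /opt/cisco/anyconnect/bin/vpn state
$ /opt/cisco/anyconnect/bin/vpn disconnect
```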

Before the first login to the VPN, you have to fill the URL **[https://vpn.it4i.cz/user][a]** into the text field.

![](../../img/vpn_contacting_https_cluster.png)

After you click the Connect button, you must enter your login credentials.

![](../../img/vpn_contacting_https.png)

After a successful login, the client will minimize to the system tray. If everything works, you can see a lock in the Cisco tray icon.

![](../../img/anyconnecticon.jpg)

If you right-click on this icon, you will see a context menu in which you can control the VPN connection.

![](../../img/anyconnectcontextmenu.jpg)

When you connect to the VPN for the first time, the client downloads the profile and creates a new item "IT4I cluster" in the connection list. For subsequent connections, it is not necessary to re-enter the URL address; just select the corresponding item.

![](../../img/vpn_contacting.png)

Then AnyConnect automatically proceeds as in the case of the first logon.

![](../../img/vpn_login.png)

After a successful logon, you can see a green circle with a tick mark on the lock icon.

![](../../img/vpn_successfull_connection.png)

For disconnecting, right-click on the AnyConnect client icon in the system tray and select **VPN Disconnect**.

[1]: ../../general/obtaining-login-credentials/obtaining-login-credentials.md#login-credentials

[a]: https://vpn.it4i.cz/user
# Applying for Resources

Computational resources may be allocated by any of the following [Computing resources allocation][a] mechanisms.

Academic researchers can apply for computational resources via [Open Access Competitions][b].

Anyone is welcome to apply via the [Director's Discretion][c].

Foreign (mostly European) users can obtain computational resources via the [PRACE (DECI) program][d].

In all cases, IT4Innovations’ access mechanisms are aimed at distributing computational resources while taking into account the development and application of supercomputing methods and their benefits and usefulness for society. The applicants are expected to submit a proposal. In the proposal, the applicants **apply for a particular amount of core-hours** of computational resources. The requested core-hours should be substantiated by the scientific excellence of the proposal, its computational maturity, and expected impacts. Proposals undergo a scientific, technical, and economic evaluation. The allocation decisions are based on this evaluation. More information can be found on the [Computing resources allocation][a] and [Obtaining Login Credentials][1] pages.

[1]: obtaining-login-credentials/obtaining-login-credentials.md

[a]: http://www.it4i.cz/computing-resources-allocation/?lang=en
[b]: http://www.it4i.cz/open-access-competition/?lang=en&lang=en
[c]: http://www.it4i.cz/obtaining-computational-resources-through-directors-discretion/?lang=en&lang=en
[d]: http://www.prace-ri.eu/DECI-Projects
# Certificates FAQ

FAQ about certificates in general

## Q: What Are Certificates?

IT4Innovations employs X.509 certificates for secure communication (e.g. credentials exchange) and for grid services related to PRACE, as they present a single method of authentication for all PRACE services, where only one password is required.

There are different kinds of certificates, each with a different scope of use. We mention here:

* User (Private) certificates
* Certificate Authority (CA) certificates
* Host certificates
* Service certificates

However, users need only manage User and CA certificates. Note that your user certificate is protected by an associated private key, and this **private key must never be disclosed**.

## Q: Which X.509 Certificates Are Recognised by IT4Innovations?

[The Certificates for Digital Signatures][1].

## Q: How Do I Get a User Certificate That Can Be Used With IT4Innovations?

To get a certificate, you must make a request to your local, IGTF approved, Certificate Authority (CA). Usually you then must visit, in person, your nearest Registration Authority (RA) to verify your affiliation and identity (photo identification is required). Usually, you will then be emailed details on how to retrieve your certificate, although procedures can vary between CAs. If you are in Europe, you can locate [your trusted CA][a].

In some countries, certificates can also be retrieved using the TERENA Certificate Service; see the FAQ below for the link.

## Q: Does IT4Innovations Support Short Lived Certificates (SLCS)?

Yes, provided that the CA which provides this service is also a member of IGTF.

## Q: Does IT4Innovations Support the TERENA Certificate Service?

Yes, IT4Innovations supports TERENA eScience personal certificates. For more information, visit [TCS - Trusted Certificate Service][b], where you can also find out whether your organisation/country can use this service.

## Q: What Format Should My Certificate Take?

User certificates come in many formats, the three most common being the PKCS12, PEM, and JKS formats.

The PKCS12 (often abbreviated to ’p12’) format stores your user certificate, along with your associated private key, in a single file. This form of your certificate is typically employed by web browsers, mail clients, and grid services like UNICORE, DART, gsissh-term and Globus toolkit (GSI-SSH, GridFTP and GRAM5).

The PEM format (`*`.pem) stores your user certificate and your associated private key in two separate files. This form of your certificate can be used by PRACE’s gsissh-term and with the grid related services like Globus toolkit (GSI-SSH, GridFTP and GRAM5).

To convert your Certificate from PEM to p12 formats, and _vice versa_, IT4Innovations recommends using the openssl tool (see separate FAQ entry).

JKS is the Java KeyStore and may contain both your personal certificate with your private key and a list of your trusted CA certificates. This form of your certificate can be used by grid services like DART and UNICORE6.

To convert your certificate from p12 to JKS, IT4Innovations recommends using the keytool utility (see separate FAQ entry).

## Q: What Are CA Certificates?

Certification Authority (CA) certificates are used to verify the link between your user certificate and the authority which issued it. They are also used to verify the link between the host certificate of a IT4Innovations server and the CA which issued that certificate. In essence they establish a chain of trust between you and the target server. Thus, for some grid services, users must have a copy of all the CA certificates.

To assist users, SURFsara (a member of PRACE) provides a complete and up-to-date bundle of all the CA certificates that any PRACE user (or IT4Innovations grid services user) will require. Bundles of certificates, in either p12, PEM, or JKS format, are [available here][c].

It is worth noting that gsissh-term and DART automatically update their CA certificates from this SURFsara website. In other cases, if you receive a warning that a server’s certificate cannot be validated (not trusted), then update your CA certificates via the SURFsara website. If this fails, then contact the IT4Innovations helpdesk.

Lastly, if you need the CA certificates for a personal Globus 5 installation, then you can install the CA certificates from a MyProxy server with the following command.

```console
    myproxy-get-trustroots -s myproxy-prace.lrz.de
```

If you run this command as ’root’, then it will install the certificates into /etc/grid-security/certificates. If you do not run it as ’root’, then the certificates will be installed into $HOME/.globus/certificates. For Globus, you can download the globuscerts.tar.gz packet [available here][c].

## Q: What Is a DN and How Do I Find Mine?

DN stands for Distinguished Name and is part of your user certificate. IT4Innovations needs to know your DN to enable your account to use the grid services. You may use openssl (see below) to determine your DN or, if your browser contains your user certificate, you can extract your DN from your browser.

For Internet Explorer users, the DN is referred to as the "subject" of your certificate: Tools > Internet Options > Content > Certificates > View > Details > Subject.

For users running Firefox under Windows, the DN is referred to as the "subject" of your certificate: Tools > Options > Advanced > Encryption > View Certificates. Highlight your name and then click View > Details > Subject.

## Q: How Do I Use the Openssl Tool?

The following examples are for Unix/Linux operating systems only.

To convert from PEM to p12, enter the following command:

```console
    openssl pkcs12 -export -in usercert.pem -inkey userkey.pem -out username.p12
```

To convert from p12 to PEM, type the following _four_ commands:

```console
    openssl pkcs12 -in username.p12 -out usercert.pem -clcerts -nokeys
    openssl pkcs12 -in username.p12 -out userkey.pem -nocerts
    chmod 444 usercert.pem
    chmod 400 userkey.pem
```

To check your Distinguished Name (DN), enter the following command:

```console
    openssl x509 -in usercert.pem -noout -subject -nameopt RFC2253
```

To check your certificate (e.g., DN, validity, issuer, public key algorithm, etc.), enter the following command:

```console
    openssl x509 -in usercert.pem -text -noout
```

To download openssl if it is not pre-installed, see [here][d]. On Mac OS X computers, openssl is already pre-installed and can be used immediately.

## Q: How Do I Create and Then Manage a Keystore?

IT4Innovations recommends the Java-based keytool utility to create and manage keystores, which themselves are stores of keys and certificates. For example, if you want to convert your pkcs12-formatted key pair into a Java keystore, you can use the following command.

```console
    keytool -importkeystore -srckeystore $my_p12_cert -destkeystore $my_keystore -srcstoretype pkcs12 -deststoretype jks -alias $my_nickname -destalias $my_nickname
```

where $my_p12_cert is the name of your p12 (pkcs12) certificate, $my_keystore is the name that you give to your new Java keystore, and $my_nickname is the alias name that the p12 certificate was given and that is also used for the new keystore.

You can also import CA certificates into your Java keystore with the tool, e.g.:

```console
    keytool -import -trustcacerts -alias $mydomain -file $mydomain.crt -keystore $my_keystore
```

where $mydomain.crt is the certificate of a trusted signing authority (CA) and $mydomain is the alias name that you give to the entry.

More information on the tool can be found [here][e].

## Q: How Do I Use My Certificate to Access the Different Grid Services?

Most grid services require the use of your certificate; however, the format of your certificate depends on the grid Service you wish to employ.

If employing the PRACE version of GSISSH-term (also a Java Web Start Application), you may use either the PEM or p12 formats. Note that this service automatically installs up-to-date PRACE CA certificates.

If the grid service is UNICORE, then you bind your certificate, in either the p12 format or JKS, to UNICORE during the installation of the client on your local machine. For more information visit [UNICORE6 in PRACE][f].

If the grid service is part of Globus, such as GSI-SSH, GridFTP or GRAM5, then the certificates can be in either p12 or PEM format and must reside in the "$HOME/.globus" directory for Linux and Mac users, or in `%HOMEPATH%\.globus` for Windows users. (Windows users will have to use the DOS command ’cmd’ to create a directory which starts with a ’.’.) Further, user certificates should be named either "usercred.p12" or "usercert.pem" and "userkey.pem", and the CA certificates must be kept in a pre-specified directory as follows. For Linux and Mac users, this directory is either $HOME/.globus/certificates or /etc/grid-security/certificates. For Windows users, this directory is `%HOMEPATH%\.globus\certificates`. (If you are using GSISSH-Term from prace-ri.eu, then you do not have to create the .globus directory nor install CA certificates to use this tool alone.)

## Q: How Do I Manually Import My Certificate Into My Browser?

If you employ the Firefox browser, then you can import your certificate by first opening the "Preferences" window. For Windows, this is Tools > Options. For Linux, this is Edit > Preferences. For Mac, this is Firefox > Preferences. Then, choose the "Advanced" button, followed by the "Encryption" tab. Then, choose the "Certificates" panel; select the option "Select one automatically" if you have only one certificate, or "Ask me every time" if you have more than one. Then click on the "View Certificates" button to open the "Certificate Manager" window. You can then select the "Your Certificates" tab and click on the "Import" button. Then locate the PKCS12 (.p12) certificate you wish to import, and employ its associated password.

If you are a Safari user, then simply open the "Keychain Access" application and follow File > Import Items.

If you are an Internet Explorer user, click Start > Settings > Control Panel and then double-click on Internet. On the Content tab, click Personal, and then click Import. In the Password box, type your password. Note that you may be prompted multiple times for your password. In the "Certificate File To Import" box, type the filename of the certificate you wish to import, and then click OK. Click Close, and then click OK.

## Q: What Is a Proxy Certificate?

A proxy certificate is a short-lived certificate which may be employed by UNICORE and the Globus services. The proxy certificate consists of a new user certificate and a newly generated proxy private key. This proxy typically has a rather short lifetime (normally 12 hours) and often only allows a limited delegation of rights. Its default location, for Unix/Linux, is `/tmp/x509up_u<uid>`, but it can be set via the $X509_USER_PROXY environment variable.
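
For illustration, with the Globus toolkit a proxy is typically generated from your PEM certificate and key using the grid-proxy-init tool; a hedged example requesting a 12 hour proxy:

```console
    grid-proxy-init -valid 12:00
```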

## Q: What Is the MyProxy Service?

[The MyProxy Service][g] can be employed by gsissh-term and the Globus tools, and is an online repository that allows users to store long-lived proxy certificates remotely, which can then be retrieved for use at a later date. Each proxy is protected by a password provided by the user at the time of storage. This is beneficial to Globus users, as they do not have to carry their private keys and certificates when travelling; nor do users have to install private keys and certificates on possibly insecure computers.
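
For illustration only (using the PRACE MyProxy server mentioned above and a placeholder username), a proxy can be stored and later retrieved roughly as follows:

```console
    myproxy-init -s myproxy-prace.lrz.de -l username
    myproxy-logon -s myproxy-prace.lrz.de -l username
```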

## Q: Someone May Have Copied or Had Access to the Private Key of My Certificate Either in a Separate File or in the Browser. What Should I Do?

Please ask the CA that issued your certificate to revoke this certificate and to supply you with a new one. In addition, report this to IT4Innovations by contacting [the support team][h].

## Q: My Certificate Expired. What Should I Do?

In order to still be able to communicate with us, you have to request a new certificate from your Certificate Authority (CA). There is no need to explicitly send us any information about your new certificate if the new one has the same Distinguished Name (DN) as the old one.

[1]: #the-certificates-for-digital-signatures

[a]: https://www.eugridpma.org/members/worldmap/
[b]: https://tcs-escience-portal.terena.org/
[c]: https://winnetou.surfsara.nl/prace/certs/
[d]: https://www.openssl.org/source/
[e]: http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/keytool.html
[f]: http://www.prace-ri.eu/UNICORE6-in-PRACE
[g]: http://grid.ncsa.illinois.edu/myproxy/
[h]: https://support.it4i.cz/rt
# Obtaining Login Credentials

## Obtaining Authorization

The computational resources of IT4I are allocated by the Allocation Committee to a Project, investigated by a Primary Investigator. By allocating the computational resources, the Allocation Committee is authorizing the PI to access and use the clusters. The PI may decide to authorize a number of her/his Collaborators to access and use the clusters, to consume the resources allocated to her/his Project. These collaborators will be associated to the Project. The figure below depicts the authorization chain:

![](../../img/Authorization_chain.png)

!!! note
    You need to either [become the PI][1] or [be named as a collaborator][2] by a PI in order to access and use the clusters.

The Head of Supercomputing Services acts as the PI of the project DD-13-5. By joining this project, you may **access and explore the clusters** and use the software, development environment, and computers via the qexp and qfree queues. You may use these resources for your own education/research; no paperwork is required. All IT4I employees may contact the Head of Supercomputing Services in order to obtain **free access to the clusters**.

## Authorization of PI by Allocation Committee

The PI is authorized to use the clusters by the allocation decision issued by the Allocation Committee. The PI will be informed by IT4I about the Allocation Committee decision.

## Process Flow Chart

This chart describes the process of obtaining login credentials on the clusters. You may skip the tasks that you have already done. Some of the tasks, marked with an asterisk (\*), are clickable and will take you to a more detailed description.

* I am a collaborator on a project and want to obtain login credentials

<div class="mermaid">
graph TB
id10(I am a collaborator on a project and want to obtain login credentials)
id20[Obtain certificate for digital signature]
id10-->id20
id30[EduID organizations from CESNET*]
click id30 "#certificates-for-digital-signatures"
id40[Personal certificate from PostSignum or I.CA]
id50[Free certificate from Comodo*]
click id50 "#alternative-way-to-personal-certificate"
id55[Other trusted certificate]
subgraph ""
id20-->id30
id20-->id40
id20-->id50
id20-->id55
end
id60[Export and save certificate to a file]
id30-->id60
id40-->id60
id50-->id60
id55-->id60
id70[Import certificate into your email client*]
click id70 "#installation-of-the-certificate-into-your-mail-client"
id60-->id70
id80[Send email with request for access to IT4I Support*]
click id80 "#login-credentials"
id70-->id80
</div>

* I am Primary Investigator and I want to allow my collaborators to access my project

<div class="mermaid">
graph TB
id110(I am Primary Investigator and I want to allow my collaborators to access my project)
id120[Obtain certificate for digital signature]
id110-->id120
id130[EduID organizations from CESNET*]
click id130 "#certificates-for-digital-signatures"
id140[Personal certificate from PostSignum or I.CA]
id150[Free certificate from Comodo*]
click id150 "#alternative-way-to-personal-certificate"
id155[Other trusted certificate]
subgraph ""
id120-->id130
id120-->id140
id120-->id150
id120-->id155
end
id160[Export and save certificate to a file]
id130-->id160
id140-->id160
id150-->id160
id155-->id160
id170[Import certificate into your email client*]
click id170 "#installation-of-the-certificate-into-your-mail-client"
id160-->id170
id180[Send email with request for authorization to IT4I Support*]
click id180 "#authorization-by-e-mail-an-alternative-approach"
id170-->id180
</div>

* I am an existing User / Primary Investigator and I want to manage my Projects / Users

<div class="mermaid">
graph TB
id210(I am an existing User / Primary Investigator and I want to manage my Projects / Users)
id220[Log in to extranet.it4i.cz]
id230[Go to Projects section]
id210-->id220
id220-->id230
id240[Submit request to become project member]
id245[Wait for approval from Primary Investigator]
id230-->|User|id240
id240-->id245
id250[Wait for user to submit the request to become project member]
id255[Approve or deny user requests for becoming project members]
id230-->|Primary Investigator|id250
id250-->id255
id240-.->id255
</div>

## Login Credentials

Once authorized by the PI, every person (PI or Collaborator) wishing to access the clusters should contact [IT4I support][a] (e-mail: [support\[at\]it4i.cz][b]), providing the following information:

1. Project ID
1. Full name and affiliation
1. Statement that you have read and accepted the [Acceptable use policy document][c] (AUP).
1. Attach the AUP file.
1. Your preferred username, min 3, max 12 characters long. The preferred username must associate your surname and name or be otherwise derived from it. Only alphanumeric sequences and dash signs are allowed.
1. In case you choose the [Alternative way to personal certificate][3], a **scan of photo ID** (personal ID, passport, or driver's license) is required.

!!! warning
    Should the above information be provided by e-mail, the e-mail **must be** digitally signed. Read more on [digital signatures][4] below.

!!! hint
    VSB associates will be given their VSB login username.

Example (except for the subject line, which must be in English, you may use the Czech or Slovak language for communication with us):

```console
Subject: Access to IT4Innovations

Dear support,

Please open the user account for me and attach the account to OPEN-0-0
Name and affiliation: John Smith, john.smith@myemail.com, Department of Chemistry, MIT, US
I have read and accept the Acceptable use policy document (attached)

Preferred username: johnsm

Thank you,
John Smith
(Digitally signed)
```

You will receive your personal login credentials by protected e-mail. The login credentials include:

1. username
1. ssh private key and private key passphrase
1. system password

The clusters are accessed using the [private key][5] and username. The username and password are used for logging in to the [information systems][d].

## Authorization by Web

!!! warning
    **Only** for those who already have their IT4I HPC account.

This is the preferred way of granting access to project resources. Please use this method whenever possible.

Log in to the [IT4I Extranet portal][e] using IT4I credentials and go to the **Projects** section.

* **Users:** Please submit your request to become a project member.
* **Primary Investigators:** Please approve or deny the users' requests in the same section.

## Authorization by E-Mail (An Alternative Approach)

In order to authorize a Collaborator to utilize the allocated resources, the PI should contact [IT4I support][a] (e-mail: [support\[at\]it4i.cz][b]) and provide the following information:

1. Identify your project by the project ID.
1. Provide a list of people, including yourself, who are authorized to use the resources allocated to the project. The list must include the full name, e-mail, and affiliation. Provide usernames as well, if collaborator login access already exists on the IT4I systems.
1. Include "Authorization to IT4Innovations" in the subject line.

!!! warning
    Should the above information be provided by e-mail, the e-mail **must be** digitally signed. Read more on [digital signatures][4] below.

Example (except for the subject line, which must be in English, you may use the Czech or Slovak language for communication with us):

```console
Subject: Authorization to IT4Innovations

Dear support,

Please include my collaborators to project OPEN-0-0.

John Smith, john.smith@myemail.com, Department of Chemistry, MIT, US
Jonas Johansson, jjohansson@otheremail.se, Department of Physics, RIT, Sweden
Luisa Fibonacci, lf@emailitalia.it, Department of Mathematics, National Research Council, Italy

Thank you,
PI
(Digitally signed)
```

## Change Passphrase

On Linux, use

```console
local $ ssh-keygen -f id_rsa -p
```

On Windows, use [PuTTY Key Generator][6].

## Certificates for Digital Signatures

We accept personal certificates issued by any widely respected certification authority (CA). This includes certificates by CAs organized in the [International Grid Trust Federation][f], its European branch [EUGridPMA][g], and its member organizations, e.g. the [CESNET certification authority][h]. The Czech _"Qualified certificate" (Kvalifikovaný certifikát)_, provided by [PostSignum][i] or [I.CA][j] and used in electronic contact with Czech authorities, is accepted as well.

The certificate generation process for academic purposes, utilizing the CESNET certification authority, is well described here:

* [How to generate a personal TCS certificate in Mozilla Firefox web browser.][k] (in Czech)

!!! note
    The certificate file can be installed into your email client. Web-based email interfaces cannot be used for secure communication; an external application, such as Thunderbird or Outlook, must be used. This way, your new credentials will be visible only in applications that have access to your certificate.

If you are not able to obtain a certificate from any of the respected certification authorities, follow the Alternative Way below.

A FAQ about certificates can be found here: [Certificates FAQ][7].

## Alternative Way to Personal Certificate

Follow these steps **only** if you cannot obtain your certificate in a standard way. In case you choose this procedure, attach a **scan of photo ID** (personal ID, passport, or driver's license) when applying for login credentials.

!!! warning
    Please use Firefox (or a Firefox clone) for the following steps. Other browsers, like Chrome, are not compatible.

* Go to [COMODO Application for Secure Email Certificate][l].
* Fill in the form, accept the Subscriber Agreement, and submit it with the _Next_ button.
  * Type in the e-mail address which you intend to use for communication with us.
  * Don't forget your chosen _Revocation password_.
* You will receive an e-mail with a link to collect your certificate. Be sure to open the link in the same browser in which you submitted the application.
* Your browser should notify you that the certificate has been correctly installed in it. Now you will need to save it as a file.
* In Firefox, navigate to _Options > Advanced > Certificates > View Certificates_.
* Choose the _Your Certificates_ tab and find the fresh certificate with today's date.
* Select it and hit the _Backup..._ button.
* A standard save dialog should appear, where you can choose a name for the certificate file for easy identification in the future.
* You will be prompted to choose a passphrase for your new certificate. This passphrase will be needed for its installation into your favourite email client.

!!! note
    The certificate file can now be installed into your email client. Web-based email interfaces cannot be used for secure communication; an external application, such as Thunderbird or Outlook, must be used (instructions below). This way, your new credentials will be visible only in applications that have access to your certificate.

## Installation of the Certificate Into Your Mail Client

The procedure is similar to the following guides:

MS Outlook 2010

* [How to Remove, Import, and Export Digital certificates][m]
* [Importing a PKCS #12 certificate (in Czech)][o]

Mozilla Thunderbird

* [Installing an SMIME certificate][p]
* [Importing a PKCS #12 certificate (in Czech)][q]

## End of User Account Lifecycle

User accounts are supported by membership in active Project(s) or by affiliation to IT4Innovations. User accounts that lose this support (meaning they are not attached to an active project and are not affiliated with IT4I) will be deleted 1 year after the last project to which they were attached expires.

The user will get 3 automatically generated warning e-mail messages about the pending removal:

* The first message will be sent 3 months before the removal.
* The second message will be sent 1 month before the removal.
* The third message will be sent 1 week before the removal.

These messages will state the projected removal date and will ask the user to migrate her/his data.

[1]: ../applying-for-resources.md
[2]: #authorization-by-web
[3]: #alternative-way-to-personal-certificate
[4]: #certificates-for-digital-signatures
[5]: ../accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md
[6]: ../accessing-the-clusters/shell-access-and-data-transfer/putty.md#putty-key-generator
[7]: ../obtaining-login-credentials/certificates-faq.md

[a]: https://support.it4i.cz/rt/
[b]: mailto:support@it4i.cz
[c]: http://www.it4i.cz/acceptable-use-policy.pdf
[d]: http://support.it4i.cz/
[e]: https://extranet.it4i.cz
[f]: http://www.igtf.net/
[g]: https://www.eugridpma.org
[h]: https://tcs.cesnet.cz
[i]: http://www.postsignum.cz/
[j]: http://www.ica.cz/Kvalifikovany-certifikat.aspx
[k]: http://idoc.vsb.cz/xwiki/wiki/infra/view/uzivatel/moz-cert-gen
[l]: https://secure.comodo.com/products/frontpage?area=SecureEmailCertificate
[m]: http://support.microsoft.com/kb/179380
[o]: http://idoc.vsb.cz/xwiki/wiki/infra/view/uzivatel/outl-cert-imp
[p]: https://support.globalsign.com/customer/portal/articles/1214955-install-certificate---mozilla-thunderbird
[q]: http://idoc.vsb.cz/xwiki/wiki/infra/view/uzivatel/moz-cert-imp
# Resource Allocation and Job Execution

To run a [job][1], computational resources for this particular job must be allocated. This is done via the PBS Pro job workload manager software, which distributes workloads across the supercomputer. Extensive information about PBS Pro can be found in the [PBS Pro User's Guide][2].

## Resources Allocation Policy

The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and the resources available to the Project. The [Fair-share][3] ensures that individual users may consume an approximately equal amount of resources per week. The resources are accessible via queues that hold the queued jobs. The queues provide prioritized and exclusive access to the computational resources. The following queues are the most important:

* **qexp**, the Express queue
* **qprod**, the Production queue
* **qlong**, the Long queue
* **qmpp**, the Massively parallel queue
* **qnvidia**, **qmic**, **qfat**, the Dedicated queues
* **qfree**, the Free resource utilization queue

!!! note
    Check the queue status [here][a].

Read more on the [Resource Allocation Policy][4] page.

## Job Submission and Execution

!!! note
    Use the **qsub** command to submit your jobs.

The qsub command submits the job into the queue and creates a request to the PBS Job manager for allocation of the specified resources. The **smallest allocation unit is an entire node, 16 cores**, with the exception of the qexp queue. The resources will be allocated when available, subject to the allocation policies and constraints. **After the resources are allocated, the jobscript or interactive shell is executed on the first of the allocated nodes.**
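
For illustration, a hedged example of an interactive allocation in the qexp queue and of a jobscript submission to qprod (the project ID OPEN-0-0 and the jobscript ./myjob.sh are placeholders):

```console
$ qsub -A OPEN-0-0 -q qexp -l select=1:ncpus=16 -I
$ qsub -A OPEN-0-0 -q qprod -l select=2:ncpus=16 -l walltime=04:00:00 ./myjob.sh
```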

Read more on the [Job submission and execution][5] page.

## Capacity Computing

!!! note
    Use Job arrays when running a huge number of jobs.

Use GNU Parallel and/or Job arrays when running (many) single core jobs.

In many cases, it is useful to submit a huge number (100+) of computational jobs into the PBS queue system. A huge number of (small) jobs is one of the most effective ways to execute embarrassingly parallel calculations, achieving the best runtime, throughput, and computer utilization. In this chapter, we discuss the recommended way to run a huge number of jobs, including **ways to run a huge number of single-core jobs**.
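
As a hedged sketch, a job array of 100 subjobs can be submitted under PBS Pro with the -J option (the project ID and jobscript are placeholders); each subjob then reads its index from the PBS_ARRAY_INDEX environment variable:

```console
$ qsub -A OPEN-0-0 -q qprod -J 1-100 ./myjob.sh
```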

Read more on [Capacity computing][6] page.

[1]: #terminology-frequently-used-on-these-pages
[2]: ../pbspro.md
[3]: ../salomon/job-priority.md#fair-share-priority
[4]: ../salomon/resources-allocation-policy.md
[5]: ../salomon/job-submission-and-execution.md
[6]: ../salomon/capacity-computing.md

[a]: https://extranet.it4i.cz/
