Compare revisions

5228c88d · 484f835f · ef3b6417 · 24ab3cf0 · b0bb60b7 · 14862d36
--- a/.gitignore
+++ b/.gitignore
+site/
+scripts/*.csv
+venv/
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
+# Pipeline phases; jobs are grouped by the stage they name.
+stages:
+  - test
+  - build
+  - deploy
+# Point pip's cache inside the project directory; the mkdocs job lists
+# `.cache/pip` among its cache paths.
+variables:
+  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
+# Lint the top-level *.md files and docs.it4i with mdl, excluding the rules
+# listed after -r. allow_failure keeps lint errors from failing the pipeline.
+docs:
+  image: davidhrbac/docker-mdcheck:latest
+  stage: test
+  allow_failure: true
+  script:
+    - mdl -r ~MD024,~MD013,~MD033,~MD014,~MD026,~MD037,~MD036,~MD010,~MD029 *.md docs.it4i  # BUGS
+# Run scripts/titlemd_test.py once per file on mkdocs.yml and on every
+# *.md / *.yml file found under docs.it4i/.
+capitalize:
+  image: davidhrbac/docker-mkdocscheck:latest
+  stage: test
+  # allow_failure: true
+  script:
+    - find mkdocs.yml docs.it4i/ \( -name '*.md' -o -name '*.yml' \) -print0 | xargs -0 -n1 scripts/titlemd_test.py
+# Disabled spell-check job, kept for reference:
+# spell check:
+#   stage: test
+#   image: davidhrbac/docker-npmcheck:latest
+#   allow_failure: true
+#   script:
+#     - npm i markdown-spellcheck -g
+#     - mdspell '**/*.md' '!docs.it4i/module*.md' -rns --en-us
+# External link check: list every Markdown file under docs.it4i/ that
+# contains "http" and pass the file names to awesome_bot.
+# Runs only on master and is allowed to fail.
+ext_links:
+  stage: test
+  image: davidhrbac/docker-mdcheck:latest
+  allow_failure: true
+  script:
+    - find docs.it4i/ -name '*.md' -exec grep --color -l http {} + | xargs awesome_bot -t 10 --allow-dupe --allow-redirect
+  after_script:
+    # remove JSON results; -f keeps the cleanup silent when no result file exists
+    - rm -f *.json
+  only:
+    - master
+# Build the documentation site with MkDocs and keep site/ as a job artifact.
+mkdocs:
+  stage: build
+  image: davidhrbac/docker-mkdocscheck:latest
+  cache:
+    paths:
+      - .cache/pip
+      - venv/
+  before_script:
+    # print out python version for debugging
+    - python -V
+    # create and activate a virtualenv, then install the build requirements
+    - pip install virtualenv
+    - virtualenv venv
+    - source venv/bin/activate
+    - pip install -r requirements.txt
+  script:
+    - mkdocs -V
+    # add version to footer
+    - bash scripts/add_version.sh
+    # get modules list from clusters
+    - bash scripts/get_modules.sh
+    # generate site_url: first print the head of the rewritten mkdocs.yml,
+    # then rewrite the file in place on every ref except 'hrb3'
+    - sed "s/\(site_url.*$\)/\1devel\/$CI_BUILD_REF_NAME\//" mkdocs.yml | head
+    - (if [ "${CI_BUILD_REF_NAME}" != 'hrb3' ]; then sed -i "s/\(site_url.*$\)/\1devel\/$CI_BUILD_REF_NAME\//" mkdocs.yml;fi);
+    # regenerate modules matrix
+    - python scripts/modules-matrix.py > docs.it4i/modules-matrix.md
+    - python scripts/modules-json.py > docs.it4i/modules-matrix.json
+    # download the API description into the docs tree as apiv1.md
+    - curl -f0 https://scs-test.it4i.cz/devel/apidocs/master/scs_api.server_public.md -o docs.it4i/apiv1.md
+    # build pages
+    - mkdocs build
+    # compress search_index.json
+    # - bash scripts/clean_json.sh site/mkdocs/search_index.json
+    # replace broken links in 404.html
+    - sed -i 's,href="" title=",href="/" title=",g' site/404.html
+    # derive the 403 page from the 404 page
+    - cp site/404.html site/403.html
+    - sed -i 's/404 - Not found/403 - Forbidden/g' site/403.html
+    # compress sitemap
+    - gzip < site/sitemap.xml > site/sitemap.xml.gz
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - site
+# Upload the built site to the per-branch devel/ directory on the stage host.
+deploy to stage:
+  stage: deploy
+  environment: stage
+  image: davidhrbac/docker-mkdocscheck:latest
+  before_script:
+    # install ssh-agent and rsync when the image does not provide them
+    - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
+    - 'which rsync || ( apt-get update -y && apt-get install rsync -y )'
+    # run ssh-agent
+    - eval $(ssh-agent -s)
+    # add ssh key stored in SSH_PRIVATE_KEY variable to the agent store
+    - ssh-add <(echo "$SSH_PRIVATE_KEY")
+    # disable host key checking (NOTE: makes you susceptible to man-in-the-middle attacks)
+    # WARNING: use only in docker container, if you use it with shell you will overwrite your user's ssh config
+    - mkdir -p ~/.ssh
+    - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
+    # create the nginx account that the script chowns site/ to
+    - useradd -lM nginx
+  script:
+    - chown nginx:nginx site -R
+    - rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
+  only:
+    - branches@sccs/docs.it4i.cz
+# Manually triggered upload of the built site to the production directory
+# (/srv/docs.it4i.cz/site/); restricted to master of sccs/docs.it4i.cz.
+deploy to production:
+  stage: deploy
+  environment: production
+  image: davidhrbac/docker-mkdocscheck:latest
+  when: manual
+  before_script:
+    # install ssh-agent and rsync when the image does not provide them
+    - 'which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y )'
+    - 'which rsync || ( apt-get update -y && apt-get install rsync -y )'
+    # run ssh-agent
+    - eval $(ssh-agent -s)
+    # add ssh key stored in SSH_PRIVATE_KEY variable to the agent store
+    - ssh-add <(echo "$SSH_PRIVATE_KEY")
+    # disable host key checking (NOTE: makes you susceptible to man-in-the-middle attacks)
+    # WARNING: use only in docker container, if you use it with shell you will overwrite your user's ssh config
+    - mkdir -p ~/.ssh
+    - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
+    # create the nginx account that the script chowns site/ to
+    - useradd -lM nginx
+  script:
+    - chown nginx:nginx site -R
+    # NOTE(review): production uploads to $SSH_HOST_STAGE, same as the stage job;
+    # presumably both environments share one host - confirm this is intended
+    - rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/site/
+  only:
+    - master@sccs/docs.it4i.cz
--- a/.spelling
+++ b/.spelling
+CAE
+CUBE
+GPU
+GSL
+LMGC90
+LS-DYNA
+MAPDL
+GPI-2
+COM
+.ssh
+Anselm
+IT4I
+IT4Innovations
+PBS
+Salomon
+TurboVNC
+VNC
+DDR3
+DIMM
+InfiniBand
+CUDA
+ORCA
+COMSOL
+API
+GNU
+CUDA
+NVIDIA
+LiveLink
+MATLAB
+Allinea
+LLNL
+Vampir
+Doxygen
+VTune
+TotalView
+Valgrind
+ParaView
+OpenFOAM
+MAX_FAIRSHARE
+MPI4Py
+MPICH2
+PETSc
+Trilinos
+FFTW
+HDF5
+BiERapp
+AVX
+AVX2
+JRE
+JDK
+QEMU
+VMware
+VirtualBox
+NUMA
+SMP
+BLAS
+LAPACK
+FFTW3
+Dongarra
+OpenCL
+cuBLAS
+CESNET
+Jihlava
+NVIDIA
+Xeon
+ANSYS
+CentOS
+RHEL
+DDR4
+DIMMs
+GDDR5
+EasyBuild
+e.g.
+MPICH
+MVAPICH2
+OpenBLAS
+ScaLAPACK
+PAPI
+SGI
+UV2000
+400GB
+Mellanox
+RedHat
+ssh.du1.cesnet.cz
+ssh.du2.cesnet.cz
+ssh.du3.cesnet.cz
+DECI
+supercomputing
+AnyConnect
+X11
+backfilling
+backfilled
+SCP
+Lustre
+QDR
+TFLOP
+ncpus
+myjob
+pernode
+mpiprocs
+ompthreads
+qprace
+runtime
+SVS
+ppn
+Multiphysics
+aeroacoustics
+turbomachinery
+CFD
+LS-DYNA
+APDL
+MAPDL
+multiphysics
+AUTODYN
+RSM
+Molpro
+initio
+parallelization
+NWChem
+SCF
+ISV
+profiler
+Pthreads
+profilers
+OTF
+PAPI
+PCM
+uncore
+pre-processing
+prepend
+CXX
+prepended
+POMP2
+Memcheck
+unaddressable
+OTF2
+GPI-2
+GASPI
+GPI
+MKL
+IPP
+TBB
+GSL
+Omics
+VNC
+Scalasca
+IFORT
+interprocedural
+IDB
+cloop
+qcow
+qcow2
+vmdk
+vdi
+virtio
+paravirtualized
+Gbit
+tap0
+UDP
+TCP
+preload
+qfat
+Rmpi
+DCT
+datasets
+dataset
+preconditioners
+partitioners
+PARDISO
+PaStiX
+SuiteSparse
+SuperLU
+ExodusII
+NetCDF
+ParMETIS
+multigrid
+HYPRE
+SPAI
+Epetra
+EpetraExt
+Tpetra
+64-bit
+Belos
+GMRES
+Amesos
+IFPACK
+preconditioner
+Teuchos
+Makefiles
+SAXPY
+NVCC
+VCF
+HGMD
+HUMSAVAR
+ClinVar
+indels
+CIBERER
+exomes
+tmp
+SSHFS
+RSYNC
+unmount
+Cygwin
+CygwinX
+RFB
+TightVNC
+TigerVNC
+GUIs
+XLaunch
+UTF-8
+numpad
+PuTTYgen
+OpenSSH
+IE11
+x86
+r21u01n577
+7120P
+interprocessor
+IPN
+toolchains
+toolchain
+APIs
+easyblocks
+GM200
+GeForce
+GTX
+IRUs
+ASIC
+backplane
+ICEX
+IRU
+PFLOP
+T950B
+ifconfig
+inet
+addr
+checkbox
+appfile
+programmatically
+http
+https
+filesystem
+phono3py
+HDF
+splitted
+automize
+llvm
+PGI
+GUPC
+BUPC
+IBV
+Aislinn
+nondeterminism
+stdout
+stderr
+i.e.
+pthreads
+uninitialised
+broadcasted
+ITAC
+hotspots
+Bioinformatics
+semiempirical
+DFT
+polyfill
+ES6
+HTML5Rocks
+minifiers
+CommonJS
+PhantomJS
+bundlers
+Browserify
+versioning
+isflowing
+ispaused
+NPM
+sublicense
+Streams2
+Streams3
+blogpost
+GPG
+mississippi
+Uint8Arrays
+Uint8Array
+endianness
+styleguide
+noop
+MkDocs
+ - docs.it4i/anselm-cluster-documentation/environment-and-modules.md
+MODULEPATH
+bashrc
+PrgEnv-gnu
+bullx
+MPI
+PrgEnv-intel
+EasyBuild
+ - docs.it4i/anselm-cluster-documentation/capacity-computing.md
+capacity.zip
+README
+ - docs.it4i/anselm-cluster-documentation/compute-nodes.md
+DIMMs
+ - docs.it4i/anselm-cluster-documentation/hardware-overview.md
+cn
+K20
+Xeon
+x86-64
+Virtualization
+virtualization
+NVIDIA
+5110P
+SSD
+lscratch
+login1
+login2
+dm1
+Rpeak
+LINPACK
+Rmax
+E5-2665
+E5-2470
+P5110
+isw
+ - docs.it4i/anselm-cluster-documentation/introduction.md
+RedHat
+ - docs.it4i/anselm-cluster-documentation/job-priority.md
+walltime
+qexp
+_List.fairshare
+_time
+_FAIRSHARE
+1E6
+ - docs.it4i/anselm-cluster-documentation/job-submission-and-execution.md
+15209.srv11
+qsub
+15210.srv11
+pwd
+cn17.bullx
+cn108.bullx
+cn109.bullx
+cn110.bullx
+pdsh
+hostname
+SCRDIR
+mkdir
+mpiexec
+qprod
+Jobscript
+jobscript
+cn108
+cn109
+cn110
+Name0
+cn17
+_NODEFILE
+_O
+_WORKDIR
+mympiprog.x
+_JOBID
+myprog.x
+openmpi
+ - docs.it4i/anselm-cluster-documentation/network.md
+ib0
+ - docs.it4i/anselm-cluster-documentation/prace.md
+PRACE
+qfree
+it4ifree
+it4i.portal.clients
+prace
+1h
+ - docs.it4i/anselm-cluster-documentation/shell-and-data-access.md
+VPN
+ - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-cfx.md
+ANSYS
+CFX
+cfx.pbs
+_r
+ane3fl
+ - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.md
+mapdl.pbs
+_dy
+ - docs.it4i/anselm-cluster-documentation/software/ansys/ls-dyna.md
+HPC
+lsdyna.pbs
+ - docs.it4i/anselm-cluster-documentation/software/chemistry/molpro.md
+OpenMP
+ - docs.it4i/anselm-cluster-documentation/software/compilers.md
+Fortran
+ - docs.it4i/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.md
+E5-2600
+ - docs.it4i/anselm-cluster-documentation/software/debuggers/score-p.md
+Makefile
+ - docs.it4i/anselm-cluster-documentation/software/gpi2.md
+gcc
+cn79
+helloworld
+_gpi.c
+ibverbs
+gaspi
+_logger
+ - docs.it4i/anselm-cluster-documentation/software/intel-suite/intel-compilers.md
+Haswell
+CPUs
+ipo
+O3
+vec
+xAVX
+omp
+simd
+ivdep
+pragmas
+openmp
+xCORE-AVX2
+axCORE-AVX2
+ - docs.it4i/anselm-cluster-documentation/software/kvirtualization.md
+rc.local
+runlevel
+RDP
+DHCP
+DNS
+SMB
+VDE
+smb.conf
+TMPDIR
+run.bat.
+slirp
+NATs
+ - docs.it4i/anselm-cluster-documentation/software/mpi/mpi4py-mpi-for-python.md
+NumPy
+ - docs.it4i/anselm-cluster-documentation/software/numerical-languages/matlab_1314.md
+mpiLibConf.m
+matlabcode.m
+output.out
+matlabcodefile
+sched
+_feature
+ - docs.it4i/anselm-cluster-documentation/software/numerical-languages/matlab.md
+UV2000
+maxNumCompThreads
+SalomonPBSPro
+ - docs.it4i/anselm-cluster-documentation/software/numerical-languages/octave.md
+_THREADS
+_NUM
+ - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/trilinos.md
+CMake-aware
+Makefile.export
+_PACKAGE
+_CXX
+_COMPILER
+_INCLUDE
+_DIRS
+_LIBRARY
+ - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.md
+ansysdyna.pbs
+ - docs.it4i/anselm-cluster-documentation/software/ansys/ansys.md
+svsfem.cz
+_
+ - docs.it4i/anselm-cluster-documentation/software/debuggers/valgrind.md
+libmpiwrap-amd64-linux
+O0
+valgrind
+malloc
+_PRELOAD
+ - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.md
+cn204
+_LIBS
+MAGMAROOT
+_magma
+_server
+_anselm
+_from
+_mic.sh
+_dgetrf
+_mic
+_03.pdf
+ - docs.it4i/anselm-cluster-documentation/software/paraview.md
+cn77
+localhost
+v4.0.1
+ - docs.it4i/anselm-cluster-documentation/storage.md
+ssh.du1.cesnet.cz
+Plzen
+ssh.du2.cesnet.cz
+ssh.du3.cesnet.cz
+tier1
+_home
+_cache
+_tape
+ - docs.it4i/salomon/environment-and-modules.md
+icc
+ictce
+ifort
+imkl
+intel
+gompi
+goolf
+BLACS
+iompi
+iccifort
+ - docs.it4i/salomon/hardware-overview.md
+HW
+E5-4627v2
+ - docs.it4i/salomon/job-submission-and-execution.md
+15209.isrv5
+r21u01n577
+r21u02n578
+r21u03n579
+r21u04n580
+qsub
+15210.isrv5
+pwd
+r2i5n6.ib0.smc.salomon.it4i.cz
+r4i6n13.ib0.smc.salomon.it4i.cz
+r4i7n2.ib0.smc.salomon.it4i.cz
+pdsh
+r2i5n6
+r4i6n13
+r4i7n
+r4i7n2
+r4i7n0
+SCRDIR
+myjob
+mkdir
+mympiprog.x
+mpiexec
+myprog.x
+r4i7n0.ib0.smc.salomon.it4i.cz
+ - docs.it4i/salomon/7d-enhanced-hypercube.md
+cns1
+cns576
+r1i0n0
+r4i7n17
+cns577
+cns1008
+r37u31n1008
+7D
+ - docs.it4i/anselm-cluster-documentation/resources-allocation-policy.md
+qsub
+it4ifree
+it4i.portal.clients
+x86
+x64
+ - docs.it4i/anselm-cluster-documentation/software/ansys/ansys-fluent.md
+anslic
+_admin
+ - docs.it4i/anselm-cluster-documentation/software/chemistry/nwchem.md
+_DIR
+ - docs.it4i/anselm-cluster-documentation/software/comsol-multiphysics.md
+EDU
+comsol
+_matlab.pbs
+_job.m
+mphstart
+ - docs.it4i/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.md
+perf-report
+perf
+txt
+html
+mympiprog
+_32p
+ - docs.it4i/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.md
+Hotspots
+ - docs.it4i/anselm-cluster-documentation/software/debuggers/scalasca.md
+scorep
+ - docs.it4i/anselm-cluster-documentation/software/isv_licenses.md
+edu
+ansys
+_features
+_state.txt
+f1
+matlab
+acfd
+_ansys
+_acfd
+_aa
+_comsol
+HEATTRANSFER
+_HEATTRANSFER
+COMSOLBATCH
+_COMSOLBATCH
+STRUCTURALMECHANICS
+_STRUCTURALMECHANICS
+_matlab
+_Toolbox
+_Image
+_Distrib
+_Comp
+_Engine
+_Acquisition
+pmode
+matlabpool
+ - docs.it4i/anselm-cluster-documentation/software/mpi/mpi.md
+mpirun
+BLAS1
+FFT
+KMP
+_AFFINITY
+GOMP
+_CPU
+bullxmpi-1
+mpich2
+ - docs.it4i/anselm-cluster-documentation/software/mpi/Running_OpenMPI.md
+bysocket
+bycore
+ - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/fftw.md
+gcc3.3.3
+pthread
+fftw3
+lfftw3
+_threads-lfftw3
+_omp
+icc3.3.3
+FFTW2
+gcc2.1.5
+fftw2
+lfftw
+_threads
+icc2.1.5
+fftw-mpi3
+_mpi
+fftw3-mpi
+fftw2-mpi
+IntelMPI
+ - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/gsl.md
+dwt.c
+mkl
+lgsl
+ - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/hdf5.md
+icc
+hdf5
+_INC
+_SHLIB
+_CPP
+_LIB
+_F90
+gcc49
+ - docs.it4i/anselm-cluster-documentation/software/numerical-libraries/petsc.md
+_Dist
+ - docs.it4i/anselm-cluster-documentation/software/nvidia-cuda.md
+lcublas
+ - docs.it4i/anselm-cluster-documentation/software/operating-system.md
+6.x
+ - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.md
+startxwin
+cygwin64binXWin.exe
+tcp
+ - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.md
+Xming
+XWin.exe.
+ - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.md
+_rsa.ppk
+ - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.md
+_keys
+organization.example.com
+_rsa
+ - docs.it4i/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md
+vpnui.exe
+ - docs.it4i/salomon/ib-single-plane-topology.md
+36-port
+Mcell.pdf
+r21-r38
+nodes.pdf
+ - docs.it4i/salomon/introduction.md
+E5-2680v3
+ - docs.it4i/salomon/network.md
+r4i1n0
+r4i1n1
+r4i1n2
+r4i1n3
+ip
+ - docs.it4i/salomon/software/ansys/setting-license-preferences.md
+ansys161
+ - docs.it4i/salomon/software/ansys/workbench.md
+mpifile.txt
+solvehandlers.xml
+ - docs.it4i/salomon/software/chemistry/phono3py.md
+vasprun.xml
+disp-XXXXX
+disp
+_fc3.yaml
+ir
+_grid
+_points.yaml
+gofree-cond1
+ - docs.it4i/salomon/software/compilers.md
+HPF
+ - docs.it4i/salomon/software/comsol/licensing-and-available-versions.md
+ver
+ - docs.it4i/salomon/software/debuggers/aislinn.md
+test.cpp
+ - docs.it4i/salomon/software/debuggers/intel-vtune-amplifier.md
+vtune
+_update1
+ - docs.it4i/salomon/software/debuggers/valgrind.md
+EBROOTVALGRIND
+ - docs.it4i/salomon/software/intel-suite/intel-advisor.md
+O2
+ - docs.it4i/salomon/software/intel-suite/intel-compilers.md
+UV1
+ - docs.it4i/salomon/software/numerical-languages/octave.md
+octcode.m
+mkoctfile
+ - docs.it4i/software/orca.md
+pdf
+ - node_modules/es6-promise/README.md
+rsvp.js
+es6-promise
+es6-promise-min
+Node.js
+testem
+ - node_modules/spawn-sync/lib/json-buffer/README.md
+node.js
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/doc/wg-meetings/2015-01-30.md
+WG
+domenic
+mikeal
+io.js
+sam
+calvin
+whatwg
+compat
+mathias
+isaac
+chris
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/core-util-is/README.md
+core-util-is
+v0.12.
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/isarray/README.md
+isarray
+Gruber
+julian
+juliangruber.com
+NONINFRINGEMENT
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/process-nextick-args/license.md
+Metcalf
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/process-nextick-args/readme.md
+process-nextick-args
+process.nextTick
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/string_decoder/README.md
+_decoder.js
+Joyent
+joyent
+repo
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/util-deprecate/History.md
+kumavis
+jsdocs
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/node_modules/util-deprecate/README.md
+util-deprecate
+Rajlich
+ - node_modules/spawn-sync/node_modules/concat-stream/node_modules/readable-stream/README.md
+v7.0.0
+userland
+chrisdickinson
+christopher.s.dickinson
+gmail.com
+9554F04D7259F04124DE6B476D5A82AC7E37093B
+calvinmetcalf
+calvin.metcalf
+F3EF5F62A87FC27A22E643F714CE4FF5015AA242
+Vagg
+rvagg
+vagg.org
+DD8F2338BAE7501E3DD5AC78C273792F7D83545D
+sonewman
+newmansam
+outlook.com
+Buus
+mafintosh
+mathiasbuus
+Denicola
+domenic.me
+Matteo
+Collina
+mcollina
+matteo.collina
+3ABC01543F22DD2239285CDD818674489FBC127E
+ - node_modules/spawn-sync/node_modules/concat-stream/readme.md
+concat-stream
+concat
+cb
+ - node_modules/spawn-sync/node_modules/os-shim/README.md
+0.10.x
+os.tmpdir
+os.endianness
+os.EOL
+os.platform
+os.arch
+0.4.x
+Aparicio
+Adesis
+Netlife
+S.L
+ - node_modules/spawn-sync/node_modules/try-thread-sleep/node_modules/thread-sleep/README.md
+node-pre-gyp
+npm
+ - node_modules/spawn-sync/README.md
+iojs
--- a/README.md
+++ b/README.md
+# User documentation
+This project contains IT4Innovations user documentation source.
+## Development
+### Install
+```console
+$ sudo apt install libpython-dev
+$ virtualenv venv
+$ source venv/bin/activate
+$ pip install -r requirements.txt
+```
+## Environments
+* [https://docs.it4i.cz](https://docs.it4i.cz) - master branch
+* [https://docs.it4i.cz/devel/$BRANCH_NAME](https://docs.it4i.cz/devel/$BRANCH_NAME) - maps the branches, available only with VPN access
+## URLs
+* [http://facelessuser.github.io/pymdown-extensions/](http://facelessuser.github.io/pymdown-extensions/)
+* [http://squidfunk.github.io/mkdocs-material/](http://squidfunk.github.io/mkdocs-material/)
+```
+fair-share
+InfiniBand
+RedHat
+CentOS
+Mellanox
+```
+## Mathematical Formulae
+### Formulas are made with:
+* [https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/](https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/)
+* [https://www.mathjax.org/](https://www.mathjax.org/)
+You can add a formula to a page like this:
+```
+$$
+MAX\_FAIRSHARE * ( 1 - \frac{usage_{Project}}{usage_{Total}} )
+$$
+```
+To enable MathJax on a page, add the line ```---8<--- "mathjax.md"``` at the end of the file.
--- a/docs.it4i/anselm/capacity-computing.md
+++ b/docs.it4i/anselm/capacity-computing.md
+# Capacity Computing
+## Introduction
+In many cases, it is useful to submit a huge (>100) number of computational jobs into the PBS queue system. A huge number of (small) jobs is one of the most effective ways to execute embarrassingly parallel calculations, achieving the best runtime, throughput, and computer utilization.
+However, executing a huge number of jobs via the PBS queue may strain the system. This strain may result in slow response to commands, inefficient scheduling, and overall degradation of performance and user experience, for all users. For this reason, the number of jobs is **limited to 100 per user, 1000 per job array**.
+!!! note
+    Please follow one of the procedures below, in case you wish to schedule more than 100 jobs at a time.
+* Use [Job arrays](/anselm/capacity-computing/#job-arrays) when running a huge number of [multithread](/anselm/capacity-computing/#shared-jobscript) (bound to one node only) or multinode (multithread across several nodes) jobs
+* Use [GNU parallel](/anselm/capacity-computing/#gnu-parallel) when running single core jobs
+* Combine [GNU parallel with Job arrays](/anselm/capacity-computing/#job-arrays-and-gnu-parallel) when running huge number of single core jobs
+## Policy
+1. A user is allowed to submit at most 100 jobs. Each job may be [a job array](/anselm/capacity-computing/#job-arrays).
+1. The array size is at most 1000 subjobs.
+## Job Arrays
+!!! note
+    A huge number of jobs may easily be submitted and managed as a job array.
+A job array is a compact representation of many jobs, called subjobs. The subjobs share the same job script, and have the same values for all attributes and resources, with the following exceptions:
+* each subjob has a unique index, $PBS_ARRAY_INDEX
+* job Identifiers of subjobs only differ by their indices
+* the state of subjobs can differ (R,Q,...etc.)
+All subjobs within a job array have the same scheduling priority and schedule as independent jobs. An entire job array is submitted through a single qsub command and may be managed by qdel, qalter, qhold, qrls, and qsig commands as a single job.
+### Shared Jobscript
+All subjobs in a job array use the very same, single jobscript. Each subjob runs its own instance of the jobscript. The instances execute different work controlled by the $PBS_ARRAY_INDEX variable.
+Example:
+Assume we have 900 input files with the name of each beginning with "file" (e. g. file001, ..., file900). Assume we would like to use each of these input files with program executable myprog.x, each as a separate job.
+First, we create a tasklist file (or subjobs list), listing all tasks (subjobs) - all input files in our example:
+```console
+$ find . -name 'file*' > tasklist
+```
+Then we create the jobscript:
+```bash
+#!/bin/bash
+#PBS -A PROJECT_ID
+#PBS -q qprod
+#PBS -l select=1:ncpus=16,walltime=02:00:00
+# Job-array jobscript: each subjob processes the tasklist line selected by
+# $PBS_ARRAY_INDEX. Expansions are quoted so paths containing whitespace or
+# glob characters are passed through intact.
+# change to local scratch directory; stop if it cannot be entered
+SCR="/lscratch/$PBS_JOBID"
+mkdir -p "$SCR" ; cd "$SCR" || exit
+# get individual tasks from tasklist with index from PBS JOB ARRAY
+TASK=$(sed -n "${PBS_ARRAY_INDEX}p" "$PBS_O_WORKDIR/tasklist")
+# copy input file and executable to scratch
+cp "$PBS_O_WORKDIR/$TASK" input ; cp "$PBS_O_WORKDIR/myprog.x" .
+# execute the calculation
+./myprog.x < input > output
+# copy output file to submit directory
+cp output "$PBS_O_WORKDIR/$TASK.out"
+```
+In this example, the submit directory holds the 900 input files, the executable myprog.x, and the jobscript file. As an input for each run, we take the filename of the input file from the created tasklist file. We copy the input file to the local scratch memory /lscratch/$PBS_JOBID, execute the myprog.x and copy the output file back to the submit directory, under the $TASK.out name. The myprog.x runs on one node only and must use threads to run in parallel. Be aware, that if the myprog.x **is not multithreaded**, then all the **jobs are run as single thread programs in a sequential** manner. Due to the allocation of the whole node, the accounted time is equal to the usage of the whole node, while using only 1/16 of the node!
+If running a huge number of parallel multicore (in means of multinode multithread, e. g. MPI enabled) jobs is needed, then a job array approach should be used. The main difference as compared to previous examples using one node is that the local scratch memory should not be used (as it's not shared between nodes) and MPI or other techniques for parallel multinode processing has to be used properly.
+### Submit the Job Array
+To submit the job array, use the qsub -J command. The 900 jobs of the [example above](/anselm/capacity-computing/#shared-jobscript) may be submitted like this:
+```console
+$ qsub -N JOBNAME -J 1-900 jobscript
+12345[].dm2
+```
+In this example, we submit a job array of 900 subjobs. Each subjob will run on one full node and is assumed to take less than 2 hours (note the #PBS directives in the beginning of the jobscript file, don't forget to set your valid PROJECT_ID and desired queue).
+Sometimes for testing purposes, you may need to submit a one-element only array. This is not allowed by PBSPro, but there's a workaround:
+```console
+$ qsub -N JOBNAME -J 9-10:2 jobscript
+```
+This will only choose the lower index (9 in this example) for submitting/running your job.
+### Manage the Job Array
+Check status of the job array using the qstat command.
+```console
+$ qstat -a 12345[].dm2
+dm2:
+                                                            Req'd Req'd   Elap
+Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
+--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
+12345[].dm2     user2    qprod    xx          13516   1 16    --  00:50 B 00:02
+```
+When the status is B it means that some subjobs are already running.
+Check the status of the first 100 subjobs using the qstat command.
+```console
+$ qstat -a 12345[1-100].dm2
+dm2:
+                                                            Req'd Req'd   Elap
+Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
+--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
+12345[1].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:02
+12345[2].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:02
+12345[3].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:01
+12345[4].dm2    user2    qprod    xx          13516   1 16    --  00:50 Q   --
+     .             .        .      .             .    .   .     .    .   .    .
+     .             .        .      .             .    .   .     .    .   .    .
+12345[100].dm2 user2    qprod    xx          13516   1 16    --  00:50 Q   --
+```
+Delete the entire job array. Running subjobs will be killed, queueing subjobs will be deleted.
+```console
+$ qdel 12345[].dm2
+```
+Deleting large job arrays may take a while.
+Display status information for all user's jobs, job arrays, and subjobs.
+```console
+$ qstat -u $USER -t
+```
+Display status information for all user's subjobs.
+```console
+$ qstat -u $USER -tJ
+```
+Read more on job arrays in the [PBSPro Users guide](pbspro/).
+## GNU Parallel
+!!! note
+    Use GNU parallel to run many single core tasks on one node.
+GNU parallel is a shell tool for executing jobs in parallel using one or more computers. A job can be a single command or a small script that has to be run for each of the lines in the input. GNU parallel is most useful when running single core jobs via the queue system on Anselm.
+For more information and examples see the parallel man page:
+```console
+$ module add parallel
+$ man parallel
+```
+### GNU Parallel Jobscript
+The GNU parallel shell executes multiple instances of the jobscript using all cores on the node. The instances execute different work, controlled by the $PARALLEL_SEQ variable.
+Example:
+Assume we have 101 input files with name beginning with "file" (e. g. file001, ..., file101). Assume we would like to use each of these input files with program executable myprog.x, each as a separate single core job. We call these single core jobs tasks.
+First, we create a tasklist file, listing all tasks - all input files in our example:
+```console
+$ find . -name 'file*' > tasklist
+```
+Then we create a jobscript:
+```bash
+#!/bin/bash
+#PBS -A PROJECT_ID
+#PBS -q qprod
+#PBS -l select=1:ncpus=16,walltime=02:00:00
+[ -z "$PARALLEL_SEQ" ] &&
+{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/tasklist $0 ; }
+# change to local scratch directory
+SCR=/lscratch/$PBS_JOBID/$PARALLEL_SEQ
+mkdir -p $SCR ; cd $SCR || exit
+# get individual task from tasklist
+TASK=$1
+# copy input file and executable to scratch
+cp $PBS_O_WORKDIR/$TASK input
+# execute the calculation
+cat input > output
+# copy output file to submit directory
+cp output $PBS_O_WORKDIR/$TASK.out
+```
+In this example, tasks from the tasklist are executed via GNU parallel. The jobscript executes multiple instances of itself in parallel, on all cores of the node. Once an instance of the jobscript is finished, a new instance starts until all entries in the tasklist are processed. Currently processed entries of the joblist may be retrieved via the $1 variable. The variable $TASK expands to one of the input filenames from the tasklist. We copy the input file to local scratch memory, execute the myprog.x and copy the output file back to the submit directory, under the $TASK.out name.
+### Submit the Job
+To submit the job, use the qsub command. The 101 task job of the [example above](/anselm/capacity-computing/#gnu-parallel-jobscript) may be submitted as follows:
+```console
+$ qsub -N JOBNAME jobscript
+12345.dm2
+```
+In this example, we submit a job of 101 tasks. 16 input files will be processed in parallel. The 101 tasks on 16 cores are assumed to complete in less than 2 hours.
+!!! hint
+    Use #PBS directives at the beginning of the jobscript file, don't forget to set your valid PROJECT_ID and desired queue.
+## Job Arrays and GNU Parallel
+!!! note
+    Combine the Job arrays and GNU parallel for the best throughput of single core jobs
+While job arrays are able to utilize all available computational nodes, the GNU parallel can be used to efficiently run multiple single-core jobs on a single node. The two approaches may be combined to utilize all available (current and future) resources to execute single core jobs.
+!!! note
+    Every subjob in an array runs GNU parallel to utilize all cores on the node
+### GNU Parallel, Shared jobscript
+A combined approach, very similar to job arrays, can be taken. A job array is submitted to the queuing system. The subjobs run GNU parallel. The GNU parallel shell executes multiple instances of the jobscript using all of the cores on the node. The instances execute different work, controlled by the $PBS_ARRAY_INDEX and $PARALLEL_SEQ variables.
+Example:
+Assume we have 992 input files with each name beginning with "file" (e. g. file001, ..., file992). Assume we would like to use each of these input files with program executable myprog.x, each as a separate single core job. We call these single core jobs tasks.
+First, we create a tasklist file, listing all tasks - all input files in our example:
+```console
+$ find . -name 'file*' > tasklist
+```
+Next we create a file, controlling how many tasks will be executed in one subjob:
+```console
+$ seq 32 > numtasks
+```
+Then we create a jobscript:
+```bash
+#!/bin/bash
+#PBS -A PROJECT_ID
+#PBS -q qprod
+#PBS -l select=1:ncpus=16,walltime=02:00:00
+[ -z "$PARALLEL_SEQ" ] &&
+{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/numtasks $0 ; }
+# change to local scratch directory
+SCR=/lscratch/$PBS_JOBID/$PARALLEL_SEQ
+mkdir -p $SCR ; cd $SCR || exit
+# get individual task from tasklist with index from PBS JOB ARRAY and index form Parallel
+IDX=$(($PBS_ARRAY_INDEX + $PARALLEL_SEQ - 1))
+TASK=$(sed -n "${IDX}p" $PBS_O_WORKDIR/tasklist)
+[ -z "$TASK" ] && exit
+# copy input file and executable to scratch
+cp $PBS_O_WORKDIR/$TASK input
+# execute the calculation
+cat input > output
+# copy output file to submit directory
+cp output $PBS_O_WORKDIR/$TASK.out
+```
+In this example, the jobscript executes in multiple instances in parallel, on all cores of a computing node. The variable $TASK expands to one of the input filenames from the tasklist. We copy the input file to local scratch memory, execute the myprog.x and copy the output file back to the submit directory, under the $TASK.out name.  The numtasks file controls how many tasks will be run per subjob. Once a task is finished, a new task starts, until the number of tasks in the numtasks file is reached.
+!!! note
+    Select subjob walltime and number of tasks per subjob carefully
+When deciding these values, keep in mind the following guiding rules:
+1. Let n=N/16.  Inequality (n+1) \* T < W should hold. N is the number of tasks per subjob, T is expected single task walltime and W is subjob walltime. Short subjob walltime improves scheduling and job throughput.
+1. The number of tasks should be a multiple of 16.
+1. These rules are valid only when all tasks have similar task walltimes T.
+### Submit the Job Array (-J)
+To submit the job array, use the qsub -J command. The 992 task job of the [example above](/anselm/capacity-computing/#combined_example) may be submitted like this:
+```console
+$ qsub -N JOBNAME -J 1-992:32 jobscript
+12345[].dm2
+```
+In this example, we submit a job array of 31 subjobs. Note the -J 1-992:**32**, this must be the same as the number sent to the numtasks file. Each subjob will run on one full node and process 16 input files in parallel, 32 in total per subjob.  Every subjob is assumed to complete in less than 2 hours.
+!!! hint
+    Use #PBS directives at the beginning of the jobscript file, don't forget to set your valid PROJECT_ID and desired queue.
+## Examples
+Download the examples in [capacity.zip](capacity.zip), illustrating the above listed ways to run a huge number of jobs. We recommend trying out the examples before using this for running production jobs.
+Unzip the archive in an empty directory on Anselm and follow the instructions in the README file
+```console
+$ unzip capacity.zip
+$ cat README
+```
--- a/docs.it4i/anselm/capacity.zip
+++ b/docs.it4i/anselm/capacity.zip
--- a/docs.it4i/anselm/compute-nodes.md
+++ b/docs.it4i/anselm/compute-nodes.md
+# Compute Nodes
+## Node Configuration
+Anselm is a cluster of x86-64 Intel based nodes built with Bull Extreme Computing bullx technology. The cluster contains four types of compute nodes.
+### Compute Nodes Without Accelerators
+* 180 nodes
+* 2880 cores in total
+* two Intel Sandy Bridge E5-2665, 8-core, 2.4GHz processors per node
+* 64 GB of physical memory per node
+* one 500GB SATA 2,5” 7,2 krpm HDD per node
+* bullx B510 blade servers
+* cn[1-180]
+### Compute Nodes With a GPU Accelerator
+* 23 nodes
+* 368 cores in total
+* two Intel Sandy Bridge E5-2470, 8-core, 2.3GHz processors per node
+* 96 GB of physical memory per node
+* one 500GB SATA 2,5” 7,2 krpm HDD per node
+* GPU accelerator 1x NVIDIA Tesla Kepler K20m per node
+* bullx B515 blade servers
+* cn[181-203]
+### Compute Nodes With a MIC Accelerator
+* 4 nodes
+* 64 cores in total
+* two Intel Sandy Bridge E5-2470, 8-core, 2.3GHz processors per node
+* 96 GB of physical memory per node
+* one 500GB SATA 2,5” 7,2 krpm HDD per node
+* MIC accelerator 1x Intel Phi 5110P per node
+* bullx B515 blade servers
+* cn[204-207]
+### Fat Compute Nodes
+* 2 nodes
+* 32 cores in total
+* 2 Intel Sandy Bridge E5-2665, 8-core, 2.4GHz processors per node
+* 512 GB of physical memory per node
+* two 300GB SAS 3,5” 15krpm HDD (RAID1) per node
+* two 100GB SLC SSD per node
+* bullx R423-E3 servers
+* cn[208-209]
+![](../img/bullxB510.png)
+**Anselm bullx B510 servers**
+### Compute Node Summary
+| Node type                    | Count | Range       | Memory | Cores       | [Access](/general/resources-allocation-policy/)    |
+| ---------------------------- | ----- | ----------- | ------ | ----------- | --------------------------------------    |
+| Nodes without an accelerator | 180   | cn[1-180]   | 64GB   | 16 @ 2.4GHz | qexp, qprod, qlong, qfree, qprace, qatlas |
+| Nodes with a GPU accelerator | 23    | cn[181-203] | 96GB   | 16 @ 2.3GHz | qnvidia, qexp                             |
+| Nodes with a MIC accelerator | 4     | cn[204-207] | 96GB   | 16 @ 2.3GHz | qmic, qexp                                |
+| Fat compute nodes            | 2     | cn[208-209] | 512GB  | 16 @ 2.4GHz | qfat, qexp                                |
+## Processor Architecture
+Anselm is equipped with Intel Sandy Bridge processors Intel Xeon E5-2665 (nodes without accelerators and fat nodes) and Intel Xeon E5-2470 (nodes with accelerators). The processors support Advanced Vector Extensions (AVX) 256-bit instruction set.
+### Intel Sandy Bridge E5-2665 Processor
+* eight-core
+* speed: 2.4 GHz, up to 3.1 GHz using Turbo Boost Technology
+* peak performance:  19.2 GFLOP/s per core
+* caches:
+  * L2: 256 KB per core
+  * L3: 20 MB per processor
+* memory bandwidth at the level of the processor: 51.2 GB/s
+### Intel Sandy Bridge E5-2470 Processor
+* eight-core
+* speed: 2.3 GHz, up to 3.1 GHz using Turbo Boost Technology
+* peak performance:  18.4 GFLOP/s per core
+* caches:
+  * L2: 256 KB per core
+  * L3: 20 MB per processor
+* memory bandwidth at the level of the processor: 38.4 GB/s
+Nodes equipped with Intel Xeon E5-2665 CPU have the PBS resource attribute cpu_freq set to 24, nodes equipped with Intel Xeon E5-2470 CPU have the PBS resource attribute cpu_freq set to 23.
+```console
+$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16:cpu_freq=24 -I
+```
+In this example, we allocate 4 nodes, 16 cores at 2.4 GHz per node.
+Intel Turbo Boost Technology is used by default; you can disable it for all nodes of a job by using the resource attribute cpu_turbo_boost.
+```console
+$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I
+```
+## Memory Architecture
+The cluster contains three types of compute nodes.
+### Compute Nodes Without Accelerators
+* 2 sockets
+* Memory Controllers are integrated into processors.
+  * 8 DDR3 DIMMs per node
+  * 4 DDR3 DIMMs per CPU
+  * 1 DDR3 DIMM per channel
+  * Data rate support: up to 1600MT/s
+* Populated memory: 8 x 8 GB DDR3 DIMM 1600 MHz
+### Compute Nodes With a GPU or MIC Accelerator
+* 2 sockets
+* Memory Controllers are integrated into processors.
+  * 6 DDR3 DIMMs per node
+  * 3 DDR3 DIMMs per CPU
+  * 1 DDR3 DIMM per channel
+  * Data rate support: up to 1600MT/s
+* Populated memory: 6 x 16 GB DDR3 DIMM 1600 MHz
+### Fat Compute Nodes
+* 2 sockets
+* Memory Controllers are integrated into processors.
+  * 16 DDR3 DIMMs per node
+  * 8 DDR3 DIMMs per CPU
+  * 2 DDR3 DIMMs per channel
+  * Data rate support: up to 1600MT/s
+* Populated memory: 16 x 32 GB DDR3 DIMM 1600 MHz
--- a/docs.it4i/anselm/hardware-overview.md
+++ b/docs.it4i/anselm/hardware-overview.md
+# Hardware Overview
+The Anselm cluster consists of 209 computational nodes named cn[1-209] of which 180 are regular compute nodes, 23 are GPU Kepler K20 accelerated nodes, 4 are MIC Xeon Phi 5110P accelerated nodes, and 2 are fat nodes. Each node is a powerful x86-64 computer, equipped with 16 cores (two eight-core Intel Sandy Bridge processors), at least 64 GB of RAM, and a local hard drive. User access to the Anselm cluster is provided by two login nodes login[1,2]. The nodes are interlinked through high speed InfiniBand and Ethernet networks. All nodes share a 320 TB /home disk for storage of user files. The 146 TB shared /scratch storage is available for scratch data.
+The Fat nodes are equipped with a large amount (512 GB) of memory. Virtualization infrastructure provides resources to run long term servers and services in virtual mode. Fat nodes and virtual servers may access 45 TB of dedicated block storage. Accelerated nodes, fat nodes, and virtualization infrastructure are available [upon request](https://support.it4i.cz/rt) from a PI.
+Schematic representation of the Anselm cluster. Each box represents a node (computer) or storage capacity:
+![](../img/Anselm-Schematic-Representation.png)
+The cluster compute nodes cn[1-207] are organized within 13 chassis.
+There are four types of compute nodes:
+* 180 compute nodes without an accelerator
+* 23 compute nodes with a GPU accelerator - an NVIDIA Tesla Kepler K20m
+* 4 compute nodes with a MIC accelerator - an Intel Xeon Phi 5110P
+* 2 fat nodes - equipped with 512 GB of RAM and two 100 GB SSD drives
+[More about Compute nodes](/anselm/compute-nodes/).
+GPU and accelerated nodes are available upon request, see the [Resources Allocation Policy](/anselm/resources-allocation-policy/).
+All of these nodes are interconnected through fast InfiniBand and Ethernet networks.  [More about the Network](/anselm/network/).
+Every chassis provides an InfiniBand switch, marked **isw**, connecting all nodes in the chassis, as well as connecting the chassis to the upper level switches.
+All of the nodes share a 320 TB /home disk for storage of user files. The 146 TB shared /scratch storage is available for scratch data. These file systems are provided by the Lustre parallel file system. There is also local disk storage available on all compute nodes in /lscratch.  [More about Storage](/anselm/storage/).
+User access to the Anselm cluster is provided by two login nodes login1, login2, and data mover node dm1. [More about accessing the cluster.](/anselm/shell-and-data-access/)
+The parameters are summarized in the following tables:
+| **In general**                              |                                              |
+| ------------------------------------------- | -------------------------------------------- |
+| Primary purpose                             | High Performance Computing                   |
+| Architecture of compute nodes               | x86-64                                       |
+| Operating system                            | Linux (CentOS)                               |
+| [**Compute nodes**](/anselm/compute-nodes/)  |                                              |
+| Total                                       | 209                                          |
+| Processor cores                             | 16 (2 x 8 cores)                             |
+| RAM                                         | min. 64 GB, min. 4 GB per core               |
+| Local disk drive                            | yes - usually 500 GB                         |
+| Compute network                             | InfiniBand QDR, fully non-blocking, fat-tree |
+| w/o accelerator                             | 180, cn[1-180]                               |
+| GPU accelerated                             | 23, cn[181-203]                              |
+| MIC accelerated                             | 4, cn[204-207]                               |
+| Fat compute nodes                           | 2, cn[208-209]                               |
+| **In total**                                |                                              |
+| Total theoretical peak performance  (Rpeak) | 94 TFLOP/s                                   |
+| Total max. LINPACK performance  (Rmax)      | 73 TFLOP/s                                   |
+| Total amount of RAM                         | 15.136 TB                                    |
+| Node             | Processor                               | Memory | Accelerator          |
+| ---------------- | --------------------------------------- | ------ | -------------------- |
+| w/o accelerator  | 2 x Intel Sandy Bridge E5-2665, 2.4 GHz | 64 GB  | -                    |
+| GPU accelerated  | 2 x Intel Sandy Bridge E5-2470, 2.3 GHz | 96 GB  | NVIDIA Kepler K20m   |
+| MIC accelerated  | 2 x Intel Sandy Bridge E5-2470, 2.3 GHz | 96 GB  | Intel Xeon Phi 5110P |
+| Fat compute node | 2 x Intel Sandy Bridge E5-2665, 2.4 GHz | 512 GB | -                    |
+For more details refer to [Compute nodes](/anselm/compute-nodes/), [Storage](/anselm/storage/), and [Network](/anselm/network/).
--- a/docs.it4i/anselm/introduction.md
+++ b/docs.it4i/anselm/introduction.md
+# Introduction
+Welcome to the Anselm supercomputer cluster. The Anselm cluster consists of 209 compute nodes, totalling 3344 compute cores with 15 TB RAM, giving over 94 TFLOP/s theoretical peak performance. Each node is a powerful x86-64 computer, equipped with 16 cores, at least 64 GB of RAM, and a 500 GB hard disk drive. Nodes are interconnected through a fully non-blocking fat-tree InfiniBand network, and are equipped with Intel Sandy Bridge processors. A few nodes are also equipped with NVIDIA Kepler GPU or Intel Xeon Phi MIC accelerators. Read more in [Hardware Overview](/anselm/hardware-overview/).
+The cluster runs with an [operating system](/software/operating-system/) which is compatible with the RedHat [Linux family.](http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg) We have installed a wide range of software packages targeted at different scientific domains. These packages are accessible via the [modules environment](environment-and-modules/).
+The user data shared file-system (HOME, 320 TB) and job data shared file-system (SCRATCH, 146 TB) are available to users.
+The PBS Professional workload manager provides [computing resources allocations and job execution](/anselm/resources-allocation-policy/).
+Read more on how to [apply for resources](/general/applying-for-resources/), [obtain login credentials](/general/obtaining-login-credentials/obtaining-login-credentials/) and [access the cluster](/anselm/shell-and-data-access/).
--- a/docs.it4i/anselm/job-priority.md
+++ b/docs.it4i/anselm/job-priority.md
+# Job Scheduling
+## Job Execution Priority
+The scheduler gives each job an execution priority and then uses this job execution priority to select which job(s) to run.
+Job execution priority on Anselm is determined by these job properties (in order of importance):
+1. queue priority
+1. fair-share priority
+1. eligible time
+### Queue Priority
+Queue priority is the priority of the queue in which the job is waiting prior to execution.
+Queue priority has the biggest impact on job execution priority. The execution priority of jobs in higher priority queues is always greater than the execution priority of jobs in lower priority queues. Other properties of jobs used for determining the job execution priority (fair-share priority, eligible time) cannot compete with queue priority.
+Queue priorities can be seen at [https://extranet.it4i.cz/anselm/queues](https://extranet.it4i.cz/anselm/queues)
+### Fair-Share Priority
+Fair-share priority is priority calculated on the basis of recent usage of resources. Fair-share priority is calculated per project, all members of a project sharing the same fair-share priority. Projects with higher recent usage have a lower fair-share priority than projects with lower or no recent usage.
+Fair-share priority is used for ranking jobs with equal queue priority.
+Fair-share priority is calculated as
+---8<--- "fairshare_formula.md"
+where MAX_FAIRSHARE has value 1E6,
+usage<sub>Project</sub> is accumulated usage by all members of a selected project,
+usage<sub>Total</sub> is total usage by all users, across all projects.
+Usage counts allocated core-hours (`ncpus x walltime`). Usage decays, halving at intervals of 168 hours (one week).
+Jobs queued in the queue qexp are not used to calculate the project's usage.
+!!! note
+    Calculated usage and fair-share priority can be seen at [https://extranet.it4i.cz/anselm/projects](https://extranet.it4i.cz/anselm/projects).
+Calculated fair-share priority can also be seen in the Resource_List.fairshare attribute of a job.
+### Eligible Time
+Eligible time is the amount (in seconds) of eligible time a job accrues while waiting to run. Jobs with higher eligible time gain higher priority.
+Eligible time has the least impact on execution priority. Eligible time is used for sorting jobs with equal queue priority and fair-share priority. It is very, very difficult for eligible time to compete with fair-share priority.
+Eligible time can be seen in the eligible_time attribute of the job.
+### Formula
+Job execution priority (job sort formula) is calculated as:
+---8<--- "job_sort_formula.md"
+### Job Backfilling
+The Anselm cluster uses job backfilling.
+Backfilling means fitting smaller jobs around the higher-priority jobs that the scheduler is going to run next, in such a way that the higher-priority jobs are not delayed. Backfilling allows us to keep resources from becoming idle when the top job (the job with the highest execution priority) cannot run.
+The scheduler makes a list of jobs to run in order of execution priority. The scheduler looks for smaller jobs that can fit into the usage gaps around the highest-priority jobs in the list. The scheduler looks in the prioritized list of jobs and chooses the highest-priority smaller jobs that fit. Filler jobs are run only if they will not delay the start time of top jobs.
+This means that jobs with lower execution priority can be run before jobs with higher execution priority.
+!!! note
+    It is **very beneficial to specify the walltime** when submitting jobs.
+Specifying more accurate walltime enables better scheduling, better execution times, and better resource usage. Jobs with suitable (small) walltime can be backfilled - and overtake job(s) with a higher priority.
+---8<--- "mathjax.md"
--- a/docs.it4i/anselm/job-submission-and-execution.md
+++ b/docs.it4i/anselm/job-submission-and-execution.md
+# Job Submission and Execution
+## Job Submission
+When allocating computational resources for the job, specify:
+1. a suitable queue for your job (the default is qprod)
+1. the number of computational nodes required
+1. the number of cores per node required
+1. the maximum wall time allocated to your calculation, note that jobs exceeding the maximum wall time will be killed
+1. your Project ID
+1. a Jobscript or interactive switch
+!!! note
+    Use the **qsub** command to submit your job to a queue for allocation of computational resources.
+Submit the job using the qsub command:
+```console
+$ qsub -A Project_ID -q queue -l select=x:ncpus=y,walltime=[[hh:]mm:]ss[.ms] jobscript
+```
+The qsub command submits the job to the queue, i.e. the qsub command creates a request to the PBS Job manager for allocation of specified resources. The resources will be allocated when available, subject to the above described policies and constraints. **After the resources are allocated, the jobscript or interactive shell is executed on the first of the allocated nodes.**
+!!! note
+    PBS statement nodes (qsub -l nodes=nodespec) are not supported on the Anselm cluster.
+### Job Submission Examples
+```console
+$ qsub -A OPEN-0-0 -q qprod -l select=64:ncpus=16,walltime=03:00:00 ./myjob
+```
+In this example, we allocate 64 nodes, 16 cores per node, for 3 hours. We allocate these resources via the qprod queue, consumed resources will be accounted to the Project identified by Project ID OPEN-0-0. The jobscript 'myjob' will be executed on the first node in the allocation.
+```console
+$ qsub -q qexp -l select=4:ncpus=16 -I
+```
+In this example, we allocate 4 nodes, 16 cores per node, for 1 hour. We allocate these resources via the qexp queue. The resources will be available interactively.
+```console
+$ qsub -A OPEN-0-0 -q qnvidia -l select=10:ncpus=16 ./myjob
+```
+In this example, we allocate 10 NVIDIA accelerated nodes, 16 cores per node, for 24 hours. We allocate these resources via the qnvidia queue. The jobscript 'myjob' will be executed on the first node in the allocation.
+```console
+$ qsub -A OPEN-0-0 -q qfree -l select=10:ncpus=16 ./myjob
+```
+In this example, we allocate 10 nodes, 16 cores per node, for 12 hours. We allocate these resources via the qfree queue. It is not required that the project OPEN-0-0 has any available resources left. Consumed resources are still accounted for. The jobscript myjob will be executed on the first node in the allocation.
+All qsub options may be [saved directly into the jobscript](#example-jobscript-for-mpi-calculation-with-preloaded-inputs). In such cases, it is not necessary to specify any options for qsub.
+```console
+$ qsub ./myjob
+```
+By default, the PBS batch system sends an e-mail only when the job is aborted. Disabling mail events completely can be done as follows:
+```console
+$ qsub -m n
+```
+## Advanced Job Placement
+### Placement by Name
+Specific nodes may be allocated via the PBS resource attribute host:
+```console
+$ qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=16:host=cn171+1:ncpus=16:host=cn172 -I
+```
+In this example, we allocate nodes cn171 and cn172, all 16 cores per node, for 24 hours.  Consumed resources will be accounted to the Project identified by Project ID OPEN-0-0. The resources will be available interactively.
+### Placement by CPU Type
+Nodes equipped with an Intel Xeon E5-2665 CPU have a base clock frequency of 2.4GHz, nodes equipped with an Intel Xeon E5-2470 CPU have a base frequency of 2.3 GHz (see the section Compute Nodes for details).  Nodes may be selected via the PBS resource attribute cpu_freq .
+| CPU Type           | base freq. | Nodes                  | cpu_freq attribute |
+| ------------------ | ---------- | ---------------------- | ------------------ |
+| Intel Xeon E5-2665 | 2.4GHz     | cn[1-180], cn[208-209] | 24                 |
+| Intel Xeon E5-2470 | 2.3GHz     | cn[181-207]            | 23                 |
+```console
+$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16:cpu_freq=24 -I
+```
+In this example, we allocate 4 nodes, 16 cores per node, selecting only the nodes with Intel Xeon E5-2665 CPU.
+### Placement by IB Switch
+Groups of computational nodes are connected to chassis integrated Infiniband switches. These switches form the leaf switch layer of the [Infiniband network](/anselm/network/) fat tree topology. Nodes sharing the leaf switch can communicate most efficiently. Sharing the same switch prevents hops in the network and facilitates unbiased, highly efficient network communication.
+Nodes sharing the same switch may be selected via the PBS resource attribute ibswitch. Values of this attribute are iswXX, where XX is the switch number. The node-switch mapping can be seen in the [Hardware Overview](/anselm/hardware-overview/) section.
+We recommend allocating compute nodes to a single switch when best possible computational network performance is required to run the job efficiently:
+```console
+$ qsub -A OPEN-0-0 -q qprod -l select=18:ncpus=16:ibswitch=isw11 ./myjob
+```
+In this example, we request all of the 18 nodes sharing the isw11 switch for 24 hours. A full chassis will be allocated.
+## Advanced Job Handling
+### Selecting Turbo Boost Off
+Intel Turbo Boost Technology is on by default. We strongly recommend keeping the default.
+If necessary (such as in the case of benchmarking) you can disable the Turbo for all nodes of the job by using the PBS resource attribute cpu_turbo_boost:
+```console
+$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I
+```
+More information about the Intel Turbo Boost can be found in the TurboBoost section
+### Advanced Examples
+In the following example, we select an allocation for benchmarking a very special and demanding MPI program. We request Turbo off, and 2 full chassis of compute nodes (nodes sharing the same IB switches) for 30 minutes:
+```console
+$ qsub -A OPEN-0-0 -q qprod \
+    -l select=18:ncpus=16:ibswitch=isw10:mpiprocs=1:ompthreads=16+18:ncpus=16:ibswitch=isw20:mpiprocs=16:ompthreads=1 \
+    -l cpu_turbo_boost=0,walltime=00:30:00 \
+    -N Benchmark ./mybenchmark
+```
+The MPI processes will be distributed differently on the nodes connected to the two switches. On the isw10 nodes, we will run 1 MPI process per node with 16 threads per process, on isw20 nodes we will run 16 plain MPI processes.
+Although this example is somewhat artificial, it demonstrates the flexibility of the qsub command options.
+## Job Management
+!!! note
+    Check status of your jobs using the **qstat** and **check-pbs-jobs** commands
+```console
+$ qstat -a
+$ qstat -a -u username
+$ qstat -an -u username
+$ qstat -f 12345.srv11
+```
+Example:
+```console
+$ qstat -a
+srv11:
+                                                            Req'd Req'd   Elap
+Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
+--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
+16287.srv11     user1    qlong    job1         6183   4 64    --  144:0 R 38:25
+16468.srv11     user1    qlong    job2         8060   4 64    --  144:0 R 17:44
+16547.srv11     user2    qprod    job3x       13516   2 32    --  48:00 R 00:58
+```
+In this example user1 and user2 are running jobs named job1, job2 and job3x. The jobs job1 and job2 are using 4 nodes, 16 cores per node each. job1 has already run for 38 hours and 25 minutes, and job2 for 17 hours 44 minutes. job1 has already consumed `64 x 38.41 = 2458.6` core hours. job3x has already consumed `0.96 x 32 = 30.93` core hours. These consumed core hours will be accounted for on the respective project accounts, regardless of whether the allocated cores were actually used for computations.
+The following commands allow you to: check the status of your jobs using the check-pbs-jobs command; check for the presence of user's PBS jobs' processes on execution hosts; display load and processes; display job standard and error output; and continuously display (tail -f) job standard or error output:
+```console
+$ check-pbs-jobs --check-all
+$ check-pbs-jobs --print-load --print-processes
+$ check-pbs-jobs --print-job-out --print-job-err
+$ check-pbs-jobs --jobid JOBID --check-all --print-all
+$ check-pbs-jobs --jobid JOBID --tailf-job-out
+```
+Examples:
+```console
+$ check-pbs-jobs --check-all
+JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
+Check session id: OK
+Check processes
+cn164: OK
+cn165: No process
+```
+In this example we see that job 35141.dm2 is not currently running any processes on the allocated node cn165, which may indicate an execution error.
+```console
+$ check-pbs-jobs --print-load --print-processes
+JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
+Print load
+cn164: LOAD: 16.01, 16.01, 16.00
+cn165: LOAD:  0.01,  0.00,  0.01
+Print processes
+       %CPU CMD
+cn164:  0.0 -bash
+cn164:  0.0 /bin/bash /var/spool/PBS/mom_priv/jobs/35141.dm2.SC
+cn164: 99.7 run-task
+...
+```
+In this example we see that job 35141.dm2 is currently running a process run-task on node cn164, using one thread only, while node cn165 is empty, which may indicate an execution error.
+```console
+$ check-pbs-jobs --jobid 35141.dm2 --print-job-out
+JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
+Print job standard output:
+======================== Job start  ==========================
+Started at    : Fri Aug 30 02:47:53 CEST 2013
+Script name   : script
+Run loop 1
+Run loop 2
+Run loop 3
+```
+In this example, we see actual output (some iteration loops) of the job 35141.dm2
+!!! note
+    Manage your queued or running jobs, using the **qhold**, **qrls**, **qdel**, **qsig** or **qalter** commands
+You may release your allocation at any time, using the qdel command
+```console
+$ qdel 12345.srv11
+```
+You may kill a running job by force, using the qsig command
+```console
+$ qsig -s 9 12345.srv11
+```
+Learn more by reading the pbs man page
+```console
+$ man pbs_professional
+```
+## Job Execution
+### Jobscript
+!!! note
+    Prepare the jobscript to run batch jobs in the PBS queue system
+The Jobscript is a user made script controlling a sequence of commands for executing the calculation. It is often written in bash, though other scripts may be used as well. The jobscript is supplied to the PBS **qsub** command as an argument, and is executed by the PBS Professional workload manager.
+!!! note
+    The jobscript or interactive shell is executed on the first of the allocated nodes.
+```console
+$ qsub -q qexp -l select=4:ncpus=16 -N Name0 ./myjob
+$ qstat -n -u username
+srv11:
+                                                            Req'd Req'd   Elap
+Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
+--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
+15209.srv11     username qexp     Name0        5530   4 64    --  01:00 R 00:00
+   cn17/0*16+cn108/0*16+cn109/0*16+cn110/0*16
+```
+In this example, the nodes cn17, cn108, cn109, and cn110 were allocated for 1 hour via the qexp queue. The jobscript myjob will be executed on the node cn17, while the nodes cn108, cn109, and cn110 are available for use as well.
+The jobscript or interactive shell is by default executed in the home directory
+```console
+$ qsub -q qexp -l select=4:ncpus=16 -I
+qsub: waiting for job 15210.srv11 to start
+qsub: job 15210.srv11 ready
+$ pwd
+/home/username
+```
+In this example, 4 nodes were allocated interactively for 1 hour via the qexp queue. The interactive shell is executed in the home directory.
+!!! note
+    All nodes within the allocation may be accessed via ssh.  Unallocated nodes are not accessible to the user.
+The allocated nodes are accessible via ssh from login nodes. The nodes may access each other via ssh as well.
+Calculations on allocated nodes may be executed remotely via the MPI, ssh, pdsh or clush. You may find out which nodes belong to the allocation by reading the $PBS_NODEFILE file
+```console
+$ qsub -q qexp -l select=4:ncpus=16 -I
+qsub: waiting for job 15210.srv11 to start
+qsub: job 15210.srv11 ready
+$ pwd
+/home/username
+$ sort -u $PBS_NODEFILE
+cn17.bullx
+cn108.bullx
+cn109.bullx
+cn110.bullx
+$ pdsh -w cn17,cn[108-110] hostname
+cn17: cn17
+cn108: cn108
+cn109: cn109
+cn110: cn110
+```
+In this example, the hostname program is executed via pdsh from the interactive shell. The execution runs on all four allocated nodes. The same result would be achieved if the pdsh is called from any of the allocated nodes or from the login nodes.
+### Example Jobscript for MPI Calculation
+!!! note
+    Production jobs must use the /scratch directory for I/O
+The recommended way to run production jobs is to change to the /scratch directory early in the jobscript, copy all inputs to /scratch, execute the calculations and copy outputs to the home directory.
+```bash
+#!/bin/bash
+# change to scratch directory, exit on failure
+SCRDIR=/scratch/$USER/myjob
+mkdir -p $SCRDIR
+cd $SCRDIR || exit
+# copy input file to scratch
+cp $PBS_O_WORKDIR/input .
+cp $PBS_O_WORKDIR/mympiprog.x .
+# load the MPI module
+ml OpenMPI
+# execute the calculation
+mpirun -pernode ./mympiprog.x
+# copy output file to home
+cp output $PBS_O_WORKDIR/.
+#exit
+exit
+```
+In this example, a directory in /home holds the input file input and executable mympiprog.x . We create the directory myjob on the /scratch filesystem, copy input and executable files from the /home directory where the qsub was invoked ($PBS_O_WORKDIR) to /scratch, execute the MPI program mympiprog.x and copy the output file back to the /home directory. mympiprog.x is executed as one process per node, on all allocated nodes.
+!!! note
+    Consider preloading inputs and executables onto [shared scratch](/anselm/storage/) memory before the calculation starts.
+In some cases, it may be impractical to copy the inputs to the scratch memory and the outputs to the home directory. This is especially true when very large input and output files are expected, or when the files should be reused by a subsequent calculation. In such cases, it is the users' responsibility to preload the input files on shared /scratch memory before the job submission, and retrieve the outputs manually after all calculations are finished.
+!!! note
+    Store the qsub options within the jobscript. Use **mpiprocs** and **ompthreads** qsub options to control the MPI job execution.
+### Example Jobscript for MPI Calculation With Preloaded Inputs
+Example jobscript for an MPI job with preloaded inputs and executables, options for qsub are stored within the script:
+```bash
+#!/bin/bash
+#PBS -q qprod
+#PBS -N MYJOB
+#PBS -l select=100:ncpus=16:mpiprocs=1:ompthreads=16
+#PBS -A OPEN-0-0
+# change to scratch directory, exit on failure
+SCRDIR=/scratch/$USER/myjob
+cd $SCRDIR || exit
+# load the MPI module
+ml OpenMPI
+# execute the calculation
+mpirun ./mympiprog.x
+#exit
+exit
+```
+In this example, input and executable files are assumed to be preloaded manually in the /scratch/$USER/myjob directory. Note the **mpiprocs** and **ompthreads** qsub options controlling the behavior of the MPI execution. mympiprog.x is executed as one process per node, on all 100 allocated nodes. If mympiprog.x implements OpenMP threads, it will run 16 threads per node.
+More information can be found in the [Running OpenMPI](/software/mpi/Running_OpenMPI/) and [Running MPICH2](/software/mpi/running-mpich2/)
+sections.
+### Example Jobscript for Single Node Calculation
+!!! note
+    The local scratch directory is often useful for single node jobs. Local scratch memory will be deleted immediately after the job ends.
+Example jobscript for single node calculation, using [local scratch](/anselm/storage/) memory on the node:
+```bash
+#!/bin/bash
+# change to local scratch directory
+cd /lscratch/$PBS_JOBID || exit
+# copy input file to scratch
+cp $PBS_O_WORKDIR/input .
+cp $PBS_O_WORKDIR/myprog.x .
+# execute the calculation
+./myprog.x
+# copy output file to home
+cp output $PBS_O_WORKDIR/.
+#exit
+exit
+```
+In this example, a directory in /home holds the input file input and executable myprog.x . We copy input and executable files from the home directory where the qsub was invoked ($PBS_O_WORKDIR) to local scratch memory /lscratch/$PBS_JOBID, execute myprog.x and copy the output file back to the /home directory. myprog.x runs on one node only and may use threads.
+### Other Jobscript Examples
+Further jobscript examples may be found in the software section and the [Capacity computing](/anselm/capacity-computing/) section.
--- a/docs.it4i/anselm/network.md
+++ b/docs.it4i/anselm/network.md
+# Network
+All of the compute and login nodes of Anselm are interconnected through an [InfiniBand](http://en.wikipedia.org/wiki/InfiniBand) QDR network and a Gigabit [Ethernet](http://en.wikipedia.org/wiki/Ethernet) network. Both networks may be used to transfer user data.
+## InfiniBand Network
+All of the compute and login nodes of Anselm are interconnected through a high-bandwidth, low-latency [InfiniBand](http://en.wikipedia.org/wiki/InfiniBand) QDR network (IB 4 x QDR, 40 Gbps). The network topology is a fully non-blocking fat-tree.
+The compute nodes may be accessed via the InfiniBand network using ib0 network interface, in address range 10.2.1.1-209. The MPI may be used to establish native InfiniBand connection among the nodes.
+!!! note
+    The network provides **2170 MB/s** transfer rates via the TCP connection (single stream) and up to **3600 MB/s** via the native InfiniBand protocol.
+The Fat tree topology ensures that peak transfer rates are achieved between any two nodes, independent of network traffic exchanged among other nodes concurrently.
+## Ethernet Network
+The compute nodes may be accessed via the regular Gigabit Ethernet network interface eth0, in address range 10.1.1.1-209, or by using aliases cn1-cn209. The network provides **114 MB/s** transfer rates via the TCP connection.
+## Example
+```console
+$ qsub -q qexp -l select=4:ncpus=16 -N Name0 ./myjob
+$ qstat -n -u username
+                                                            Req'd Req'd   Elap
+Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
+--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
+15209.srv11     username qexp     Name0        5530   4 64    --  01:00 R 00:00
+   cn17/0*16+cn108/0*16+cn109/0*16+cn110/0*16
+$ ssh 10.2.1.110
+$ ssh 10.1.1.108
+```
+In this example, we access the node cn110 through the InfiniBand network via the ib0 interface, then from cn110 to cn108 through the Ethernet network.
--- a/docs.it4i/anselm/resources-allocation-policy.md
+++ b/docs.it4i/anselm/resources-allocation-policy.md
+# Resources Allocation Policy
+## Job Queue Policies
+The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and the resources available to the Project. The Fair-share system of Anselm ensures that individual users may consume approximately equal amounts of resources per week. Detailed information can be found in the [Job scheduling](/anselm/job-priority/) section. The resources are accessible via several queues for queueing the jobs. The queues provide prioritized and exclusive access to the computational resources. The following table provides the queue partitioning overview:
+!!! note
+    Check the queue status at <https://extranet.it4i.cz/anselm/>
+| queue               | active project | project resources    | nodes                                                | min ncpus | priority | authorization | walltime |
+| ------------------- | -------------- | -------------------- | ---------------------------------------------------- | --------- | -------- | ------------- | -------- |
+| qexp                | no             | none required        | 209 nodes                                            | 1         | 150      | no            | 1 h      |
+| qprod               | yes            | > 0                  | 180 nodes w/o accelerator                            | 16        | 0        | no            | 24/48 h  |
+| qlong               | yes            | > 0                  | 180 nodes w/o accelerator                            | 16        | 0        | no            | 72/144 h |
+| qnvidia, qmic       | yes            | > 0                  | 23 nvidia nodes, 4 mic nodes                         | 16        | 200      | yes           | 24/48 h  |
+| qfat                | yes            | > 0                  | 2 fat nodes                                          | 16        | 200      | yes           | 24/144 h |
+| qfree               | yes            | < 120% of allocation | 180 w/o accelerator                                  | 16        | -1024    | no            | 12 h     |
+!!! note
+    **The qfree queue is not free of charge**. [Normal accounting](#resources-accounting-policy) applies. However, it allows for utilization of free resources, once a project has exhausted all its allocated computational resources. This does not apply to Director's Discretion projects (DD projects) by default. Usage of qfree after exhaustion of DD projects' computational resources is allowed after request for this queue.
+**The qexp queue is equipped with nodes which do not have exactly the same CPU clock speed.** Should you need the nodes to have exactly the same CPU speed, you have to select the proper nodes during the PBS job submission.
+* **qexp**, the Express queue: This queue is dedicated to testing and running very small jobs. It is not required to specify a project to enter the qexp. There are always 2 nodes reserved for this queue (w/o accelerators), a maximum of 8 nodes are available via the qexp for a particular user, from a pool of nodes containing Nvidia accelerated nodes (cn181-203), MIC accelerated nodes (cn204-207) and Fat nodes with 512GB of RAM (cn208-209). This enables us to test and tune accelerated code and code with higher RAM requirements. The nodes may be allocated on a per core basis. No special authorization is required to use qexp. The maximum runtime in qexp is 1 hour.
+* **qprod**, the Production queue: This queue is intended for normal production runs. It is required that an active project with nonzero remaining resources is specified to enter the qprod. All nodes may be accessed via the qprod queue, except the reserved ones. 178 nodes without accelerators are included. Full nodes, 16 cores per node, are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qprod is 48 hours.
+* **qlong**, the Long queue: This queue is intended for long production runs. It is required that an active project with nonzero remaining resources is specified to enter the qlong. Only 60 nodes without acceleration may be accessed via the qlong queue. Full nodes, 16 cores per node, are allocated. The queue runs with medium priority and no special authorization is required to use it. The maximum runtime in qlong is 144 hours (three times that of the standard qprod time - 3 x 48 h).
+* **qnvidia**, qmic, qfat, the Dedicated queues: The queue qnvidia is dedicated to accessing the Nvidia accelerated nodes, the qmic to accessing MIC nodes and qfat the Fat nodes. It is required that an active project with nonzero remaining resources is specified to enter these queues. 23 nvidia, 4 mic, and 2 fat nodes are included. Full nodes, 16 cores per node, are allocated. The queues run with very high priority, the jobs will be scheduled before the jobs coming from the qexp queue. A PI needs to explicitly ask [support](https://support.it4i.cz/rt/) for authorization to enter the dedicated queues for all users associated with her/his project.
+* **qfree**, The Free resource queue: The queue qfree is intended for utilization of free resources, after a project has exhausted all of its allocated computational resources (Does not apply to DD projects by default; DD projects have to request permission to use qfree after exhaustion of computational resources). It is required that an active project is specified to enter the queue. Consumed resources will be accounted to the Project. Access to the qfree queue is automatically removed if consumed resources exceed 120% of the resources allocated to the Project. Only 180 nodes without accelerators may be accessed from this queue. Full nodes, 16 cores per node, are allocated. The queue runs with very low priority and no special authorization is required to use it. The maximum runtime in qfree is 12 hours.
+## Queue Notes
+The job wall clock time defaults to **half the maximum time**, see the table above. Longer wall time limits can be  [set manually, see examples](/anselm/job-submission-and-execution/).
+Jobs that exceed the reserved wall clock time (Req'd Time) get killed automatically. The wall clock time limit can be changed for queuing jobs (state Q) using the qalter command, however it cannot be changed for a running job (state R).
+Anselm users may check the current queue configuration at [https://extranet.it4i.cz/anselm/queues](https://extranet.it4i.cz/anselm/queues).
+## Queue Status
+!!! tip
+    Check the status of jobs, queues and compute nodes at [https://extranet.it4i.cz/anselm/](https://extranet.it4i.cz/anselm/)
+![rspbs web interface](../img/rsweb.png)
+Display the queue status on Anselm:
+```console
+$ qstat -q
+```
+The PBS allocation overview may be obtained also using the rspbs command:
+```console
+$ rspbs
+Usage: rspbs [options]
+Options:
+  --version             show program's version number and exit
+  -h, --help            show this help message and exit
+  --get-node-ncpu-chart
+                        Print chart of allocated ncpus per node
+  --summary             Print summary
+  --get-server-details Print server
+  --get-queues          Print queues
+  --get-queues-details Print queues details
+  --get-reservations    Print reservations
+  --get-reservations-details
+                        Print reservations details
+  --get-nodes           Print nodes of PBS complex
+  --get-nodeset         Print nodeset of PBS complex
+  --get-nodes-details   Print nodes details
+  --get-jobs            Print jobs
+  --get-jobs-details    Print jobs details
+  --get-jobs-check-params
+                        Print jobid, job state, session_id, user, nodes
+  --get-users           Print users of jobs
+  --get-allocated-nodes
+                        Print allocated nodes of jobs
+  --get-allocated-nodeset
+                        Print allocated nodeset of jobs
+  --get-node-users      Print node users
+  --get-node-jobs       Print node jobs
+  --get-node-ncpus      Print number of ncpus per node
+  --get-node-allocated-ncpus
+                        Print number of allocated ncpus per node
+  --get-node-qlist      Print node qlist
+  --get-node-ibswitch   Print node ibswitch
+  --get-user-nodes      Print user nodes
+  --get-user-nodeset    Print user nodeset
+  --get-user-jobs       Print user jobs
+  --get-user-jobc       Print number of jobs per user
+  --get-user-nodec      Print number of allocated nodes per user
+  --get-user-ncpus      Print number of allocated ncpus per user
+  --get-qlist-nodes     Print qlist nodes
+  --get-qlist-nodeset   Print qlist nodeset
+  --get-ibswitch-nodes Print ibswitch nodes
+  --get-ibswitch-nodeset
+                        Print ibswitch nodeset
+  --state=STATE         Only for given job state
+  --jobid=JOBID         Only for given job ID
+  --user=USER           Only for given user
+  --node=NODE           Only for given node
+  --nodestate=NODESTATE
+                        Only for given node state (affects only --get-node*
+                        --get-qlist-* --get-ibswitch-* actions)
+  --incl-finished       Include finished jobs
+```
+---8<--- "resource_accounting.md"
+---8<--- "mathjax.md"
--- a/docs.it4i/anselm/shell-and-data-access.md
+++ b/docs.it4i/anselm/shell-and-data-access.md
+# Accessing the Cluster
+## Shell Access
+The Anselm cluster is accessed by SSH protocol via login nodes login1 and login2 at the address anselm.it4i.cz. The login nodes may be addressed specifically, by prepending the login node name to the address.
+| Login address         | Port | Protocol | Login node                                   |
+| --------------------- | ---- | -------- | -------------------------------------------- |
+| anselm.it4i.cz        | 22   | ssh      | round-robin DNS record for login1 and login2 |
+| login1.anselm.it4i.cz | 22   | ssh      | login1                                       |
+| login2.anselm.it4i.cz | 22   | ssh      | login2                                       |
+Authentication is by [private key](../../general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys/)
+!!! note
+    Please verify SSH fingerprints during the first logon. They are identical on all login nodes:
+    md5:
+    29:b3:f4:64:b0:73:f5:6f:a7:85:0f:e0:0d:be:76:bf (DSA)
+    d4:6f:5c:18:f4:3f:70:ef:bc:fc:cc:2b:fd:13:36:b7 (RSA)
+    sha256:
+    LX2034TYy6Lf0Q7Zf3zOIZuFlG09DaSGROGBz6LBUy4 (DSA)
+    +DcED3GDoA9piuyvQOho+ltNvwB9SJSYXbB639hbejY (RSA)
+Private key authentication:
+On **Linux** or **Mac**, use:
+```console
+$ ssh -i /path/to/id_rsa username@anselm.it4i.cz
+```
+If you see a warning message "UNPROTECTED PRIVATE KEY FILE!", use this command to set lower permissions to the private key file:
+```console
+$ chmod 600 /path/to/id_rsa
+```
+On **Windows**, use [PuTTY ssh client](../general/accessing-the-clusters/shell-access-and-data-transfer/putty.md).
+After logging in, you will see the command prompt:
+```console
+                                            _
+                       /\                  | |
+                      /  \   _ __  ___  ___| |_ __ ___
+                     / /\ \ | '_ \/ __|/ _ \ | '_ ` _ \
+                    / ____ \| | | \__ \  __/ | | | | | |
+                   /_/    \_\_| |_|___/\___|_|_| |_| |_|
+                        http://www.it4i.cz/?lang=en
+Last login: Tue Jul 9 15:57:38 2013 from your-host.example.com
+[username@login2.anselm ~]$
+```
+Example to the cluster login:
+!!! note
+    The environment is **not** shared between login nodes, except for [shared filesystems](storage/#shared-filesystems).
+## Data Transfer
+Data in and out of the system may be transferred by the [scp](http://en.wikipedia.org/wiki/Secure_copy) and sftp protocols. (Not available yet). In the case that large volumes of data are transferred, use the dedicated data mover node dm1.anselm.it4i.cz for increased performance.
+| Address               | Port | Protocol  |
+| --------------------- | ---- | --------- |
+| anselm.it4i.cz        | 22   | scp       |
+| login1.anselm.it4i.cz | 22   | scp       |
+| login2.anselm.it4i.cz | 22   | scp       |
+Authentication is by [private key](../general/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md)
+!!! note
+    Data transfer rates of up to **160MB/s** can be achieved with scp or sftp.
+    1TB may be transferred in 1:50h.
+To achieve 160MB/s transfer rates, the end user must be connected by 10G line all the way to IT4Innovations, and be using a computer with a fast processor for the transfer. When using a Gigabit ethernet connection, up to 110MB/s transfer rates may be expected.  Fast cipher (aes128-ctr) should be used.
+!!! note
+    If you experience degraded data transfer performance, consult your local network provider.
+On Linux or Mac, use an scp or sftp client to transfer data to Anselm:
+```console
+$ scp -i /path/to/id_rsa my-local-file username@anselm.it4i.cz:directory/file
+```
+```console
+$ scp -i /path/to/id_rsa -r my-local-dir username@anselm.it4i.cz:directory
+```
+or
+```console
+$ sftp -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz
+```
+A very convenient way to transfer files in and out of Anselm is via the fuse filesystem [sshfs](http://linux.die.net/man/1/sshfs)
+```console
+$ sshfs -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz:. mountpoint
+```
+Using sshfs, the user's Anselm home directory will be mounted on your local computer, just like an external disk.
+Learn more about ssh, scp and sshfs by reading the manpages
+```console
+$ man ssh
+$ man scp
+$ man sshfs
+```
+On Windows, use the [WinSCP client](http://winscp.net/eng/download.php) to transfer the data. The [win-sshfs client](http://code.google.com/p/win-sshfs/) provides a way to mount the Anselm filesystems directly as an external disc.
+More information about the shared file systems is available [here](access/storage/).
+## Connection Restrictions
+Outgoing connections, from Anselm Cluster login nodes to the outside world, are restricted to the following ports:
+| Port | Protocol |
+| ---- | -------- |
+| 22   | ssh      |
+| 80   | http     |
+| 443  | https    |
+| 9418 | git      |
+!!! note
+    Please use **ssh port forwarding** and proxy servers to connect from Anselm to all other remote ports.
+Outgoing connections from Anselm Cluster compute nodes are restricted to the internal network. Direct connections from compute nodes to the outside world are cut.
+## Port Forwarding
+### Port Forwarding From Login Nodes
+!!! note
+    Port forwarding allows an application running on Anselm to connect to arbitrary remote hosts and ports.
+It works by tunneling the connection from Anselm back to users' workstations and forwarding from the workstation to the remote host.
+Pick some unused port on the Anselm login node  (for example 6000) and establish the port forwarding:
+```console
+$ ssh -R 6000:remote.host.com:1234 anselm.it4i.cz
+```
+In this example, we establish port forwarding between port 6000 on Anselm and port 1234 on the remote.host.com. By accessing localhost:6000 on Anselm, an application will see the response of remote.host.com:1234. The traffic will run via the user's local workstation.
+Port forwarding may be done **using PuTTY** as well. On the PuTTY Configuration screen, load your Anselm configuration first. Then go to Connection->SSH->Tunnels to set up the port forwarding. Click the Remote radio button. Insert 6000 into the Source port textbox. Insert remote.host.com:1234. Click the Add button, then Open.
+Port forwarding may be established directly to the remote host. However, this requires that the user has ssh access to remote.host.com
+```console
+$ ssh -L 6000:localhost:1234 remote.host.com
+```
+!!! note
+    Port number 6000 is chosen as an example only. Pick any free port.
+### Port Forwarding From Compute Nodes
+Remote port forwarding from compute nodes allows applications running on the compute nodes to access hosts outside the Anselm Cluster.
+First, establish the remote port forwarding from the login node, as [described above](#port-forwarding-from-login-nodes).
+Second, invoke port forwarding from the compute node to the login node. Insert the following line into your jobscript or interactive shell:
+```console
+$ ssh  -TN -f -L 6000:localhost:6000 login1
+```
+In this example, we assume that port forwarding from login1:6000 to remote.host.com:1234 has been established beforehand. By accessing localhost:6000, an application running on a compute node will see the response of remote.host.com:1234
+### Using Proxy Servers
+Port forwarding is static, each single port is mapped to a particular port on a remote host. Connection to another remote host requires a new forward.
+!!! note
+    Applications with inbuilt proxy support experience unlimited access to remote hosts via a single proxy server.
+To establish a local proxy server on your workstation, install and run SOCKS proxy server software. On Linux, the sshd daemon provides the functionality. To establish a SOCKS proxy server listening on port 1080, run:
+```console
+$ ssh -D 1080 localhost
+```
+On Windows, install and run the free, open source [Sock Puppet](http://sockspuppet.com/) server.
+Once the proxy server is running, establish ssh port forwarding from Anselm to the proxy server, port 1080, exactly as [described above](#port-forwarding-from-login-nodes):
+```console
+$ ssh -R 6000:localhost:1080 anselm.it4i.cz
+```
+Now, configure the application's proxy settings to **localhost:6000**. Use port forwarding to access the [proxy server from compute nodes](#port-forwarding-from-compute-nodes) as well.
+## Graphical User Interface
+* The [X Window system](/general/accessing-the-clusters/graphical-user-interface/x-window-system/) is the principal way to get GUI access to the clusters.
+* [Virtual Network Computing](/general/accessing-the-clusters/graphical-user-interface/vnc/) is a graphical [desktop sharing](http://en.wikipedia.org/wiki/Desktop_sharing) system that uses the [Remote Frame Buffer protocol](http://en.wikipedia.org/wiki/RFB_protocol) to remotely control another [computer](http://en.wikipedia.org/wiki/Computer).
+## VPN Access
+* Access IT4Innovations internal resources via [VPN](/general/accessing-the-clusters/vpn-access/).
--- a/docs.it4i/anselm/software/nvidia-cuda.md
+++ b/docs.it4i/anselm/software/nvidia-cuda.md
+# NVIDIA CUDA
+Guide to NVIDIA CUDA Programming and GPU Usage
+## CUDA Programming on Anselm
+The default programming model for GPU accelerators on Anselm is Nvidia CUDA. To set up the environment for CUDA use;
+```console
+$ ml av cuda
+$ ml cuda **or** ml CUDA
+```
+If the user code is hybrid and uses both CUDA and MPI, the MPI environment has to be set up as well. One way to do this is to use the PrgEnv-gnu module, which sets up the correct combination of the GNU compiler and MPI library;
+```console
+$ ml PrgEnv-gnu
+```
+CUDA code can be compiled directly on login1 or login2 nodes. The user does not have to use compute nodes with GPU accelerators for compilation. To compile CUDA source code, use an nvcc compiler;
+```console
+$ nvcc --version
+```
+The CUDA Toolkit comes with a large number of examples which can be a helpful reference to start with. To compile and test these examples, users should copy them to their home directory;
+```console
+$ cd ~
+$ mkdir cuda-samples
+$ cp -R /apps/nvidia/cuda/6.5.14/samples/* ~/cuda-samples/
+```
+To compile examples, change directory to the particular example (here the example used is deviceQuery) and run "make" to start the compilation;
+```console
+$ cd ~/cuda-samples/1_Utilities/deviceQuery
+$ make
+```
+To run the code, the user can use PBS interactive session to get access to a node from qnvidia queue (note: use your project name with parameter -A in the qsub command) and execute the binary file;
+```console
+$ qsub -I -q qnvidia -A OPEN-0-0
+$ ml cuda
+$ ~/cuda-samples/1_Utilities/deviceQuery/deviceQuery
+```
+The expected output of the deviceQuery example executed on a node with a Tesla K20m is;
+```console
+    CUDA Device Query (Runtime API) version (CUDART static linking)
+    Detected 1 CUDA Capable device(s)
+    Device 0: "Tesla K20m"
+    CUDA Driver Version / Runtime Version 5.0 / 5.0
+    CUDA Capability Major/Minor version number: 3.5
+    Total amount of global memory: 4800 MBytes (5032706048 bytes)
+    (13) Multiprocessors x (192) CUDA Cores/MP: 2496 CUDA Cores
+    GPU Clock rate: 706 MHz (0.71 GHz)
+    Memory Clock rate: 2600 Mhz
+    Memory Bus Width: 320-bit
+    L2 Cache Size: 1310720 bytes
+    Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65536), 3D=(4096,4096,4096)
+    Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048
+    Total amount of constant memory: 65536 bytes
+    Total amount of shared memory per block: 49152 bytes
+    Total number of registers available per block: 65536
+    Warp size: 32
+    Maximum number of threads per multiprocessor: 2048
+    Maximum number of threads per block: 1024
+    Maximum sizes of each dimension of a block: 1024 x 1024 x 64
+    Maximum sizes of each dimension of a grid: 2147483647 x 65535 x 65535
+    Maximum memory pitch: 2147483647 bytes
+    Texture alignment: 512 bytes
+    Concurrent copy and kernel execution: Yes with 2 copy engine(s)
+    Run time limit on kernels: No
+    Integrated GPU sharing Host Memory: No
+    Support host page-locked memory mapping: Yes
+    Alignment requirement for Surfaces: Yes
+    Device has ECC support: Enabled
+    Device supports Unified Addressing (UVA): Yes
+    Device PCI Bus ID / PCI location ID: 2 / 0
+    Compute Mode:
+    < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >
+    deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 5.0, CUDA Runtime Version = 5.0, NumDevs = 1, Device0 = Tesla K20m
+```
+### Code Example
+In this section we provide a basic CUDA based vector addition code example. You can directly copy and paste the code to test it.
+```cpp
+$ vim test.cu
+#define N (2048*2048)
+#define THREADS_PER_BLOCK 512
+#include <stdio.h>
+#include <stdlib.h>
+// GPU kernel function to add two vectors
+__global__ void add_gpu( int *a, int *b, int *c, int n){
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n)
+    c[index] = a[index] + b[index];
+}
+// CPU function to add two vectors
+void add_cpu (int *a, int *b, int *c, int n) {
+  for (int i=0; i < n; i++)
+    c[i] = a[i] + b[i];
+}
+// CPU function to generate a vector of random integers
+void random_ints (int *a, int n) {
+  for (int i = 0; i < n; i++)
+  a[i] = rand() % 10000; // random number between 0 and 9999
+}
+// CPU function to compare two vectors of length n element by element.
+// Prints every mismatching position and returns 0 when the vectors are
+// identical, 1 otherwise.
+int compare_ints( int *a, int *b, int n ){
+  int pass = 0;
+  // iterate over the n elements passed by the caller, not the global N macro
+  for (int i = 0; i < n; i++){
+    if (a[i] != b[i]) {
+      printf("Value mismatch at location %d, values %d and %d\n",i, a[i], b[i]);
+      pass = 1;
+    }
+  }
+  if (pass == 0) printf ("Test passed\n"); else printf ("Test Failed\n");
+  return pass;
+}
+// Host driver: fills two random vectors, adds them on the GPU and checks
+// the GPU result against the CPU implementation.
+int main( void ) {
+  int *a, *b, *c; // host copies of a, b, c
+  int *dev_a, *dev_b, *dev_c; // device copies of a, b, c
+  int size = N * sizeof( int ); // we need space for N integers
+  // Allocate GPU/device copies of dev_a, dev_b, dev_c
+  cudaMalloc( (void**)&dev_a, size );
+  cudaMalloc( (void**)&dev_b, size );
+  cudaMalloc( (void**)&dev_c, size );
+  // Allocate CPU/host copies of a, b, c
+  a = (int*)malloc( size );
+  b = (int*)malloc( size );
+  c = (int*)malloc( size );
+  // Fill input vectors with random integer numbers
+  random_ints( a, N );
+  random_ints( b, N );
+  // copy inputs to device
+  cudaMemcpy( dev_a, a, size, cudaMemcpyHostToDevice );
+  cudaMemcpy( dev_b, b, size, cudaMemcpyHostToDevice );
+  // launch add_gpu() kernel with blocks and threads
+  // (the execution configuration is delimited by <<< and >>>;
+  // N is an exact multiple of THREADS_PER_BLOCK, so the division is exact)
+  add_gpu<<< N/THREADS_PER_BLOCK, THREADS_PER_BLOCK >>>( dev_a, dev_b, dev_c, N );
+  // copy device result back to host copy of c
+  cudaMemcpy( c, dev_c, size, cudaMemcpyDeviceToHost );
+  //Check the results with CPU implementation
+  int *c_h; c_h = (int*)malloc( size );
+  add_cpu (a, b, c_h, N);
+  compare_ints(c, c_h, N);
+  // Clean CPU memory allocations
+  free( a ); free( b ); free( c ); free (c_h);
+  // Clean GPU memory allocations
+  cudaFree( dev_a );
+  cudaFree( dev_b );
+  cudaFree( dev_c );
+  return 0;
+}
+```
+This code can be compiled using the following command;
+```console
+$ nvcc test.cu -o test_cuda
+```
+To run the code, use an interactive PBS session to get access to one of the GPU accelerated nodes;
+```console
+$ qsub -I -q qnvidia -A OPEN-0-0
+$ ml cuda
+$ ./test_cuda
+```
+## CUDA Libraries
+### cuBLAS
+The NVIDIA CUDA Basic Linear Algebra Subroutines (cuBLAS) library is a GPU-accelerated version of the complete standard BLAS library with 152 standard BLAS routines. A basic description of the library together with basic performance comparisons with MKL can be found [here](https://developer.nvidia.com/cublas "Nvidia cuBLAS").
+#### cuBLAS Example: SAXPY
+The SAXPY function multiplies the vector x by the scalar alpha, and adds it to the vector y, overwriting the latter vector with the result. A description of the cuBLAS function can be found in [NVIDIA CUDA documentation](http://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-axpy "Nvidia CUDA documentation"). Code can be pasted in the file and compiled without any modification.
+```cpp
+/* Includes, system */
+#include <stdio.h>
+#include <stdlib.h>
+/* Includes, cuda */
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+/* Vector size */
+#define N  (32)
+/* Host implementation of a simple version of saxpy: y = alpha*x + y, computed in place over n elements */
+void saxpy(int n, float alpha, const float *x, float *y)
+{
+    for (int i = 0; i < n; ++i)
+    y[i] = alpha*x[i] + y[i];
+}
+/* Main: runs SAXPY on the host and through cuBLAS and prints both results side by side */
+int main(int argc, char **argv)
+{
+    float *h_X, *h_Y, *h_Y_ref;
+    float *d_X = 0;
+    float *d_Y = 0;
+    const float alpha = 1.0f;
+    int i;
+    cublasHandle_t handle;
+    /* Initialize CUBLAS */
+    printf("simpleCUBLAS test running..\n");
+    cublasCreate(&handle);
+    /* Allocate host memory for the vectors */
+    h_X = (float *)malloc(N * sizeof(h_X[0]));
+    h_Y = (float *)malloc(N * sizeof(h_Y[0]));
+    h_Y_ref = (float *)malloc(N * sizeof(h_Y_ref[0]));
+    /* Fill the vectors with test data; h_Y_ref keeps a copy of y for the host reference run */
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float)RAND_MAX;
+        h_Y[i] = rand() / (float)RAND_MAX;
+        h_Y_ref[i] = h_Y[i];
+    }
+    /* Allocate device memory for the vectors */
+    cudaMalloc((void **)&d_X, N * sizeof(d_X[0]));
+    cudaMalloc((void **)&d_Y, N * sizeof(d_Y[0]));
+    /* Initialize the device vectors with the host vectors */
+    cublasSetVector(N, sizeof(h_X[0]), h_X, 1, d_X, 1);
+    cublasSetVector(N, sizeof(h_Y[0]), h_Y, 1, d_Y, 1);
+    /* Performs operation using plain C code */
+    saxpy(N, alpha, h_X, h_Y_ref);
+    /* Performs operation using cublas */
+    cublasSaxpy(handle, N, &alpha, d_X, 1, d_Y, 1);
+    /* Read the result back */
+    cublasGetVector(N, sizeof(h_Y[0]), d_Y, 1, h_Y, 1);
+    /* Check result against reference */
+    for (i = 0; i < N; ++i)
+        printf("CPU res = %f \t GPU res = %f \t diff = %f \n", h_Y_ref[i], h_Y[i], h_Y_ref[i] - h_Y[i]);
+    /* Memory clean up */
+    free(h_X); free(h_Y); free(h_Y_ref);
+    cudaFree(d_X); cudaFree(d_Y);
+    /* Shutdown */
+    cublasDestroy(handle);
+}
+```
+!!! note
+    cuBLAS has its own function for data transfers between CPU and GPU memory:
+    - [cublasSetVector](http://docs.nvidia.com/cuda/cublas/index.html#cublassetvector) - transfers data from CPU to GPU memory
+    - [cublasGetVector](http://docs.nvidia.com/cuda/cublas/index.html#cublasgetvector) - transfers data from GPU to CPU memory
+To compile the code using the NVCC compiler a "-lcublas" compiler flag has to be specified:
+```console
+$ ml cuda
+$ nvcc -lcublas test_cublas.cu -o test_cublas_nvcc
+```
+To compile the same code with GCC:
+```console
+$ ml cuda
+$ gcc -std=c99 test_cublas.c -o test_cublas_gcc -lcublas -lcudart
+```
+To compile the same code with an Intel compiler:
+```console
+$ ml cuda
+$ ml intel
+$ icc -std=c99 test_cublas.c -o test_cublas_icc -lcublas -lcudart
+```
--- a/docs.it4i/anselm/storage.md
+++ b/docs.it4i/anselm/storage.md
--- a/docs.it4i/apiv1.md
+++ b/docs.it4i/apiv1.md
+# API Placeholder
+This page is being created automatically from the API source code.
--- a/docs.it4i/environment-and-modules.md
+++ b/docs.it4i/environment-and-modules.md
+# Environment and Modules
+## Environment Customization
+After logging in, you may want to configure the environment. Write your preferred path definitions, aliases, functions and module loads in the .bashrc file
+```console
+# ./bashrc
+# users compilation path
+export MODULEPATH=${MODULEPATH}:/home/$USER/.local/easybuild/modules/all
+# User specific aliases and functions
+alias qs='qstat -a'
+# load default intel compiler !!! is not recommended !!!
+ml intel
+# Display information to standard output - only in interactive ssh session
+if [ -n "$SSH_TTY" ]
+then
+ ml # Display loaded modules
+fi
+```
+!!! note
+    Do not run commands outputting to standard output (echo, module list, etc.) in .bashrc for non-interactive SSH sessions. It breaks fundamental functionality (SCP, PBS) of your account! Guard such commands with a check for SSH session interactivity, as shown in the previous example.
+### Application Modules
+To configure your shell for running a particular application on the clusters, we use the Module package interface.
+Application modules on clusters are built using [EasyBuild](/software/tools/easybuild/). The modules are divided into the following structure:
+```
+ base: Default module class
+ bio: Bioinformatics, biology and biomedical
+ cae: Computer Aided Engineering (incl. CFD)
+ chem: Chemistry, Computational Chemistry and Quantum Chemistry
+ compiler: Compilers
+ data: Data management & processing tools
+ debugger: Debuggers
+ devel: Development tools
+ geo: Earth Sciences
+ ide: Integrated Development Environments (e.g. editors)
+ lang: Languages and programming aids
+ lib: General purpose libraries
+ math: High-level mathematical software
+ mpi: MPI stacks
+ numlib: Numerical Libraries
+ perf: Performance tools
+ phys: Physics and physical systems simulations
+ system: System utilities (e.g. highly depending on system OS and hardware)
+ toolchain: EasyBuild toolchains
+ tools: General purpose tools
+ vis: Visualization, plotting, documentation and typesetting
+ OS: singularity image
+ python: python packages
+```
+!!! note
+    The modules set up the application paths, library paths, and environment variables for running a particular application.
+The modules may be loaded, unloaded and switched, according to momentary needs. For details see [here](/software/modules/lmod/).
--- a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/vnc.md
+++ b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/vnc.md
+# VNC
+The **Virtual Network Computing** (**VNC**) is a graphical [desktop sharing](http://en.wikipedia.org/wiki/Desktop_sharing "Desktop sharing") system that uses the [Remote Frame Buffer protocol (RFB)](http://en.wikipedia.org/wiki/RFB_protocol "RFB protocol") to remotely control another [computer](http://en.wikipedia.org/wiki/Computer "Computer"). It transmits the [keyboard](http://en.wikipedia.org/wiki/Computer_keyboard "Computer keyboard") and [mouse](http://en.wikipedia.org/wiki/Computer_mouse "Computer mouse") events from one computer to another, relaying the graphical [screen](http://en.wikipedia.org/wiki/Computer_screen "Computer screen") updates back in the other direction, over a [network](http://en.wikipedia.org/wiki/Computer_network "Computer network").
+VNC-based connections are usually faster (require less network bandwidth) than [X11](/general/accessing-the-clusters/graphical-user-interface/x-window-system) applications forwarded directly through ssh.
+The recommended clients are [TightVNC](http://www.tightvnc.com) or [TigerVNC](http://sourceforge.net/apps/mediawiki/tigervnc/index.php?title=Main_Page) (free, open source, available for almost any platform).
+In this chapter we show how to create an underlying ssh tunnel from your client machine to one of our login nodes. Then, how to start your own vnc server on our login node and finally how to connect to your vnc server via the encrypted ssh tunnel.
+## Create VNC Password
+!!! note
+    Local VNC password should be set before the first login. Do use a strong password.
+```console
+[username@login2 ~]$ vncpasswd
+Password:
+Verify:
+```
+## Start Vncserver
+!!! note
+    To access VNC a local vncserver must be started first and also a tunnel using SSH port forwarding must be established.
+[See below](#linuxmac-os-example-of-creating-a-tunnel) for the details on SSH tunnels.
+You should start by **choosing your display number**.
+To choose a free one, check the currently occupied display numbers by listing them with the following command:
+```console
+[username@login2 ~]$ ps aux | grep Xvnc | sed -rn 's/(\s) .*Xvnc (\:[0-9]+) .*/\1 \2/p'
+username :79
+username :60
+.....
+```
+As you can see above, displays ":79" and ":60" are already occupied.
+Generally, you can choose display number freely *except these occupied numbers*.
+Also remember that the display number should be less than or equal to 99.
+Based on this, **we have chosen display number 61**, so this is the number you will see in the examples below.
+!!! note
+    Your situation may be different, so your choice of display number may differ as well. **Please choose and use your own display number accordingly!**
+Start your VNC server on the chosen display number (61):
+```console
+[username@login2 ~]$ vncserver :61 -geometry 1600x900 -depth 16
+New 'login2:61 (username)' desktop is login2:61
+Starting applications specified in /home/username/.vnc/xstartup
+Log file is /home/username/.vnc/login2:61.log
+```
+Check whether the VNC server is running on the chosen display number (61):
+```console
+[username@login2 .vnc]$ vncserver -list
+TigerVNC server sessions:
+X DISPLAY #     PROCESS ID
+:61              18437
+```
+Another way to check it:
+```console
+[username@login2 .vnc]$  ps aux | grep Xvnc | sed -rn 's/(\s) .*Xvnc (\:[0-9]+) .*/\1 \2/p'
+username :61
+username :102
+```
+!!! note
+    The VNC server runs on port 59xx, where xx is the display number. So, you get your port number simply as 5900 + display number, in our example 5900 + 61 = 5961. Another example for display number 102 is calculation of TCP port 5900 + 102 = 6002 but be aware, that TCP ports above 6000 are often used by X11. **Please, calculate your own port number and use it instead of 5961 from examples below!**
+To access the VNC server you have to create a tunnel between the login node using TCP port 5961 and your machine using a free TCP port (for simplicity the very same) in next step. See examples for [Linux/Mac OS](#linuxmac-os-example-of-creating-a-tunnel) and [Windows](#windows-example-of-creating-a-tunnel).
+!!! note
+    The tunnel must point to the same login node where you launched the VNC server, e.g. login2. If you use just cluster-name.it4i.cz, the tunnel might point to a different node due to DNS round robin.
+## Linux/Mac OS Example of Creating a Tunnel
+At your machine, create the tunnel:
+```console
+local $  ssh -TN -f username@login2.cluster-name.it4i.cz -L 5961:localhost:5961
+```
+Issue the following command to check the tunnel is established (note the PID 2022 in the last column, you'll need it for closing the tunnel):
+```console
+local $ netstat -natp | grep 5961
+(Not all processes could be identified, non-owned process info
+ will not be shown, you would have to be root to see it all.)
+tcp        0      0 127.0.0.1:5961          0.0.0.0:*               LISTEN      2022/ssh
+tcp6       0      0 ::1:5961                :::*                    LISTEN      2022/ssh
+```
+Or on Mac OS use this command:
+```console
+local-mac $ lsof -n -i4TCP:5961 | grep LISTEN
+ssh 75890 sta545 7u IPv4 0xfb062b5c15a56a3b 0t0 TCP 127.0.0.1:5961 (LISTEN)
+```
+Connect with the VNC client:
+```console
+local $ vncviewer 127.0.0.1:5961
+```
+In this example, we connect to VNC server on port 5961, via the ssh tunnel. The connection is encrypted and secured. The VNC server listening on port 5961 provides screen of 1600x900 pixels.
+You have to destroy the SSH tunnel, which is still running in the background, after you finish the work. Use the following command (PID 2022 in this case, see the netstat command above):
+```console
+kill 2022
+```
+## Windows Example of Creating a Tunnel
+Start vncserver using command vncserver described above.
+Search for the localhost and port number (in this case 127.0.0.1:5961).
+```console
+[username@login2 .vnc]$ netstat -tanp | grep Xvnc
+(Not all processes could be identified, non-owned process info
+ will not be shown, you would have to be root to see it all.)
+tcp        0      0 127.0.0.1:5961              0.0.0.0:*                   LISTEN      24031/Xvnc
+```
+### PuTTY
+On the PuTTY Configuration screen go to Connection->SSH->Tunnels to set up the tunnel.
+Fill the Source port and Destination fields. **Do not forget to click the Add button**.
+![](../../../img/putty-tunnel.png)
+### WSL (Bash on Windows)
+[Windows Subsystem for Linux](http://docs.microsoft.com/en-us/windows/wsl) is another way to run Linux software in a Windows environment.
+At your machine, create the tunnel:
+```console
+local $ ssh username@login2.cluster-name.it4i.cz -L 5961:localhost:5961
+```
+## Example of Starting VNC Client
+Run the VNC client of your choice, select VNC server 127.0.0.1, port 5961 and connect using VNC password.
+### TigerVNC Viewer
+![](../../../img/vncviewer.png)
+In this example, we connect to VNC server on port 5961, via the ssh tunnel, using TigerVNC viewer. The connection is encrypted and secured. The VNC server listening on port 5961 provides screen of 1600x900 pixels.
+### TightVNC Viewer
+Use your VNC password to log using TightVNC Viewer and start a Gnome Session on the login node.
+![](../../../img/TightVNC_login.png)
+## Gnome Session
+After a successful login, you should see the following screen:
+![](../../../img/gnome_screen.png)
+### Disable Your Gnome Session Screensaver
+Open Screensaver preferences dialog:
+![](../../../img/gdmscreensaver.png)
+Uncheck both options below the slider:
+![](../../../img/gdmdisablescreensaver.png)
+### Kill Screensaver if Locked Screen
+If the screen gets locked, you have to kill the screensaver. Do not forget to disable the screensaver afterwards.
+```console
+[username@login2 .vnc]$ ps aux | grep screen
+username     1503 0.0 0.0 103244   892 pts/4    S+   14:37   0:00 grep screen
+username     24316 0.0 0.0 270564 3528 ?        Ss   14:12   0:00 gnome-screensaver
+[username@login2 .vnc]$ kill 24316
+```
+## Kill Vncserver After Finished Work
+You should kill your VNC server using command:
+```console
+[username@login2 .vnc]$  vncserver  -kill :61
+Killing Xvnc process ID 7074
+Xvnc process ID 7074 already killed
+```
+Or this way:
+```console
+[username@login2 .vnc]$  pkill vnc
+```
+!!! note
+    Do not forget to also terminate the SSH tunnel, if one was used. See the end of [this section](#linuxmac-os-example-of-creating-a-tunnel) for the details.
+## GUI Applications on Compute Nodes Over VNC
+The very same methods as described above may be used to run GUI applications on compute nodes. However, for maximum performance, follow these steps:
+Open a Terminal (Applications -> System Tools -> Terminal). Run all the next commands in the terminal.
+![](../../../img/gnome-terminal.png)
+Allow incoming X11 graphics from the compute nodes at the login node:
+```console
+$ xhost +
+```
+Get an interactive session on a compute node (for more detailed info [look here](/anselm/job-submission-and-execution/)). Use the **-v DISPLAY** option to propagate the DISPLAY on the compute node. In this example, we want a complete node (16 cores in this example) from the production queue:
+```console
+$ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A PROJECT_ID -q qprod -l select=1:ncpus=16
+```
+Test that the DISPLAY redirection into your VNC session works by running an X11 application (e.g. XTerm) on the assigned compute node:
+```console
+$ xterm
+```
+Example described above:
+![](../../../img/gnome-compute-nodes-over-vnc.png)
--- a/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md
+++ b/docs.it4i/general/accessing-the-clusters/graphical-user-interface/x-window-system.md
+# X Window System
+The X Window system is a principal way to get GUI access to the clusters. The **X Window System** (commonly known as **X11**, based on its current major version being 11, or shortened to simply **X**, and sometimes informally **X-Windows**) is a computer software system and network [protocol](http://en.wikipedia.org/wiki/Protocol_%28computing%29 "Protocol (computing)") that provides a basis for [graphical user interfaces](http://en.wikipedia.org/wiki/Graphical_user_interface "Graphical user interface") (GUIs) and rich input device capability for [networked computers](http://en.wikipedia.org/wiki/Computer_network "Computer network").
+!!! tip
+    The X display forwarding must be activated and the X server running on client side
+## X Display
+### Linux Example
+In order to display the graphical user interface (GUI) of various software tools, you need to enable the X display forwarding. On Linux and Mac, log in using the -X option in the ssh client:
+```console
+ local $ ssh -X username@cluster-name.it4i.cz
+```
+### PuTTY on Windows
+On Windows use the PuTTY client to enable X11 forwarding. In PuTTY menu, go to Connection-SSH-X11, mark the Enable X11 forwarding checkbox before logging in.
+![](../../../img/cygwinX11forwarding.png)
+Then log in as usual.
+### WSL (Bash on Windows)
+In order to display the graphical user interface (GUI) of various software tools, you need to enable the X display forwarding. Log in using the -X option in the ssh client:
+```console
+ local $ ssh -X username@cluster-name.it4i.cz
+```
+!!! tip
+    If you are getting error message "cannot open display", then try to export DISPLAY variable, before attempting to log in:
+```console
+ local $ export DISPLAY=localhost:0.0
+```
+## Verify the Forwarding
+To verify the forwarding, type
+```console
+$ echo $DISPLAY
+```
+if you receive something like
+```console
+localhost:10.0
+```
+then the X11 forwarding is enabled.
+## X Server
+In order to display the graphical user interface (GUI) of various software tools, you need a running X server on your desktop computer. For Linux users, no action is required as the X server is the default GUI environment on most Linux distributions. Mac and Windows users need to install and run the X server on their workstations.
+### X Server on OS X
+Mac OS users need to install [XQuartz server](https://www.xquartz.org).
+### X Server on Windows
+There is a variety of X servers available for the Windows environment. The commercial Xwin32 is very stable and feature-rich. The Cygwin environment provides a fully featured open-source XWin X server. For simplicity, we recommend the open-source X server by the [Xming project](http://sourceforge.net/projects/xming/). For stability and full features, we recommend the
+[XWin](http://x.cygwin.com/) X server by Cygwin.
+| How to use Xwin | How to use Xming |
+|--- | --- |
+| [Install Cygwin](http://x.cygwin.com/) Find and execute XWin.exe to start the X server on Windows desktop computer.[If no able to forward X11 using PuTTY to CygwinX](#if-no-able-to-forward-x11-using-putty-to-cygwinx) | Use Xlaunch to configure the Xming. Run Xming to start the X server on Windows desktop computer. |
+Read more on [http://www.math.umn.edu/systems_guide/putty_xwin32.shtml](http://www.math.umn.edu/systems_guide/putty_xwin32.shtml)
+## Running GUI Enabled Applications
+!!! note
+    Make sure that X forwarding is activated and the X server is running.
+Then launch the application as usual. Use the & to run the application in background.
+```console
+$ ml intel (idb and gvim not installed yet)
+$ gvim &
+```
+```console
+$ xterm
+```
+In this example, we activate the intel programming environment tools, then start the graphical gvim editor.
+## GUI Applications on Compute Nodes
+Allocate the compute nodes using the -X option of the qsub command:
+```console
+$ qsub -q qexp -l select=2:ncpus=24 -X -I
+```
+In this example, we allocate 2 nodes via qexp queue, interactively. We request X11 forwarding with the -X option. It will be possible to run the GUI enabled applications directly on the first compute node.
+**Better performance** is obtained by logging on the allocated compute node via ssh, using the -X option.
+```console
+$ ssh -X r24u35n680
+```
+In this example, we log in on the r24u35n680 compute node, with the X11 forwarding enabled.
+## Gnome GUI Environment
+The Gnome 2.28 GUI environment is available on the clusters. We recommend using a separate X server window for displaying the Gnome environment.
+### Gnome on Linux and OS X
+To run the remote Gnome session in a window on Linux/OS X computer, you need to install Xephyr. Ubuntu package is
+xserver-xephyr, on OS X it is part of [XQuartz](http://xquartz.macosforge.org/landing/). First, launch Xephyr on local machine:
+```console
+local $ Xephyr -ac -screen 1024x768 -br -reset -terminate :1 &
+```
+This will open a new X window with size 1024 x 768 at DISPLAY :1. Next, ssh to the cluster with DISPLAY environment variable set and launch gnome-session
+```console
+local $ DISPLAY=:1.0 ssh -XC yourname@cluster-name.it4i.cz -i ~/.ssh/path_to_your_key
+... cluster-name MOTD...
+yourname@login1.cluster-name.it4i.cz $ gnome-session &
+```
+On older systems where Xephyr is not available, you may also try Xnest instead of Xephyr. Another option is to launch a new X server in a separate console, via:
+```console
+xinit /usr/bin/ssh -XT -i .ssh/path_to_your_key yourname@cluster-name.it4i.cz gnome-session -- :1 vt12
+```
+However this method does not seem to work with recent Linux distributions and you will need to manually source
+/etc/profile to properly set environment variables for PBS.
+### Gnome on Windows
+Use XLaunch to start the Xming server or run the XWin.exe. Select the "One window" mode.
+Log in to the cluster, using [PuTTY](#putty-on-windows) or [Bash on Windows](#wsl-bash-on-windows). On the cluster, run the gnome-session command.
+```console
+$ gnome-session &
+```
+In this way, we run a remote gnome session on the cluster, displaying it in the local X server.
+Use System-Log Out to close the gnome-session.
No results found