diff --git a/README.md b/README.md index 91b77bc91d7e76c543320f1a2517ee696ae93607..6661d4e9d9b0e47eff88b4fdb332e00191322f55 100755 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ Pro svou práci si naklonujete Gitem repozitář do svĂ©ho pracovnĂho adresář |0.47|11m7s|pĹ™idánĂ filtrĹŻ pro opravu a nahrazenĂ tabulek, vylepšenĂ filtrace, ...| |0.51|16m37s|export do pdf, export do epub, filtry, ...| |0.52|16m37s|oprava exportu do pdf, novĂ˝ filtr pro poslednĂ opravu formátovánĂ a chyb, ...| +|0.53|17m37s|oprava exportu do pdf a epub| >**ProblĂ©my** > * internĂ a externĂ odkazy diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/Anselmprofile.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/Anselmprofile.jpg new file mode 100644 index 0000000000000000000000000000000000000000..92b9d8b3887e76cff92255955f354e364ed53130 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/Anselmprofile.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/anyconnectcontextmenu.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/anyconnectcontextmenu.jpg new file mode 100644 index 0000000000000000000000000000000000000000..26122f2f5965ce71489267d359eff21c39ed7960 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/anyconnectcontextmenu.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/anyconnecticon.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/anyconnecticon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d7397f5c43f659d71a6fe8413ad89dca8157bb48 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/anyconnecticon.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/downloadfilesuccessfull.jpeg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/downloadfilesuccessfull.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..be0d8c671fbdb1631a53698c5f53d8708cb3758e Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/downloadfilesuccessfull.jpeg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/executionaccess.jpeg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/executionaccess.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..24993c8e26e0d2c9951bbb47c24d5e37111e7ed1 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/executionaccess.jpeg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/executionaccess2.jpeg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/executionaccess2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..179ef898f65dc73e40a87de8db17afa207468135 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/executionaccess2.jpeg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/firstrun.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/firstrun.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..fb26fe932626bfbf927874d3953606078a130fbb Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/firstrun.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/instalationfile.jpeg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/instalationfile.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..71fcc12197b4c6246d457bd3ecd75c5e971fc174 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/instalationfile.jpeg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/java_detection.jpeg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/java_detection.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..0c1707c03f1b27c8490efdb4bc1305aa5adbb74a Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/java_detection.jpeg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/login.jpeg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/login.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..a683564de8cf7ccd8bb24f2654fd3f2dca60bf79 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/login.jpeg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/logingui.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/logingui.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2a11a13f8c511c801ce67a963ee145f2dcbb6db9 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/logingui.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/loginwithprofile.jpeg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/loginwithprofile.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..568c771e5e696085ec86ddc5fe71d2b190ff6a3f Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/loginwithprofile.jpeg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.md b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.md new file mode 100644 index 0000000000000000000000000000000000000000..f43ed041f1efad27eea824b186d9e3f200856675 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.md @@ -0,0 +1,118 @@ +Outgoing connections +==================== + + + +Connection restrictions +----------------------- + +Outgoing connections, from Anselm Cluster login nodes to the outside +world, are restricted to following ports: + + |Port|Protocol| + |---|---| + |22|ssh| + |80|http| + |443|https| + |9418|git| + +Please use **ssh port forwarding** and proxy servers to connect from +Anselm to all other remote ports. + +Outgoing connections, from Anselm Cluster compute nodes are restricted +to the internal network. Direct connections form compute nodes to +outside world are cut. 
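+
+For a quick check of whether a particular outgoing port is reachable
+from a login node, a port scanner such as netcat may be used. This is
+only an illustrative sketch - it assumes the nc utility is available on
+the login node and uses github.com merely as an example host:
+
+`
+$ nc -zv github.com 443   # https - allowed
+$ nc -zv github.com 9418  # git - allowed
+$ nc -zv github.com 25    # smtp - not in the list above, expected to time out
+`
+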
+ +Port forwarding +--------------- + +### Port forwarding from login nodes + +Port forwarding allows an application running on Anselm to connect to +arbitrary remote host and port. + +It works by tunneling the connection from Anselm back to users +workstation and forwarding from the workstation to the remote host. + +Pick some unused port on Anselm login node (for example 6000) and +establish the port forwarding: + +` +local $ ssh -R 6000:remote.host.com:1234 anselm.it4i.cz +` + +In this example, we establish port forwarding between port 6000 on +Anselm and port 1234 on the remote.host.com. By accessing +localhost:6000 on Anselm, an application will see response of +remote.host.com:1234. The traffic will run via users local workstation. + +Port forwarding may be done **using PuTTY** as well. On the PuTTY +Configuration screen, load your Anselm configuration first. Then go to +Connection->SSH->Tunnels to set up the port forwarding. Click +Remote radio button. Insert 6000 to Source port textbox. Insert +remote.host.com:1234. Click Add button, then Open. + +Port forwarding may be established directly to the remote host. However, +this requires that user has ssh access to remote.host.com + +` +$ ssh -L 6000:localhost:1234 remote.host.com +` + +Note: Port number 6000 is chosen as an example only. Pick any free port. + +### Port forwarding from compute nodes + +Remote port forwarding from compute nodes allows applications running on +the compute nodes to access hosts outside Anselm Cluster. + +First, establish the remote port forwarding form the login node, as +[described +above](outgoing-connections.html#port-forwarding-from-login-nodes). + +Second, invoke port forwarding from the compute node to the login node. +Insert following line into your jobscript or interactive shell + +` +$ ssh -TN -f -L 6000:localhost:6000 login1 +` + +In this example, we assume that port forwarding from login1:6000 to +remote.host.com:1234 has been established beforehand. By accessing +localhost:6000, an application running on a compute node will see +response of remote.host.com:1234 + +### Using proxy servers + +Port forwarding is static, each single port is mapped to a particular +port on remote host. Connection to other remote host, requires new +forward. + +Applications with inbuilt proxy support, experience unlimited access to +remote hosts, via single proxy server. + +To establish local proxy server on your workstation, install and run +SOCKS proxy server software. On Linux, sshd demon provides the +functionality. To establish SOCKS proxy server listening on port 1080 +run: + +` +local $ ssh -D 1080 localhost +` + +On Windows, install and run the free, open source [Sock +Puppet](http://sockspuppet.com/) server. + +Once the proxy server is running, establish ssh port forwarding from +Anselm to the proxy server, port 1080, exactly as [described +above](outgoing-connections.html#port-forwarding-from-login-nodes). + +` +local $ ssh -R 6000:localhost:1080 anselm.it4i.cz +` + +Now, configure the applications proxy settings to **localhost:6000**. +Use port forwarding to access the [proxy server from compute +nodes](outgoing-connections.html#port-forwarding-from-compute-nodes) +as well . 
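+
+For illustration only (assuming the SOCKS proxy on your workstation and
+the port forwarding described above are already running), an
+application with SOCKS support, such as curl, can then be pointed at
+the forwarded port on Anselm:
+
+`
+$ curl --proxy socks5h://localhost:6000 http://example.com/
+`
+
+The socks5h variant also resolves host names through the proxy.
+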
+ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.md b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.md new file mode 100644 index 0000000000000000000000000000000000000000..7fb3a55a6994bc2a357e6d8f214af6f426c5ae25 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.md @@ -0,0 +1,137 @@ +Shell access and data transfer +============================== + + + +Interactive Login +----------------- + +The Anselm cluster is accessed by SSH protocol via login nodes login1 +and login2 at address anselm.it4i.cz. The login nodes may be addressed +specifically, by prepending the login node name to the address. + + Login address |Port|Protocol| Login node + ----------------------- |---|---| ---------------------------------------------- + anselm.it4i.cz |22|ssh| round-robin DNS record for login1 and login2 + login1.anselm.it4i.cz |22|ssh| login1 + login2.anselm.it4i.cz |22|ssh| login2 + +The authentication is by the [private +key](../../../get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.html) + +Please verify SSH fingerprints during the first logon. They are +identical on all login nodes: +29:b3:f4:64:b0:73:f5:6f:a7:85:0f:e0:0d:be:76:bf (DSA) +d4:6f:5c:18:f4:3f:70:ef:bc:fc:cc:2b:fd:13:36:b7 (RSA) + + + +Private key (`id_rsa/id_rsa.ppk` ): `600 (-rw-------)`s authentication: + +On **Linux** or **Mac**, use + +` +local $ ssh -i /path/to/id_rsa username@anselm.it4i.cz +` + +If you see warning message "UNPROTECTED PRIVATE KEY FILE!", use this +command to set lower permissions to private key file. + +` +local $ chmod 600 /path/to/id_rsa +` + +On **Windows**, use [PuTTY ssh +client](../../../get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty/putty.html). + +After logging in, you will see the command prompt: + + _ + / | | + / _ __ ___ ___| |_ __ ___ + / / | '_ / __|/ _ | '_ ` _ + / ____ | | | __ __/ | | | | | | + /_/ __| |_|___/___|_|_| |_| |_| + +                        http://www.it4i.cz/?lang=en + + Last login: Tue Jul 9 15:57:38 2013 from your-host.example.com + [username@login2.anselm ~]$ + +The environment is **not** shared between login nodes, except for +[shared filesystems](../storage-1.html#section-1). + +Data Transfer +------------- + +Data in and out of the system may be transferred by the +[scp](http://en.wikipedia.org/wiki/Secure_copy) and sftp +protocols. (Not available yet.) In case large +volumes of data are transferred, use dedicated data mover node +dm1.anselm.it4i.cz for increased performance. + + |Address|Port|Protocol| + -------------------- |---|---|- ----------------------------------------- + |anselm.it4i.cz|22|scp, sftp| + |login1.anselm.it4i.cz|22|scp, sftp| + |login2.anselm.it4i.cz|22|scp, sftp| + |dm1.anselm.it4i.cz|22|scp, sftp| + + The authentication is by the [private +key](../../../get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.html) + +Data transfer rates up to **160MB/s** can be achieved with scp or sftp. + may be transferred in 1:50h. + +To achieve 160MB/s transfer rates, the end user must be connected by 10G +line all the way to IT4Innovations and use computer with fast processor +for the transfer. Using Gigabit ethernet connection, up to 110MB/s may +be expected. Fast cipher (aes128-ctr) should be used. 
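+
+For example, the cipher may be selected explicitly on the command line
+(a minimal sketch; the -c option is standard OpenSSH, the paths and file
+names are placeholders):
+
+`
+local $ scp -c aes128-ctr -i /path/to/id_rsa my-local-file username@anselm.it4i.cz:directory/file
+`
+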
+ +If you experience degraded data transfer performance, consult your local +network provider. + +On linux or Mac, use scp or sftp client to transfer the data to Anselm: + +` +local $ scp -i /path/to/id_rsa my-local-file username@anselm.it4i.cz:directory/file +` + +` +local $ scp -i /path/to/id_rsa -r my-local-dir username@anselm.it4i.cz:directory +` + + or + +` +local $ sftp -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz +` + +Very convenient way to transfer files in and out of the Anselm computer +is via the fuse filesystem +[sshfs](http://linux.die.net/man/1/sshfs) + +` +local $ sshfs -o IdentityFile=/path/to/id_rsa username@anselm.it4i.cz:. mountpoint +` + +Using sshfs, the users Anselm home directory will be mounted on your +local computer, just like an external disk. + +Learn more on ssh, scp and sshfs by reading the manpages + +` +$ man ssh +$ man scp +$ man sshfs +` + +On Windows, use [WinSCP +client](http://winscp.net/eng/download.php) to transfer +the data. The [win-sshfs +client](http://code.google.com/p/win-sshfs/) provides a +way to mount the Anselm filesystems directly as an external disc. + +More information about the shared file systems is available +[here](../../storage.html). + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/successfullconnection.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/successfullconnection.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a7e8d5b749f86f54558586158ea7a00bf666961c Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/successfullconnection.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/successfullinstalation.jpeg b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/successfullinstalation.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..efbf8c8571ee9a225493e6f7dac49e594cd7408a Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/successfullinstalation.jpeg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.md b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.md new file mode 100644 index 0000000000000000000000000000000000000000..2b4f237033d2eb02b9e8a405fc8d8a7f71831c14 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.md @@ -0,0 +1,110 @@ +VPN Access +========== + + + +Accessing IT4Innovations internal resources via VPN +--------------------------------------------------- + +**Failed to initialize connection subsystem Win 8.1 - 02-10-15 MS +patch** +Workaround can be found at +[https://docs.it4i.cz/vpn-connection-fail-in-win-8.1](../../vpn-connection-fail-in-win-8.1.html) + + + +For using resources and licenses which are located at IT4Innovations +local network, it is necessary to VPN connect to this network. +We use Cisco AnyConnect Secure Mobility Client, which is supported on +the following operating systems: + +- >Windows XP +- >Windows Vista +- >Windows 7 +- >Windows 8 +- >Linux +- >MacOS + +It is impossible to connect to VPN from other operating systems. 
+ +VPN client installation +------------------------------------ + +You can install VPN client from web interface after successful login +with LDAP credentials on address <https://vpn1.it4i.cz/anselm> + + + +According to the Java settings after login, the client either +automatically installs, or downloads installation file for your +operating system. It is necessary to allow start of installation tool +for automatic installation. + + + +access](../executionaccess.jpg/@@images/4d6e7cb7-9aa7-419c-9583-6dfd92b2c015.jpeg "Execution access") +access + + +After successful installation, VPN connection will be established and +you can use available resources from IT4I network. + + + +If your Java setting doesn't allow automatic installation, you can +download installation file and install VPN client manually. + + + +After you click on the link, download of installation file will start. + + + +After successful download of installation file, you have to execute this +tool with administrator's rights and install VPN client manually. + +Working with VPN client +----------------------- + +You can use graphical user interface or command line interface to run +VPN client on all supported operating systems. We suggest using GUI. + +Before the first login to VPN, you have to fill +URL **https://vpn1.it4i.cz/anselm** into the text field. + + + +After you click on the Connect button, you must fill your login +credentials. + + + +After a successful login, the client will minimize to the system tray. +If everything works, you can see a lock in the Cisco tray icon. + + + +If you right-click on this icon, you will see a context menu in which +you can control the VPN connection. + + + +When you connect to the VPN for the first time, the client downloads the +profile and creates a new item "ANSELM" in the connection list. For +subsequent connections, it is not necessary to re-enter the URL address, +but just select the corresponding item. + + + +Then AnyConnect automatically proceeds like in the case of first logon. + + + +After a successful logon, you can see a green circle with a tick mark on +the lock icon. + + + +For disconnecting, right-click on the AnyConnect client icon in the +system tray and select **VPN Disconnect**. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/bullxB510.png b/converted/docs.it4i.cz/anselm-cluster-documentation/bullxB510.png new file mode 100644 index 0000000000000000000000000000000000000000..abbb472fc66b730c609ac5dda573b5edc92c4884 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/bullxB510.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.md b/converted/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.md new file mode 100644 index 0000000000000000000000000000000000000000..18ff5199a3a32582e6c25ce3acac1eae7d303e31 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.md @@ -0,0 +1,306 @@ +Compute Nodes +============= + + + +Nodes Configuration +------------------- + +Anselm is cluster of x86-64 Intel based nodes built on Bull Extreme +Computing bullx technology. The cluster contains four types of compute +nodes. 
+ +###Compute Nodes Without Accelerator + +- + + 180 nodes + + + +- + + 2880 cores in total + + + +- + + two Intel Sandy Bridge E5-2665, 8-core, 2.4GHz processors per node + + + +- + + 64 GB of physical memory per node + + + +- one 500GB SATA 2,5” 7,2 krpm HDD per node +- + + bullx B510 blade servers + + + +- + + cn[1-180] + + + +###Compute Nodes With GPU Accelerator + +- + + 23 nodes + + + +- + + 368 cores in total + + + +- + + two Intel Sandy Bridge E5-2470, 8-core, 2.3GHz processors per node + + + +- + + 96 GB of physical memory per node + + + +- one 500GB SATA 2,5” 7,2 krpm HDD per node +- + + GPU accelerator 1x NVIDIA Tesla Kepler K20 per node + + + +- + + bullx B515 blade servers + + + +- + + cn[181-203] + + + +###Compute Nodes With MIC Accelerator + +- + + 4 nodes + + + +- + + 64 cores in total + + + +- + + two Intel Sandy Bridge E5-2470, 8-core, 2.3GHz processors per node + + + +- + + 96 GB of physical memory per node + + + +- one 500GB SATA 2,5” 7,2 krpm HDD per node +- + + MIC accelerator 1x Intel Phi 5110P per node + + + +- + + bullx B515 blade servers + + + +- + + cn[204-207] + + + +###Fat Compute Nodes + +- + + 2 nodes + + + +- + + 32 cores in total + + + +- + + 2 Intel Sandy Bridge E5-2665, 8-core, 2.4GHz processors per node + + + +- + + 512 GB of physical memory per node + + + +- two 300GB SAS 3,5”15krpm HDD (RAID1) per node +- + + two 100GB SLC SSD per node + + + +- + + bullx R423-E3 servers + + + +- + + cn[208-209] + + + + + + + +**Figure Anselm bullx B510 servers** + +### Compute Nodes Summary + + |Node type|Count|Range|Memory|Cores|[Access](resource-allocation-and-job-execution/resources-allocation-policy.html)| + |---|---|---|---|---|---| + |Nodes without accelerator|180|cn[1-180]|64GB|16 @ 2.4Ghz|qexp, qprod, qlong, qfree| + |Nodes with GPU accelerator|23|cn[181-203]|96GB|16 @ 2.3Ghz|qgpu, qprod| + |Nodes with MIC accelerator|4|cn[204-207]|96GB|16 @ 2.3GHz|qmic, qprod| + |Fat compute nodes|2|cn[208-209]|512GB|16 @ 2.4GHz|qfat, qprod| + +Processor Architecture +---------------------- + +Anselm is equipped with Intel Sandy Bridge processors Intel Xeon E5-2665 +(nodes without accelerator and fat nodes) and Intel Xeon E5-2470 (nodes +with accelerator). Processors support Advanced Vector Extensions (AVX) +256-bit instruction set. + +### Intel Sandy Bridge E5-2665 Processor + +- eight-core +- speed: 2.4 GHz, up to 3.1 GHz using Turbo Boost Technology +- peak performance: 19.2 Gflop/s per + core +- caches: + + + - L2: 256 KB per core + - L3: 20 MB per processor + + + +- memory bandwidth at the level of the processor: 51.2 GB/s + +### Intel Sandy Bridge E5-2470 Processor + +- eight-core +- speed: 2.3 GHz, up to 3.1 GHz using Turbo Boost Technology +- peak performance: 18.4 Gflop/s per + core +- caches: + + + - L2: 256 KB per core + - L3: 20 MB per processor + + + +- memory bandwidth at the level of the processor: 38.4 GB/s + + + +Nodes equipped with Intel Xeon E5-2665 CPU have set PBS resource +attribute cpu_freq = 24, nodes equipped with Intel Xeon E5-2470 CPU +have set PBS resource attribute cpu_freq = 23. + +` +$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16:cpu_freq=24 -I +` + +In this example, we allocate 4 nodes, 16 cores at 2.4GHhz per node. + +Intel Turbo Boost Technology is used by default, you can disable it for +all nodes of job by using resource attribute cpu_turbo_boost. 
+ + $ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I + +Memory Architecture +------------------- + +### Compute Node Without Accelerator + +- 2 sockets +- Memory Controllers are integrated into processors. + + + - 8 DDR3 DIMMS per node + - 4 DDR3 DIMMS per CPU + - 1 DDR3 DIMMS per channel + - Data rate support: up to 1600MT/s + + + +- Populated memory: 8x 8GB DDR3 DIMM 1600Mhz + +### Compute Node With GPU or MIC Accelerator + +- 2 sockets +- Memory Controllers are integrated into processors. + + + - 6 DDR3 DIMMS per node + - 3 DDR3 DIMMS per CPU + - 1 DDR3 DIMMS per channel + - Data rate support: up to 1600MT/s + + + +- Populated memory: 6x 16GB DDR3 DIMM 1600Mhz + +### Fat Compute Node + +- 2 sockets +- Memory Controllers are integrated into processors. + + + - 16 DDR3 DIMMS per node + - 8 DDR3 DIMMS per CPU + - 2 DDR3 DIMMS per channel + - Data rate support: up to 1600MT/s + + + +- Populated memory: 16x 32GB DDR3 DIMM 1600Mhz + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.md b/converted/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.md new file mode 100644 index 0000000000000000000000000000000000000000..80ac34c467c727502fa3aff1eadafcf0098ee80b --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.md @@ -0,0 +1,114 @@ +Environment and Modules +======================= + + + +### Environment Customization + +After logging in, you may want to configure the environment. Write your +preferred path definitions, aliases, functions and module loads in the +.bashrc file + +` +# ./bashrc + +# Source global definitions +if [ -f /etc/bashrc ]; then + . /etc/bashrc +fi + +# User specific aliases and functions +alias qs='qstat -a' +module load PrgEnv-gnu + +# Display informations to standard output - only in interactive ssh session +if [ -n "$SSH_TTY" ] +then + module list # Display loaded modules +fi +` + +Do not run commands outputing to standard output (echo, module list, +etc) in .bashrc for non-interactive SSH sessions. It breaks fundamental +functionality (scp, PBS) of your account! Take care for SSH session +interactivity for such commands as + stated in the previous example. +in the previous example. + +### Application Modules + +In order to configure your shell for running particular application on +Anselm we use Module package interface. + +The modules set up the application paths, library paths and environment +variables for running particular application. + +We have also second modules repository. This modules repository is +created using tool called EasyBuild. On Salomon cluster, all modules +will be build by this tool. If you want to use software from this +modules repository, please follow instructions in section [Application +Modules +Path Expansion](environment-and-modules.html#EasyBuild). + +The modules may be loaded, unloaded and switched, according to momentary +needs. 
+ +To check available modules use + +` +$ module avail +` + +To load a module, for example the octave module use + +` +$ module load octave +` + +loading the octave module will set up paths and environment variables of +your active shell such that you are ready to run the octave software + +To check loaded modules use + +` +$ module list +` + + To unload a module, for example the octave module use + +` +$ module unload octave +` + +Learn more on modules by reading the module man page + +` +$ man module +` + +Following modules set up the development environment + +PrgEnv-gnu sets up the GNU development environment in conjunction with +the bullx MPI library + +PrgEnv-intel sets up the INTEL development environment in conjunction +with the Intel MPI library + +### Application Modules Path Expansion + +All application modules on Salomon cluster (and further) will be build +using tool called +[EasyBuild](http://hpcugent.github.io/easybuild/ "EasyBuild"). +In case that you want to use some applications that are build by +EasyBuild already, you have to modify your MODULEPATH environment +variable. + +` +export MODULEPATH=$MODULEPATH:/apps/easybuild/modules/all/ +` + +This command expands your searched paths to modules. You can also add +this command to the .bashrc file to expand paths permanently. After this +command, you can use same commands to list/add/remove modules as is +described above. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.md b/converted/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.md new file mode 100644 index 0000000000000000000000000000000000000000..e70624ac373dd777964cb9144416af656f6bd55a --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.md @@ -0,0 +1,365 @@ +Hardware Overview +================= + + + +The Anselm cluster consists of 209 computational nodes named cn[1-209] +of which 180 are regular compute nodes, 23 GPU Kepler K20 accelerated +nodes, 4 MIC Xeon Phi 5110 accelerated nodes and 2 fat nodes. Each node +is a powerful x86-64 computer, +equipped with 16 cores (two eight-core Intel Sandy Bridge processors), +at least 64GB RAM, and local hard drive. The user access to the Anselm +cluster is provided by two login nodes login[1,2]. The nodes are +interlinked by high speed InfiniBand and Ethernet networks. All nodes +share 320TB /home disk storage to store the user files. The 146TB shared +/scratch storage is available for the scratch data. + +The Fat nodes are equipped with large amount (512GB) of memory. +Virtualization infrastructure provides resources to run long term +servers and services in virtual mode. Fat nodes and virtual servers may +access 45 TB of dedicated block storage. Accelerated nodes, fat nodes, +and virtualization infrastructure are available [upon +request](https://support.it4i.cz/rt) made by a PI. + +Schematic representation of the Anselm cluster. 
+Each entry below represents a node (computer) or storage capacity:
+
+- User-oriented infrastructure: login nodes login1, login2 and data
+  mover node dm1
+- Storage: Lustre FS /home (320TB) and Lustre FS /scratch (146TB)
+- Management infrastructure: management nodes, 45 TB block storage and
+  virtualization infrastructure servers
+
+The compute nodes are distributed across the racks and InfiniBand
+switches as follows:
+
+ |Rack|Switch|Compute nodes|
+ |---|---|---|
+ |Rack 01|isw0|cn[1-18]|
+ |Rack 01|isw4|cn[19-36]|
+ |Rack 01|isw5|cn[181-189]|
+ |Rack 02|isw6|cn[37-54]|
+ |Rack 02|isw9|cn[55-72]|
+ |Rack 02|isw10|cn[73-80], cn[190-192], cn[205-206]|
+ |Rack 03|isw11|cn[81-98]|
+ |Rack 03|isw14|cn[99-116]|
+ |Rack 03|isw15|cn[117-126], cn[193-195], cn207|
+ |Rack 04|isw16|cn[127-144]|
+ |Rack 04|isw19|cn[145-162]|
+ |Rack 04|isw20|cn[163-180]|
+ |Rack 05|isw21|cn[196-204]|
+
+The two fat nodes cn[208-209] appear as a separate box in the
+schematic.
+
+The cluster compute nodes cn[1-207] are organized within 13 chassis.
+
+There are four types of compute nodes:
+
+- 180 compute nodes without the accelerator
+- 23 compute nodes with GPU accelerator - equipped with NVIDIA Tesla
+  Kepler K20
+- 4 compute nodes with MIC accelerator - equipped with Intel Xeon Phi
+  5110P
+- 2 fat nodes - equipped with 512GB RAM and two 100GB SSD drives
+
+[More about Compute nodes](compute-nodes.html).
+
+GPU and MIC accelerated nodes are available upon request, see the
+[Resources Allocation
+Policy](resource-allocation-and-job-execution/resources-allocation-policy.html).
+
+All these nodes are interconnected by the fast InfiniBand QDR network
+and by the Ethernet network. [More about the
+Network](network.html).
+Every chassis provides an InfiniBand switch, marked **isw**, connecting
+all nodes in the chassis, as well as connecting the chassis to the upper
+level switches.
+
+All nodes share the 320TB /home disk storage to store user files. The
+146TB shared /scratch storage is available for the scratch data. These
+file systems are provided by the Lustre parallel file system.
+There is also local disk storage available on all compute nodes at
+/lscratch. [More about Storage](storage.html).
+
+The user access to the Anselm cluster is provided by two login nodes
+login1, login2, and the data mover node dm1. [More about accessing the
+cluster](accessing-the-cluster.html).
+
+The parameters are summarized in the following tables:
+
+**In general**
+
+ |Parameter|Value|
+ |---|---|
+ |Primary purpose|High Performance Computing|
+ |Architecture of compute nodes|x86-64|
+ |Operating system|Linux|
+
+[**Compute nodes**](compute-nodes.html)
+
+ |Parameter|Value|
+ |---|---|
+ |Total|209|
+ |Processor cores|16 (2x8 cores)|
+ |RAM|min. 64 GB, min. 4 GB per core|
+ |Local disk drive|yes - usually 500 GB|
+ |Compute network|InfiniBand QDR, fully non-blocking, fat-tree|
+ |w/o accelerator|180, cn[1-180]|
+ |GPU accelerated|23, cn[181-203]|
+ |MIC accelerated|4, cn[204-207]|
+ |Fat compute nodes|2, cn[208-209]|
+
+**In total**
+
+ |Parameter|Value|
+ |---|---|
+ |Total theoretical peak performance (Rpeak)|94 Tflop/s|
+ |Total max. LINPACK performance (Rmax)|73 Tflop/s|
+ |Total amount of RAM|15.136 TB|
+
+ |Node|Processor|Memory|Accelerator|
+ |---|---|---|---|
+ |w/o accelerator|2x Intel Sandy Bridge E5-2665, 2.4GHz|64GB|-|
+ |GPU accelerated|2x Intel Sandy Bridge E5-2470, 2.3GHz|96GB|NVIDIA Kepler K20|
+ |MIC accelerated|2x Intel Sandy Bridge E5-2470, 2.3GHz|96GB|Intel Xeon Phi 5110P|
+ |Fat compute node|2x Intel Sandy Bridge E5-2665, 2.4GHz|512GB|-|
+
+For more details please refer to the [Compute
+nodes](compute-nodes.html),
+[Storage](storage.html), and
+[Network](network.html).
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/introduction.md b/converted/docs.it4i.cz/anselm-cluster-documentation/introduction.md
new file mode 100644
index 0000000000000000000000000000000000000000..68a2e10aa3b2079f2fb1dce3b25653fb7f494ccc
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/introduction.md
@@ -0,0 +1,39 @@
+Introduction
+============
+
+
+
+Welcome to the Anselm supercomputer cluster. The Anselm cluster consists
+of 209 compute nodes, totaling 3344 compute cores with 15TB RAM and
+giving over 94 Tflop/s theoretical peak performance. Each node is a
+powerful x86-64 computer, equipped with 16 cores, at least 64GB RAM, and
+a 500GB hard drive. Nodes are interconnected by a fully non-blocking
+fat-tree Infiniband network and equipped with Intel Sandy Bridge
+processors. A few nodes are also equipped with NVIDIA Kepler GPU or
+Intel Xeon Phi MIC accelerators. Read more in [Hardware
+Overview](hardware-overview.html).
+
+The cluster runs the [bullx
+Linux](http://www.bull.com/bullx-logiciels/systeme-exploitation.html)
+[operating system](software/operating-system.html), which is compatible
+with the [RedHat Linux
+family](http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg).
+We have installed a wide range of
+[software](software.1.html) packages targeted at
+different scientific domains. These packages are accessible via the
+[modules environment](environment-and-modules.html).
+
+The user data shared file-system (HOME, 320TB) and the job data shared
+file-system (SCRATCH, 146TB) are available to users.
+
+The PBS Professional workload manager provides [computing resources
+allocation and job
+execution](resource-allocation-and-job-execution.html).
+
+Read more on how to [apply for
+resources](../get-started-with-it4innovations/applying-for-resources.html),
+[obtain login
+credentials](../get-started-with-it4innovations/obtaining-login-credentials.html),
+and [access the cluster](accessing-the-cluster.html).
+ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/legend.png b/converted/docs.it4i.cz/anselm-cluster-documentation/legend.png new file mode 100644 index 0000000000000000000000000000000000000000..2950ff1bd7059f93195437476fa9333ec42da408 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/legend.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/network.md b/converted/docs.it4i.cz/anselm-cluster-documentation/network.md new file mode 100644 index 0000000000000000000000000000000000000000..b897039fe15a2ddc1374459f610af810a773f42a --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/network.md @@ -0,0 +1,58 @@ +Network +======= + + + +All compute and login nodes of Anselm are interconnected by +[Infiniband](http://en.wikipedia.org/wiki/InfiniBand) +QDR network and by Gigabit +[Ethernet](http://en.wikipedia.org/wiki/Ethernet) +network. Both networks may be used to transfer user data. + +Infiniband Network +------------------ + +All compute and login nodes of Anselm are interconnected by a +high-bandwidth, low-latency +[Infiniband](http://en.wikipedia.org/wiki/InfiniBand) +QDR network (IB 4x QDR, 40 Gbps). The network topology is a fully +non-blocking fat-tree. + +The compute nodes may be accessed via the Infiniband network using ib0 +network interface, in address range 10.2.1.1-209. The MPI may be used to +establish native Infiniband connection among the nodes. + +The network provides **2170MB/s** transfer rates via the TCP connection +(single stream) and up to **3600MB/s** via native Infiniband protocol. + +The Fat tree topology ensures that peak transfer rates are achieved +between any two nodes, independent of network traffic exchanged among +other nodes concurrently. + +Ethernet Network +---------------- + +The compute nodes may be accessed via the regular Gigabit Ethernet +network interface eth0, in address range 10.1.1.1-209, or by using +aliases cn1-cn209. +The network provides **114MB/s** transfer rates via the TCP connection. + +Example +------- + +` +$ qsub -q qexp -l select=4:ncpus=16 -N Name0 ./myjob +$ qstat -n -u username + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +15209.srv11 username qexp Name0 5530 4 64 -- 01:00 R 00:00 + cn17/0*16+cn108/0*16+cn109/0*16+cn110/0*16 + +$ ssh 10.2.1.110 +$ ssh 10.1.1.108 +` + +In this example, we access the node cn110 by Infiniband network via the +ib0 interface, then from cn110 to cn108 by Ethernet network. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/prace.md b/converted/docs.it4i.cz/anselm-cluster-documentation/prace.md new file mode 100644 index 0000000000000000000000000000000000000000..9d29c080f0a885c9022254e5024020f52f0304ba --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/prace.md @@ -0,0 +1,374 @@ +PRACE User Support +================== + + + +Intro +----- + +PRACE users coming to Anselm as to TIER-1 system offered through the +DECI calls are in general treated as standard users and so most of the +general documentation applies to them as well. This section shows the +main differences for quicker orientation, but often uses references to +the original documentation. PRACE users who don't undergo the full +procedure (including signing the IT4I AuP on top of the PRACE AuP) will +not have a password and thus access to some services intended for +regular users. 
This can lower their comfort, but otherwise they should +be able to use the TIER-1 system as intended. Please see the [Obtaining +Login Credentials +section](../get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.html), +if the same level of access is required. + +All general [PRACE User +Documentation](http://www.prace-ri.eu/user-documentation/) +should be read before continuing reading the local documentation here. + +Help and Support +-------------------- + +If you have any troubles, need information, request support or want to +install additional software, please use [PRACE +Helpdesk](http://www.prace-ri.eu/helpdesk-guide264/). + +Information about the local services are provided in the [introduction +of general user documentation](introduction.html). +Please keep in mind, that standard PRACE accounts don't have a password +to access the web interface of the local (IT4Innovations) request +tracker and thus a new ticket should be created by sending an e-mail to +support[at]it4i.cz. + +Obtaining Login Credentials +--------------------------- + +In general PRACE users already have a PRACE account setup through their +HOMESITE (institution from their country) as a result of rewarded PRACE +project proposal. This includes signed PRACE AuP, generated and +registered certificates, etc. + +If there's a special need a PRACE user can get a standard (local) +account at IT4Innovations. To get an account on the Anselm cluster, the +user needs to obtain the login credentials. The procedure is the same as +for general users of the cluster, so please see the corresponding +[section of the general documentation +here](../get-started-with-it4innovations/obtaining-login-credentials.html). + +Accessing the cluster +--------------------- + +### Access with GSI-SSH + +For all PRACE users the method for interactive access (login) and data +transfer based on grid services from Globus Toolkit (GSI SSH and +GridFTP) is supported. + +The user will need a valid certificate and to be present in the PRACE +LDAP (please contact your HOME SITE or the primary investigator of your +project for LDAP account creation). + +Most of the information needed by PRACE users accessing the Anselm +TIER-1 system can be found here: + +- [General user's + FAQ](http://www.prace-ri.eu/Users-General-FAQs) +- [Certificates + FAQ](http://www.prace-ri.eu/Certificates-FAQ) +- [Interactive access using + GSISSH](http://www.prace-ri.eu/Interactive-Access-Using-gsissh) +- [Data transfer with + GridFTP](http://www.prace-ri.eu/Data-Transfer-with-GridFTP-Details) +- [Data transfer with + gtransfer](http://www.prace-ri.eu/Data-Transfer-with-gtransfer) + + + +Before you start to use any of the services don't forget to create a +proxy certificate from your certificate: + + $ grid-proxy-init + +To check whether your proxy certificate is still valid (by default it's +valid 12 hours), use: + + $ grid-proxy-info + + + +To access Anselm cluster, two login nodes running GSI SSH service are +available. The service is available from public Internet as well as from +the internal PRACE network (accessible only from other PRACE partners). + +***Access from PRACE network:** + +It is recommended to use the single DNS name +anselm-prace.it4i.cz which is distributed +between the two login nodes. If needed, user can login directly to one +of the login nodes. 
The addresses are: + + Login address |Port|Protocol| Login node + ----------------------------- |---|---| ------------------ + anselm-prace.it4i.cz 2222 gsissh login1 or login2 + login1-prace.anselm.it4i.cz 2222 gsissh login1 + login2-prace.anselm.it4i.cz 2222 gsissh login2 + + + + $ gsissh -p 2222 anselm-prace.it4i.cz + +When logging from other PRACE system, the prace_service script can be +used: + + $ gsissh `prace_service -i -s anselm` + + + +***Access from public Internet:** + +It is recommended to use the single DNS name +anselm.it4i.cz which is distributed between the +two login nodes. If needed, user can login directly to one of the login +nodes. The addresses are: + + Login address |Port|Protocol| Login node + ----------------------- |---|---| ------------------ + anselm.it4i.cz 2222 gsissh login1 or login2 + login1.anselm.it4i.cz 2222 gsissh login1 + login2.anselm.it4i.cz 2222 gsissh login2 + + $ gsissh -p 2222 anselm.it4i.cz + +When logging from other PRACE system, the +prace_service script can be used: + + $ gsissh `prace_service -e -s anselm` + + + +Although the preferred and recommended file transfer mechanism is [using +GridFTP](prace.html#file-transfers), the GSI SSH +implementation on Anselm supports also SCP, so for small files transfer +gsiscp can be used: + + $ gsiscp -P 2222 _LOCAL_PATH_TO_YOUR_FILE_ anselm.it4i.cz:_ANSELM_PATH_TO_YOUR_FILE_ + + $ gsiscp -P 2222 anselm.it4i.cz:_ANSELM_PATH_TO_YOUR_FILE_ _LOCAL_PATH_TO_YOUR_FILE_ + + $ gsiscp -P 2222 _LOCAL_PATH_TO_YOUR_FILE_ anselm-prace.it4i.cz:_ANSELM_PATH_TO_YOUR_FILE_ + + $ gsiscp -P 2222 anselm-prace.it4i.cz:_ANSELM_PATH_TO_YOUR_FILE_ _LOCAL_PATH_TO_YOUR_FILE_ + +### Access to X11 applications (VNC) + +If the user needs to run X11 based graphical application and does not +have a X11 server, the applications can be run using VNC service. If the +user is using regular SSH based access, please see the [section in +general +documentation](https://docs.it4i.cz/anselm-cluster-documentation/resolveuid/11e53ad0d2fd4c5187537f4baeedff33). + +If the user uses GSI SSH based access, then the procedure is similar to +the SSH based access ([look +here](https://docs.it4i.cz/anselm-cluster-documentation/resolveuid/11e53ad0d2fd4c5187537f4baeedff33)), +only the port forwarding must be done using GSI SSH: + + $ gsissh -p 2222 anselm.it4i.cz -L 5961:localhost:5961 + +### Access with SSH + +After successful obtainment of login credentials for the local +IT4Innovations account, the PRACE users can access the cluster as +regular users using SSH. For more information please see the [section in +general +documentation](https://docs.it4i.cz/anselm-cluster-documentation/resolveuid/5d3d6f3d873a42e584cbf4365c4e251b). + +File transfers +------------------ + +PRACE users can use the same transfer mechanisms as regular users (if +they've undergone the full registration procedure). For information +about this, please see [the section in the general +documentation](https://docs.it4i.cz/anselm-cluster-documentation/resolveuid/5d3d6f3d873a42e584cbf4365c4e251b). + +Apart from the standard mechanisms, for PRACE users to transfer data +to/from Anselm cluster, a GridFTP server running Globus Toolkit GridFTP +service is available. The service is available from public Internet as +well as from the internal PRACE network (accessible only from other +PRACE partners). + +There's one control server and three backend servers for striping and/or +backup in case one of them would fail. 
+ +***Access from PRACE network:** + + Login address Port Node role + ----- |---|---| + gridftp-prace.anselm.it4i.cz 2812 Front end /control server + login1-prace.anselm.it4i.cz 2813 Backend / data mover server + login2-prace.anselm.it4i.cz 2813 Backend / data mover server + dm1-prace.anselm.it4i.cz 2813 Backend / data mover server + +Copy files **to** Anselm by running the following commands on your local +machine: + + $ globus-url-copy file://_LOCAL_PATH_TO_YOUR_FILE_ gsiftp://gridftp-prace.anselm.it4i.cz:2812/home/prace/_YOUR_ACCOUNT_ON_ANSELM_/_PATH_TO_YOUR_FILE_ + +Or by using prace_service script: + + $ globus-url-copy file://_LOCAL_PATH_TO_YOUR_FILE_ gsiftp://`prace_service -i -f anselm`/home/prace/_YOUR_ACCOUNT_ON_ANSELM_/_PATH_TO_YOUR_FILE_ + +Copy files **from** Anselm: + + $ globus-url-copy gsiftp://gridftp-prace.anselm.it4i.cz:2812/home/prace/_YOUR_ACCOUNT_ON_ANSELM_/_PATH_TO_YOUR_FILE_ file://_LOCAL_PATH_TO_YOUR_FILE_ + +Or by using prace_service script: + + $ globus-url-copy gsiftp://`prace_service -i -f anselm`/home/prace/_YOUR_ACCOUNT_ON_ANSELM_/_PATH_TO_YOUR_FILE_ file://_LOCAL_PATH_TO_YOUR_FILE_ + + + +***Access from public Internet:** + + Login address Port Node role + ------------------------ |---|---|------------------- + gridftp.anselm.it4i.cz 2812 Front end /control server + login1.anselm.it4i.cz 2813 Backend / data mover server + login2.anselm.it4i.cz 2813 Backend / data mover server + dm1.anselm.it4i.cz 2813 Backend / data mover server + +Copy files **to** Anselm by running the following commands on your local +machine: + + $ globus-url-copy file://_LOCAL_PATH_TO_YOUR_FILE_ gsiftp://gridftp.anselm.it4i.cz:2812/home/prace/_YOUR_ACCOUNT_ON_ANSELM_/_PATH_TO_YOUR_FILE_ + +Or by using prace_service script: + + $ globus-url-copy file://_LOCAL_PATH_TO_YOUR_FILE_ gsiftp://`prace_service -e -f anselm`/home/prace/_YOUR_ACCOUNT_ON_ANSELM_/_PATH_TO_YOUR_FILE_ + +Copy files **from** Anselm: + + $ globus-url-copy gsiftp://gridftp.anselm.it4i.cz:2812/home/prace/_YOUR_ACCOUNT_ON_ANSELM_/_PATH_TO_YOUR_FILE_ file://_LOCAL_PATH_TO_YOUR_FILE_ + +Or by using prace_service script: + + $ globus-url-copy gsiftp://`prace_service -e -f anselm`/home/prace/_YOUR_ACCOUNT_ON_ANSELM_/_PATH_TO_YOUR_FILE_ file://_LOCAL_PATH_TO_YOUR_FILE_ + + + +Generally both shared file systems are available through GridFTP: + + |File system mount point|Filesystem|Comment| + |---|---| + |/home|Lustre|Default HOME directories of users in format /home/prace/login/| + |/scratch|Lustre|Shared SCRATCH mounted on the whole cluster| + +More information about the shared file systems is available +[here](storage.html). + +Usage of the cluster +-------------------- + +There are some limitations for PRACE user when using the cluster. By +default PRACE users aren't allowed to access special queues in the PBS +Pro to have high priority or exclusive access to some special equipment +like accelerated nodes and high memory (fat) nodes. There may be also +restrictions obtaining a working license for the commercial software +installed on the cluster, mostly because of the license agreement or +because of insufficient amount of licenses. + +For production runs always use scratch file systems, either the global +shared or the local ones. The available file systems are described +[here](hardware-overview.html). + +### Software, Modules and PRACE Common Production Environment + +All system wide installed software on the cluster is made available to +the users via the modules. 
The information about the environment and +modules usage is in this [section of general +documentation](environment-and-modules.html). + +PRACE users can use the "prace" module to use the [PRACE Common +Production +Environment](http://www.prace-ri.eu/PRACE-common-production). + + $ module load prace + + + +### Resource Allocation and Job Execution + +General information about the resource allocation, job queuing and job +execution is in this [section of general +documentation](resource-allocation-and-job-execution/introduction.html). + +For PRACE users, the default production run queue is "qprace". PRACE +users can also use two other queues "qexp" and "qfree". + + ------------------------------------------------------------------------------------------------------------------------- + queue Active project Project resources Nodes priority authorization walltime + + --------------------- -|---|---|---|- ---- |---|---|----- ------------- + **qexp** no none required 2 reserved, high no 1 / 1h + \ 8 total + + gt; 0 >1006 nodes, max 86 per job 0 no 24 / 48h> 0 178 w/o accelerator medium no 24 / 48h + \ + + + **qfree** yes none required 178 w/o accelerator very low no 12 / 12h + \ + ------------------------------------------------------------------------------------------------------------------------- + +qprace**, the PRACE \***: This queue is intended for +normal production runs. It is required that active project with nonzero +remaining resources is specified to enter the qprace. The queue runs +with medium priority and no special authorization is required to use it. +The maximum runtime in qprace is 12 hours. If the job needs longer time, +it must use checkpoint/restart functionality. + +### Accounting & Quota + +The resources that are currently subject to accounting are the core +hours. The core hours are accounted on the wall clock basis. The +accounting runs whenever the computational cores are allocated or +blocked via the PBS Pro workload manager (the qsub command), regardless +of whether the cores are actually used for any calculation. See [example +in the general +documentation](resource-allocation-and-job-execution/resources-allocation-policy.html). + +PRACE users should check their project accounting using the [PRACE +Accounting Tool +(DART)](http://www.prace-ri.eu/accounting-report-tool/). + +Users who have undergone the full local registration procedure +(including signing the IT4Innovations Acceptable Use Policy) and who +have received local password may check at any time, how many core-hours +have been consumed by themselves and their projects using the command +"it4ifree". Please note that you need to know your user password to use +the command and that the displayed core hours are "system core hours" +which differ from PRACE "standardized core hours". + +The **it4ifree** command is a part of it4i.portal.clients package, +located here: +<https://pypi.python.org/pypi/it4i.portal.clients> + + $ it4ifree + Password: +     PID  Total Used ...by me Free +   -------- ------- ------ -------- ------- +   OPEN-0-0 1500000 400644  225265 1099356 +   DD-13-1   10000 2606 2606 7394 + + + +By default file system quota is applied. To check the current status of +the quota use + + $ lfs quota -u USER_LOGIN /home + $ lfs quota -u USER_LOGIN /scratch + +If the quota is insufficient, please contact the +[support](prace.html#help-and-support) and request an +increase. 
+ + + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/quality1.png b/converted/docs.it4i.cz/anselm-cluster-documentation/quality1.png new file mode 100644 index 0000000000000000000000000000000000000000..ed6304dd9c6d1acdd4f3f9e4514930d2a7853d8c Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/quality1.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/quality2.png b/converted/docs.it4i.cz/anselm-cluster-documentation/quality2.png new file mode 100644 index 0000000000000000000000000000000000000000..73d9b9d31f7989d9e2c409d91494f872f97b9397 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/quality2.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/quality3.png b/converted/docs.it4i.cz/anselm-cluster-documentation/quality3.png new file mode 100644 index 0000000000000000000000000000000000000000..be279b83580528fcaaf2171c7c10f485e06420c7 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/quality3.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.md b/converted/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.md new file mode 100644 index 0000000000000000000000000000000000000000..e0c1e9600beca6a880c140b4c4193607c3b04f74 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.md @@ -0,0 +1,266 @@ +Remote visualization service +============================ + +Introduction +------------ + +The goal of this service is to provide the users a GPU accelerated use +of OpenGL applications, especially for pre- and post- processing work, +where not only the GPU performance is needed but also fast access to the +shared file systems of the cluster and a reasonable amount of RAM. + +The service is based on integration of open source tools VirtualGL and +TurboVNC together with the cluster's job scheduler PBS Professional. + +Currently two compute nodes are dedicated for this service with +following configuration for each node: + +[**Visualization node +configuration**](compute-nodes.html) +CPU +2x Intel Sandy Bridge E5-2670, 2.6GHz +Processor cores +16 (2x8 cores) +RAM +64 GB, min. 4 GB per core +GPU +NVIDIA Quadro 4000, 2GB RAM +Local disk drive +yes - 500 GB +Compute network +InfiniBand QDR +Schematic overview +------------------ + + + + + +How to use the service +---------------------- + +### Setup and start your own TurboVNC server. + +TurboVNC is designed and implemented for cooperation with VirtualGL and +available for free for all major platforms. For more information and +download, please refer to: <http://sourceforge.net/projects/turbovnc/> + +Always use TurboVNC on both sides** (server and client) **don't mix +TurboVNC and other VNC implementations** (TightVNC, TigerVNC, ...) as +the VNC protocol implementation may slightly differ and diminish your +user experience by introducing picture artifacts, etc. + +The procedure is: + +#### 1. Connect to a login node. {#1-connect-to-a-login-node} + +Please [follow the +documentation](https://docs.it4i.cz/anselm-cluster-documentation/resolveuid/5d3d6f3d873a42e584cbf4365c4e251b). + +#### 2. Run your own instance of TurboVNC server. {#2-run-your-own-instance-of-turbovnc-server} + +To have the OpenGL acceleration, **24 bit color depth must be used**. +Otherwise only the geometry (desktop size) definition is needed. 
+ +*At first VNC server run you need to define a password.* + +This example defines desktop with dimensions 1200x700 pixels and 24 bit +color depth. + +` +$ module load turbovnc/1.2.2 +$ vncserver -geometry 1200x700 -depth 24 + +Desktop 'TurboVNC: login2:1 (username)' started on display login2:1 + +Starting applications specified in /home/username/.vnc/xstartup.turbovnc +Log file is /home/username/.vnc/login2:1.log +` + +#### 3. Remember which display number your VNC server runs (you will need it in the future to stop the server). {#3-remember-which-display-number-your-vnc-server-runs-you-will-need-it-in-the-future-to-stop-the-server} + +` +$ vncserver -list + +TurboVNC server sessions: + +X DISPLAY # PROCESS ID +:1 23269 +` + +In this example the VNC server runs on display **:1**. + +#### 4. Remember the exact login node, where your VNC server runs. {#4-remember-the-exact-login-node-where-your-vnc-server-runs} + +` +$ uname -n +login2 +` + +In this example the VNC server runs on **login2**. + +#### 5. Remember on which TCP port your own VNC server is running. {#5-remember-on-which-tcp-port-your-own-vnc-server-is-running} + +To get the port you have to look to the log file of your VNC server. + +` +$ grep -E "VNC.*port" /home/username/.vnc/login2:1.log +20/02/2015 14:46:41 Listening for VNC connections on TCP port 5901 +` + +In this example the VNC server listens on TCP port **5901**. + +#### 6. Connect to the login node where your VNC server runs with SSH to tunnel your VNC session. {#6-connect-to-the-login-node-where-your-vnc-server-runs-with-ssh-to-tunnel-your-vnc-session} + +Tunnel the TCP port on which your VNC server is listenning. + +` +$ ssh login2.anselm.it4i.cz -L 5901:localhost:5901 +` + +*If you use Windows and Putty, please refer to port forwarding setup + in the documentation:* +[https://docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/x-window-and-vnc#section-12](accessing-the-cluster/x-window-and-vnc.html#section-12) + +#### 7. If you don't have Turbo VNC installed on your workstation. {#7-if-you-don-t-have-turbo-vnc-installed-on-your-workstation} + +Get it from: <http://sourceforge.net/projects/turbovnc/> + +#### 8. Run TurboVNC Viewer from your workstation. {#8-run-turbovnc-viewer-from-your-workstation} + +Mind that you should connect through the SSH tunneled port. In this +example it is 5901 on your workstation (localhost). + +` +$ vncviewer localhost:5901 +` + +*If you use Windows version of TurboVNC Viewer, just run the Viewer and +use address **localhost:5901**.* + +#### 9. Proceed to the chapter "Access the visualization node." {#9-proceed-to-the-chapter-access-the-visualization-node} + +*Now you should have working TurboVNC session connected to your +workstation.* + +#### 10. After you end your visualization session. {#10-after-you-end-your-visualization-session} + +*Don't forget to correctly shutdown your own VNC server on the login +node!* + +` +$ vncserver -kill :1 +` + +Access the visualization node +----------------------------- + +To access the node use a dedicated PBS Professional scheduler queue +qviz**. 
The queue has the following properties:

|queue|active project|project resources|nodes|min ncpus*|priority|authorization|walltime|
|---|---|---|---|---|---|---|---|
|**qviz** Visualization queue|yes|none required|2|4|150|no|1 hour / 2 hours|

Currently when accessing the node, each user gets 4 cores of a CPU
allocated, thus approximately 16 GB of RAM and 1/4 of the GPU capacity.
*If more GPU power or RAM is required, it is recommended to allocate one
whole node per user, so that all 16 cores, the whole RAM and the whole
GPU are exclusive. This is currently also the maximum allowed allocation
per one user. One hour of work is allocated by default; the user may ask
for 2 hours maximum.*

To access the visualization node, follow these steps:

#### 1. In your VNC session, open a terminal and allocate a node using the PBSPro qsub command. {#1-in-your-vnc-session-open-a-terminal-and-allocate-a-node-using-pbspro-qsub-command}

*This step is necessary to allow you to proceed with the next steps.*

`
$ qsub -I -q qviz -A PROJECT_ID
`

In this example the default values for CPU cores and usage time are
used.

`
$ qsub -I -q qviz -A PROJECT_ID -l select=1:ncpus=16 -l walltime=02:00:00
`

*Substitute **PROJECT_ID** with the assigned project identification
string.*

In this example a whole node for 2 hours is requested.

If there are free resources for your request, you will get a shell
running on the assigned node. Please remember the name of the node.

`
$ uname -n
srv8
`

In this example the visualization session was assigned to node **srv8**.

#### 2. In your VNC session open another terminal (keep the one with the interactive PBSPro job open). {#2-in-your-vnc-session-open-another-terminal-keep-the-one-with-interactive-pbspro-job-open}

Set up the VirtualGL connection to the node which PBSPro allocated for
your job.

`
$ vglconnect srv8
`

You will be connected through the created VirtualGL tunnel to the
visualization node, where you will have a shell.

#### 3. Load the VirtualGL module. {#3-load-the-virtualgl-module}

`
$ module load virtualgl/2.4
`

#### 4. Run your desired OpenGL accelerated application using the VirtualGL script "vglrun". {#4-run-your-desired-opengl-accelerated-application-using-virtualgl-script-vglrun}

`
$ vglrun glxgears
`

Please note that if you want to run an OpenGL application which is
available through modules, you must first load the respective module.
E.g. to run the **Mentat** OpenGL application from the **MARC** software
package use:

`
$ module load marc/2013.1
$ vglrun mentat
`

#### 5. After you end your work with the OpenGL application. {#5-after-you-end-your-work-with-the-opengl-application}

Just log out from the visualization node, exit both opened terminals
and end your VNC server session as described above.

Tips and Tricks
---------------

If you want to increase the responsiveness of the visualization, please
adjust your TurboVNC client settings in this way:

To have an idea how the settings affect the resulting picture quality,
three levels of "JPEG image quality" are demonstrated:

1. JPEG image quality = 30

2. JPEG image quality = 15

3.
JPEG image quality = 10 + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.md b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.md new file mode 100644 index 0000000000000000000000000000000000000000..e05f7ce6eb1e7c1bf9a5d62e77271668435ea368 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.md @@ -0,0 +1,433 @@ +Capacity computing +================== + + + +Introduction +------------ + +In many cases, it is useful to submit huge (>100+) number of +computational jobs into the PBS queue system. Huge number of (small) +jobs is one of the most effective ways to execute embarrassingly +parallel calculations, achieving best runtime, throughput and computer +utilization. + +However, executing huge number of jobs via the PBS queue may strain the +system. This strain may result in slow response to commands, inefficient +scheduling and overall degradation of performance and user experience, +for all users. For this reason, the number of jobs is **limited to 100 +per user, 1000 per job array** + +Please follow one of the procedures below, in case you wish to schedule +more than >100 jobs at a time. + +- Use [Job arrays](capacity-computing.html#job-arrays) + when running huge number of + [multithread](capacity-computing.html#shared-jobscript-on-one-node) + (bound to one node only) or multinode (multithread across + several nodes) jobs +- Use [GNU + parallel](capacity-computing.html#gnu-parallel) when + running single core jobs +- Combine[GNU parallel with Job + arrays](capacity-computing.html#combining-job-arrays-and-gnu-parallel) + when running huge number of single core jobs + +Policy +------ + +1. A user is allowed to submit at most 100 jobs. Each job may be [a job + array](capacity-computing.html#job-arrays). +2. The array size is at most 1000 subjobs. + +Job arrays +-------------- + +Huge number of jobs may be easily submitted and managed as a job array. + +A job array is a compact representation of many jobs, called subjobs. +The subjobs share the same job script, and have the same values for all +attributes and resources, with the following exceptions: + +- each subjob has a unique index, $PBS_ARRAY_INDEX +- job Identifiers of subjobs only differ by their indices +- the state of subjobs can differ (R,Q,...etc.) + +All subjobs within a job array have the same scheduling priority and +schedule as independent jobs. +Entire job array is submitted through a single qsub command and may be +managed by qdel, qalter, qhold, qrls and qsig commands as a single job. + +### Shared jobscript + +All subjobs in job array use the very same, single jobscript. Each +subjob runs its own instance of the jobscript. The instances execute +different work controlled by $PBS_ARRAY_INDEX variable. + +Example: + +Assume we have 900 input files with name beginning with "file" (e. g. +file001, ..., file900). Assume we would like to use each of these input +files with program executable myprog.x, each as a separate job. + +First, we create a tasklist file (or subjobs list), listing all tasks +(subjobs) - all input files in our example: + +` +$ find . 
-name 'file*' > tasklist +` + +Then we create jobscript: + +` +#!/bin/bash +#PBS -A PROJECT_ID +#PBS -q qprod +#PBS -l select=1:ncpus=16,walltime=02:00:00 + +# change to local scratch directory +SCR=/lscratch/$PBS_JOBID +mkdir -p $SCR ; cd $SCR || exit + +# get individual tasks from tasklist with index from PBS JOB ARRAY +TASK=$(sed -n "${PBS_ARRAY_INDEX}p" $PBS_O_WORKDIR/tasklist) + +# copy input file and executable to scratch +cp $PBS_O_WORKDIR/$TASK input ; cp $PBS_O_WORKDIR/myprog.x . + +# execute the calculation +./myprog.x < input > output + +# copy output file to submit directory +cp output $PBS_O_WORKDIR/$TASK.out +` + +In this example, the submit directory holds the 900 input files, +executable myprog.x and the jobscript file. As input for each run, we +take the filename of input file from created tasklist file. We copy the +input file to local scratch /lscratch/$PBS_JOBID, execute the myprog.x +and copy the output file back to >the submit directory, +under the $TASK.out name. The myprog.x runs on one node only and must +use threads to run in parallel. Be aware, that if the myprog.x **is not +multithreaded**, then all the **jobs are run as single thread programs +in sequential** manner. Due to allocation of the whole node, the +accounted time is equal to the usage of whole node**, while using only +1/16 of the node! + +If huge number of parallel multicore (in means of multinode multithread, +e. g. MPI enabled) jobs is needed to run, then a job array approach +should also be used. The main difference compared to previous example +using one node is that the local scratch should not be used (as it's not +shared between nodes) and MPI or other technique for parallel multinode +run has to be used properly. + +### Submit the job array + +To submit the job array, use the qsub -J command. The 900 jobs of the +[example above](capacity-computing.html#array_example) may +be submitted like this: + +` +$ qsub -N JOBNAME -J 1-900 jobscript +12345[].dm2 +` + +In this example, we submit a job array of 900 subjobs. Each subjob will +run on full node and is assumed to take less than 2 hours (please note +the #PBS directives in the beginning of the jobscript file, dont' +forget to set your valid PROJECT_ID and desired queue). + +Sometimes for testing purposes, you may need to submit only one-element +array. This is not allowed by PBSPro, but there's a workaround: + +` +$ qsub -N JOBNAME -J 9-10:2 jobscript +` + +This will only choose the lower index (9 in this example) for +submitting/running your job. + +### Manage the job array + +Check status of the job array by the qstat command. + +` +$ qstat -a 12345[].dm2 + +dm2: + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +12345[].dm2 user2 qprod xx 13516 1 16 -- 00:50 B 00:02 +` + +The status B means that some subjobs are already running. + +Check status of the first 100 subjobs by the qstat command. + +` +$ qstat -a 12345[1-100].dm2 + +dm2: + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +12345[1].dm2 user2 qprod xx 13516 1 16 -- 00:50 R 00:02 +12345[2].dm2 user2 qprod xx 13516 1 16 -- 00:50 R 00:02 +12345[3].dm2 user2 qprod xx 13516 1 16 -- 00:50 R 00:01 +12345[4].dm2 user2 qprod xx 13516 1 16 -- 00:50 Q -- + . . . . . . . . . . . + , . . . . . . . . . . 
+12345[100].dm2 user2 qprod xx 13516 1 16 -- 00:50 Q -- +` + +Delete the entire job array. Running subjobs will be killed, queueing +subjobs will be deleted. + +` +$ qdel 12345[].dm2 +` + +Deleting large job arrays may take a while. + +Display status information for all user's jobs, job arrays, and subjobs. + +` +$ qstat -u $USER -t +` + +Display status information for all user's subjobs. + +` +$ qstat -u $USER -tJ +` + +Read more on job arrays in the [PBSPro Users +guide](../../pbspro-documentation.html). + +GNU parallel +---------------- + +Use GNU parallel to run many single core tasks on one node. + +GNU parallel is a shell tool for executing jobs in parallel using one or +more computers. A job can be a single command or a small script that has +to be run for each of the lines in the input. GNU parallel is most +useful in running single core jobs via the queue system on Anselm. + +For more information and examples see the parallel man page: + +` +$ module add parallel +$ man parallel +` + +### GNU parallel jobscript + +The GNU parallel shell executes multiple instances of the jobscript +using all cores on the node. The instances execute different work, +controlled by the $PARALLEL_SEQ variable. + +Example: + +Assume we have 101 input files with name beginning with "file" (e. g. +file001, ..., file101). Assume we would like to use each of these input +files with program executable myprog.x, each as a separate single core +job. We call these single core jobs tasks. + +First, we create a tasklist file, listing all tasks - all input files in +our example: + +` +$ find . -name 'file*' > tasklist +` + +Then we create jobscript: + +` +#!/bin/bash +#PBS -A PROJECT_ID +#PBS -q qprod +#PBS -l select=1:ncpus=16,walltime=02:00:00 + +[ -z "$PARALLEL_SEQ" ] && +{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/tasklist $0 ; } + +# change to local scratch directory +SCR=/lscratch/$PBS_JOBID/$PARALLEL_SEQ +mkdir -p $SCR ; cd $SCR || exit + +# get individual task from tasklist +TASK=$1  + +# copy input file and executable to scratch +cp $PBS_O_WORKDIR/$TASK input + +# execute the calculation +cat input > output + +# copy output file to submit directory +cp output $PBS_O_WORKDIR/$TASK.out +` + +In this example, tasks from tasklist are executed via the GNU +parallel. The jobscript executes multiple instances of itself in +parallel, on all cores of the node. Once an instace of jobscript is +finished, new instance starts until all entries in tasklist are +processed. Currently processed entry of the joblist may be retrieved via +$1 variable. Variable $TASK expands to one of the input filenames from +tasklist. We copy the input file to local scratch, execute the myprog.x +and copy the output file back to the submit directory, under the +$TASK.out name. + +### Submit the job + +To submit the job, use the qsub command. The 101 tasks' job of the +[example above](capacity-computing.html#gp_example) may be +submitted like this: + +` +$ qsub -N JOBNAME jobscript +12345.dm2 +` + +In this example, we submit a job of 101 tasks. 16 input files will be +processed in parallel. The 101 tasks on 16 cores are assumed to +complete in less than 2 hours. + +Please note the #PBS directives in the beginning of the jobscript file, +dont' forget to set your valid PROJECT_ID and desired queue. 
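Before submitting the job, it may help to preview how GNU parallel will
map the tasklist entries to commands. A minimal sketch, assuming the
--dry-run option is available in the parallel module; it only prints the
generated command lines on a login node and starts no jobs:

`
$ module add parallel
$ parallel --dry-run -a tasklist echo "processing task {}"
`

Each line of tasklist is substituted for {}, one command per task.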
+ +Job arrays and GNU parallel +------------------------------- + +Combine the Job arrays and GNU parallel for best throughput of single +core jobs + +While job arrays are able to utilize all available computational nodes, +the GNU parallel can be used to efficiently run multiple single-core +jobs on single node. The two approaches may be combined to utilize all +available (current and future) resources to execute single core jobs. + +Every subjob in an array runs GNU parallel to utilize all cores on the +node + +### GNU parallel, shared jobscript + +Combined approach, very similar to job arrays, can be taken. Job array +is submitted to the queuing system. The subjobs run GNU parallel. The +GNU parallel shell executes multiple instances of the jobscript using +all cores on the node. The instances execute different work, controlled +by the $PBS_JOB_ARRAY and $PARALLEL_SEQ variables. + +Example: + +Assume we have 992 input files with name beginning with "file" (e. g. +file001, ..., file992). Assume we would like to use each of these input +files with program executable myprog.x, each as a separate single core +job. We call these single core jobs tasks. + +First, we create a tasklist file, listing all tasks - all input files in +our example: + +` +$ find . -name 'file*' > tasklist +` + +Next we create a file, controlling how many tasks will be executed in +one subjob + +` +$ seq 32 > numtasks +` + +Then we create jobscript: + +` +#!/bin/bash +#PBS -A PROJECT_ID +#PBS -q qprod +#PBS -l select=1:ncpus=16,walltime=02:00:00 + +[ -z "$PARALLEL_SEQ" ] && +{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/numtasks $0 ; } + +# change to local scratch directory +SCR=/lscratch/$PBS_JOBID/$PARALLEL_SEQ +mkdir -p $SCR ; cd $SCR || exit + +# get individual task from tasklist with index from PBS JOB ARRAY and index form Parallel +IDX=$(($PBS_ARRAY_INDEX + $PARALLEL_SEQ - 1)) +TASK=$(sed -n "${IDX}p" $PBS_O_WORKDIR/tasklist) +[ -z "$TASK" ] && exit + +# copy input file and executable to scratch +cp $PBS_O_WORKDIR/$TASK input + +# execute the calculation +cat input > output + +# copy output file to submit directory +cp output $PBS_O_WORKDIR/$TASK.out +` + +In this example, the jobscript executes in multiple instances in +parallel, on all cores of a computing node. Variable $TASK expands to +one of the input filenames from tasklist. We copy the input file to +local scratch, execute the myprog.x and copy the output file back to the +submit directory, under the $TASK.out name. The numtasks file controls +how many tasks will be run per subjob. Once an task is finished, new +task starts, until the number of tasks in numtasks file is reached. + +Select subjob walltime and number of tasks per subjob carefully + + When deciding this values, think about following guiding rules : + +1. Let n=N/16. Inequality (n+1) * T < W should hold. The N is + number of tasks per subjob, T is expected single task walltime and W + is subjob walltime. Short subjob walltime improves scheduling and + job throughput. +2. Number of tasks should be modulo 16. +3. These rules are valid only when all tasks have similar task + walltimes T. + +### Submit the job array + +To submit the job array, use the qsub -J command. The 992 tasks' job of +the [example +above](capacity-computing.html#combined_example) may be +submitted like this: + +` +$ qsub -N JOBNAME -J 1-992:32 jobscript +12345[].dm2 +` + +In this example, we submit a job array of 31 subjobs. Note the -J +1-992:**32**, this must be the same as the number sent to numtasks file. 
+Each subjob will run on full node and process 16 input files in +parallel, 32 in total per subjob. Every subjob is assumed to complete +in less than 2 hours. + +Please note the #PBS directives in the beginning of the jobscript file, +dont' forget to set your valid PROJECT_ID and desired queue. + +Examples +-------- + +Download the examples in +[capacity.zip](capacity-computing-examples), +illustrating the above listed ways to run huge number of jobs. We +recommend to try out the examples, before using this for running +production jobs. + +Unzip the archive in an empty directory on Anselm and follow the +instructions in the README file + +` +$ unzip capacity.zip +$ cat README +` + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/fairshare_formula.png b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/fairshare_formula.png new file mode 100644 index 0000000000000000000000000000000000000000..6a5a1443fa08cd9d3c62bea52bbb48136b2501dc Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/fairshare_formula.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.md b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..e42b4037ca0dbbcb978d985a69a1548c71ef1fdc --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.md @@ -0,0 +1,71 @@ +Resource Allocation and Job Execution +===================================== + + + +To run a [job](../introduction.html), [computational +resources](../introduction.html) for this particular job +must be allocated. This is done via the PBS Pro job workload manager +software, which efficiently distributes workloads across the +supercomputer. Extensive informations about PBS Pro can be found in the +[official documentation +here](../../pbspro-documentation.html), especially in +the [PBS Pro User's +Guide](https://docs.it4i.cz/pbspro-documentation/pbspro-users-guide). + +Resources Allocation Policy +--------------------------- + +The resources are allocated to the job in a fairshare fashion, subject +to constraints set by the queue and resources available to the Project. +[The Fairshare](job-priority.html) at Anselm ensures +that individual users may consume approximately equal amount of +resources per week. The resources are accessible via several queues for +queueing the jobs. The queues provide prioritized and exclusive access +to the computational resources. Following queues are available to Anselm +users: + +- **qexp**, the \ +- **qprod**, the \*** +- **qlong**, the Long queue, regula +- **qnvidia, qmic, qfat**, the Dedicated queues +- **qfree,** the Free resource utilization queue + +Check the queue status at <https://extranet.it4i.cz/anselm/> + +Read more on the [Resource Allocation +Policy](resources-allocation-policy.html) page. + +Job submission and execution +---------------------------- + +Use the **qsub** command to submit your jobs. + +The qsub submits the job into the queue. The qsub command creates a +request to the PBS Job manager for allocation of specified resources. +The **smallest allocation unit is entire node, 16 cores**, with +exception of the qexp queue. 
The resources will be allocated when +available, subject to allocation policies and constraints. **After the +resources are allocated the jobscript or interactive shell is executed +on first of the allocated nodes.** + +Read more on the [Job submission and +execution](job-submission-and-execution.html) page. + +Capacity computing +------------------ + +Use Job arrays when running huge number of jobs. +Use GNU Parallel and/or Job arrays when running (many) single core jobs. + +In many cases, it is useful to submit huge (>100+) number of +computational jobs into the PBS queue system. Huge number of (small) +jobs is one of the most effective ways to execute embarrassingly +parallel calculations, achieving best runtime, throughput and computer +utilization. In this chapter, we discuss the the recommended way to run +huge number of jobs, including **ways to run huge number of single core +jobs**. + +Read more on [Capacity +computing](capacity-computing.html) page. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.md b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.md new file mode 100644 index 0000000000000000000000000000000000000000..2ef433187bd002e6ac28e8f8303c4864bc76602b --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.md @@ -0,0 +1,103 @@ +Job scheduling +============== + +Job execution priority +---------------------- + +Scheduler gives each job an execution priority and then uses this job +execution priority to select which job(s) to run. + +Job execution priority on Anselm is determined by these job properties +(in order of importance): + +1. queue priority +2. fairshare priority +3. eligible time + +### Queue priority + +Queue priority is priority of queue where job is queued before +execution. + +Queue priority has the biggest impact on job execution priority. +Execution priority of jobs in higher priority queues is always greater +than execution priority of jobs in lower priority queues. Other +properties of job used for determining job execution priority (fairshare +priority, eligible time) cannot compete with queue priority. + +Queue priorities can be seen at <https://extranet.it4i.cz/anselm/queues> + +### Fairshare priority + +Fairshare priority is priority calculated on recent usage of resources. +Fairshare priority is calculated per project, all members of project +share same fairshare priority. Projects with higher recent usage have +lower fairshare priority than projects with lower or none recent usage. + +Fairshare priority is used for ranking jobs with equal queue priority. + +Fairshare priority is calculated as + + + +where MAX_FAIRSHARE has value 1E6, +usage~Project~ is cumulated usage by all members of selected project, +usage~Total~ is total usage by all users, by all projects. + +Usage counts allocated corehours (ncpus*walltime). Usage is decayed, or +cut in half periodically, at the interval 168 hours (one week). +Jobs queued in queue qexp are not calculated to project's usage. + +Calculated usage and fairshare priority can be seen at +<https://extranet.it4i.cz/anselm/projects>. + +Calculated fairshare priority can be also seen as +Resource_List.fairshare attribute of a job. + +###Eligible time + +Eligible time is amount (in seconds) of eligible time job accrued while +waiting to run. Jobs with higher eligible time gains higher +priority. 
+ +Eligible time has the least impact on execution priority. Eligible time +is used for sorting jobs with equal queue priority and fairshare +priority. It is very, very difficult for >eligible time to +compete with fairshare priority. + +Eligible time can be seen as eligible_time attribute of +job. + +### Formula + +Job execution priority (job sort formula) is calculated as: + + + +### Job backfilling + +Anselm cluster uses job backfilling. + +Backfilling means fitting smaller jobs around the higher-priority jobs +that the scheduler is going to run next, in such a way that the +higher-priority jobs are not delayed. Backfilling allows us to keep +resources from becoming idle when the top job (job with the highest +execution priority) cannot run. + +The scheduler makes a list of jobs to run in order of execution +priority. Scheduler looks for smaller jobs that can fit into the usage +gaps +around the highest-priority jobs in the list. The scheduler looks in the +prioritized list of jobs and chooses the highest-priority smaller jobs +that fit. Filler jobs are run only if they will not delay the start time +of top jobs. + +It means, that jobs with lower execution priority can be run before jobs +with higher execution priority. + +It is **very beneficial to specify the walltime** when submitting jobs. + +Specifying more accurate walltime enables better schedulling, better +execution times and better resource usage. Jobs with suitable (small) +walltime could be backfilled - and overtake job(s) with higher priority. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.md b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.md new file mode 100644 index 0000000000000000000000000000000000000000..599e0563344a9a675daae7c8adaa83a8956aabf9 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.md @@ -0,0 +1,530 @@ +Job submission and execution +============================ + + + +Job Submission +-------------- + +When allocating computational resources for the job, please specify + +1. suitable queue for your job (default is qprod) +2. number of computational nodes required +3. number of cores per node required +4. maximum wall time allocated to your calculation, note that jobs + exceeding maximum wall time will be killed +5. Project ID +6. Jobscript or interactive switch + +Use the **qsub** command to submit your job to a queue for allocation of +the computational resources. + +Submit the job using the qsub command: + +` +$ qsub -A Project_ID -q queue -l select=x:ncpus=y,walltime=[[hh:]mm:]ss[.ms] jobscript +` + +The qsub submits the job into the queue, in another words the qsub +command creates a request to the PBS Job manager for allocation of +specified resources. The resources will be allocated when available, +subject to above described policies and constraints. **After the +resources are allocated the jobscript or interactive shell is executed +on first of the allocated nodes.** + +### Job Submission Examples + +` +$ qsub -A OPEN-0-0 -q qprod -l select=64:ncpus=16,walltime=03:00:00 ./myjob +` + +In this example, we allocate 64 nodes, 16 cores per node, for 3 hours. +We allocate these resources via the qprod queue, consumed resources will +be accounted to the Project identified by Project ID OPEN-0-0. 
Jobscript +myjob will be executed on the first node in the allocation. + + + +` +$ qsub -q qexp -l select=4:ncpus=16 -I +` + +In this example, we allocate 4 nodes, 16 cores per node, for 1 hour. We +allocate these resources via the qexp queue. The resources will be +available interactively + + + +` +$ qsub -A OPEN-0-0 -q qnvidia -l select=10:ncpus=16 ./myjob +` + +In this example, we allocate 10 nvidia accelerated nodes, 16 cores per +node, for 24 hours. We allocate these resources via the qnvidia queue. +Jobscript myjob will be executed on the first node in the allocation. + + + +` +$ qsub -A OPEN-0-0 -q qfree -l select=10:ncpus=16 ./myjob +` + +In this example, we allocate 10 nodes, 16 cores per node, for 12 hours. +We allocate these resources via the qfree queue. It is not required that +the project OPEN-0-0 has any available resources left. Consumed +resources are still accounted for. Jobscript myjob will be executed on +the first node in the allocation. + + + +All qsub options may be [saved directly into the +jobscript](job-submission-and-execution.html#PBSsaved). In +such a case, no options to qsub are needed. + +` +$ qsub ./myjob +` + + + +By default, the PBS batch system sends an e-mail only when the job is +aborted. Disabling mail events completely can be done like this: + +` +$ qsub -m n +` + +Advanced job placement +---------------------- + +### Placement by name + +Specific nodes may be allocated via the PBS + +` +qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=16:host=cn171+1:ncpus=16:host=cn172 -I +` + +In this example, we allocate nodes cn171 and cn172, all 16 cores per +node, for 24 hours. Consumed resources will be accounted to the Project +identified by Project ID OPEN-0-0. The resources will be available +interactively. + +### Placement by CPU type + +Nodes equipped with Intel Xeon E5-2665 CPU have base clock frequency +2.4GHz, nodes equipped with Intel Xeon E5-2470 CPU have base frequency +2.3 GHz (see section Compute Nodes for details). Nodes may be selected +via the PBS resource attribute +cpu_freq . + + CPU Type base freq. Nodes cpu_freq attribute + -------------- |---|---|-- ---------------------------- --------------------- + Intel Xeon E5-2665 2.4GHz cn[1-180], cn[208-209] 24 + Intel Xeon E5-2470 2.3GHz cn[181-207] 23 + + + +` +$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16:cpu_freq=24 -I +` + +In this example, we allocate 4 nodes, 16 cores, selecting only the nodes +with Intel Xeon E5-2665 CPU. + +### Placement by IB switch + +Groups of computational nodes are connected to chassis integrated +Infiniband switches. These switches form the leaf switch layer of the +[Infiniband network](../network.html) +fat tree topology. Nodes sharing the leaf +switch can communicate most efficiently. Sharing the same switch +prevents hops in the network and provides for unbiased, most efficient +network communication. + +Nodes sharing the same switch may be selected via the PBS resource +attribute ibswitch. Values of this attribute are iswXX, where XX is the +switch number. The node-switch mapping can be seen at [Hardware +Overview](../hardware-overview.html) section. + +We recommend allocating compute nodes of a single switch when best +possible computational network performance is required to run the job +efficiently: + + qsub -A OPEN-0-0 -q qprod -l select=18:ncpus=16:ibswitch=isw11 ./myjob + +In this example, we request all the 18 nodes sharing the isw11 switch +for 24 hours. Full chassis will be allocated. 
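The ibswitch value of a particular node can be checked through the PBS
node attributes. A hedged sketch, assuming the ibswitch resource is
exported in the pbsnodes output on Anselm (the switch name shown is only
illustrative):

`
$ pbsnodes cn171 | grep ibswitch
    resources_available.ibswitch = isw11
`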
+ +Advanced job handling +--------------------- + +### Selecting Turbo Boost off + +Intel Turbo Boost Technology is on by default. We strongly recommend +keeping the default. + +If necessary (such as in case of benchmarking) you can disable the Turbo +for all nodes of the job by using the PBS resource attribute +cpu_turbo_boost + + $ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=16 -l cpu_turbo_boost=0 -I + +More about the Intel Turbo Boost in the TurboBoost section + +### Advanced examples + +In the following example, we select an allocation for benchmarking a +very special and demanding MPI program. We request Turbo off, 2 full +chassis of compute nodes (nodes sharing the same IB switches) for 30 +minutes: + + $ qsub -A OPEN-0-0 -q qprod + -l select=18:ncpus=16:ibswitch=isw10:mpiprocs=1:ompthreads=16+18:ncpus=16:ibswitch=isw20:mpiprocs=16:ompthreads=1 + -l cpu_turbo_boost=0,walltime=00:30:00 + -N Benchmark ./mybenchmark + +The MPI processes will be distributed differently on the nodes connected +to the two switches. On the isw10 nodes, we will run 1 MPI process per +node 16 threads per process, on isw20 nodes we will run 16 plain MPI +processes. + +Although this example is somewhat artificial, it demonstrates the +flexibility of the qsub command options. + +Job Management +-------------- + +Check status of your jobs using the **qstat** and **check-pbs-jobs** +commands + +` +$ qstat -a +$ qstat -a -u username +$ qstat -an -u username +$ qstat -f 12345.srv11 +` + +Example: + +` +$ qstat -a + +srv11: + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +16287.srv11 user1 qlong job1 6183 4 64 -- 144:0 R 38:25 +16468.srv11 user1 qlong job2 8060 4 64 -- 144:0 R 17:44 +16547.srv11 user2 qprod job3x 13516 2 32 -- 48:00 R 00:58 +` + +In this example user1 and user2 are running jobs named job1, job2 and +job3x. The jobs job1 and job2 are using 4 nodes, 16 cores per node each. +The job1 already runs for 38 hours and 25 minutes, job2 for 17 hours 44 +minutes. The job1 already consumed 64*38.41 = 2458.6 core hours. The +job3x already consumed 0.96*32 = 30.93 core hours. These consumed core +hours will be accounted on the respective project accounts, regardless +of whether the allocated cores were actually used for computations. + +Check status of your jobs using check-pbs-jobs command. Check presence +of user's PBS jobs' processes on execution hosts. Display load, +processes. Display job standard and error output. Continuously display +(tail -f) job standard or error output. + +` +$ check-pbs-jobs --check-all +$ check-pbs-jobs --print-load --print-processes +$ check-pbs-jobs --print-job-out --print-job-err + +$ check-pbs-jobs --jobid JOBID --check-all --print-all + +$ check-pbs-jobs --jobid JOBID --tailf-job-out +` + +Examples: + +` +$ check-pbs-jobs --check-all +JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165 +Check session id: OK +Check processes +cn164: OK +cn165: No process +` + +In this example we see that job 35141.dm2 currently runs no process on +allocated node cn165, which may indicate an execution error. + +` +$ check-pbs-jobs --print-load --print-processes +JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165 +Print load +cn164: LOAD: 16.01, 16.01, 16.00 +cn165: LOAD: 0.01, 0.00, 0.01 +Print processes + %CPU CMD +cn164: 0.0 -bash +cn164: 0.0 /bin/bash /var/spool/PBS/mom_priv/jobs/35141.dm2.SC +cn164: 99.7 run-task +... 
+` + +In this example we see that job 35141.dm2 currently runs process +run-task on node cn164, using one thread only, while node cn165 is +empty, which may indicate an execution error. + +` +$ check-pbs-jobs --jobid 35141.dm2 --print-job-out +JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165 +Print job standard output: +======================== Job start ========================== +Started at   : Fri Aug 30 02:47:53 CEST 2013 +Script name  : script +Run loop 1 +Run loop 2 +Run loop 3 +` + +In this example, we see actual output (some iteration loops) of the job +35141.dm2 + +Manage your queued or running jobs, using the **qhold**, **qrls**, +qdel,** **qsig** or **qalter** commands + +You may release your allocation at any time, using qdel command + +` +$ qdel 12345.srv11 +` + +You may kill a running job by force, using qsig command + +` +$ qsig -s 9 12345.srv11 +` + +Learn more by reading the pbs man page + +` +$ man pbs_professional +` + +Job Execution +------------- + +### Jobscript + +Prepare the jobscript to run batch jobs in the PBS queue system + +The Jobscript is a user made script, controlling sequence of commands +for executing the calculation. It is often written in bash, other +scripts may be used as well. The jobscript is supplied to PBS **qsub** +command as an argument and executed by the PBS Professional workload +manager. + +The jobscript or interactive shell is executed on first of the allocated +nodes. + +` +$ qsub -q qexp -l select=4:ncpus=16 -N Name0 ./myjob +$ qstat -n -u username + +srv11: + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +15209.srv11 username qexp Name0 5530 4 64 -- 01:00 R 00:00 + cn17/0*16+cn108/0*16+cn109/0*16+cn110/0*16 +` + + In this example, the nodes cn17, cn108, cn109 and cn110 were allocated +for 1 hour via the qexp queue. The jobscript myjob will be executed on +the node cn17, while the nodes cn108, cn109 and cn110 are available for +use as well. + +The jobscript or interactive shell is by default executed in home +directory + +` +$ qsub -q qexp -l select=4:ncpus=16 -I +qsub: waiting for job 15210.srv11 to start +qsub: job 15210.srv11 ready + +$ pwd +/home/username +` + +In this example, 4 nodes were allocated interactively for 1 hour via the +qexp queue. The interactive shell is executed in the home directory. + +All nodes within the allocation may be accessed via ssh. Unallocated +nodes are not accessible to user. + +The allocated nodes are accessible via ssh from login nodes. The nodes +may access each other via ssh as well. + +Calculations on allocated nodes may be executed remotely via the MPI, +ssh, pdsh or clush. You may find out which nodes belong to the +allocation by reading the $PBS_NODEFILE file + +` +qsub -q qexp -l select=4:ncpus=16 -I +qsub: waiting for job 15210.srv11 to start +qsub: job 15210.srv11 ready + +$ pwd +/home/username + +$ sort -u $PBS_NODEFILE +cn17.bullx +cn108.bullx +cn109.bullx +cn110.bullx + +$ pdsh -w cn17,cn[108-110] hostname +cn17: cn17 +cn108: cn108 +cn109: cn109 +cn110: cn110 +` + +In this example, the hostname program is executed via pdsh from the +interactive shell. The execution runs on all four allocated nodes. The +same result would be achieved if the pdsh is called from any of the +allocated nodes or from the login nodes. 
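If pdsh or clush is not available in your environment, the same fan-out
can be sketched with a plain ssh loop over the nodefile. This minimal
alternative runs the command on the allocated nodes one after another:

`
$ for node in $(sort -u $PBS_NODEFILE); do ssh $node hostname; done
cn17
cn108
cn109
cn110
`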
+ +### Example Jobscript for MPI Calculation + +Production jobs must use the /scratch directory for I/O + +The recommended way to run production jobs is to change to /scratch +directory early in the jobscript, copy all inputs to /scratch, execute +the calculations and copy outputs to home directory. + +` +#!/bin/bash + +# change to scratch directory, exit on failure +SCRDIR=/scratch/$USER/myjob +mkdir -p $SCRDIR +cd $SCRDIR || exit + +# copy input file to scratch +cp $PBS_O_WORKDIR/input . +cp $PBS_O_WORKDIR/mympiprog.x . + +# load the mpi module +module load openmpi + +# execute the calculation +mpiexec -pernode ./mympiprog.x + +# copy output file to home +cp output $PBS_O_WORKDIR/. + +#exit +exit +` + +In this example, some directory on the /home holds the input file input +and executable mympiprog.x . We create a directory myjob on the /scratch +filesystem, copy input and executable files from the /home directory +where the qsub was invoked ($PBS_O_WORKDIR) to /scratch, execute the +MPI programm mympiprog.x and copy the output file back to the /home +directory. The mympiprog.x is executed as one process per node, on all +allocated nodes. + +Consider preloading inputs and executables onto [shared +scratch](../storage.html) before the calculation starts. + +In some cases, it may be impractical to copy the inputs to scratch and +outputs to home. This is especially true when very large input and +output files are expected, or when the files should be reused by a +subsequent calculation. In such a case, it is users responsibility to +preload the input files on shared /scratch before the job submission and +retrieve the outputs manually, after all calculations are finished. + +Store the qsub options within the jobscript. +Use **mpiprocs** and **ompthreads** qsub options to control the MPI job +execution. + +Example jobscript for an MPI job with preloaded inputs and executables, +options for qsub are stored within the script : + +` +#!/bin/bash +#PBS -q qprod +#PBS -N MYJOB +#PBS -l select=100:ncpus=16:mpiprocs=1:ompthreads=16 +#PBS -A OPEN-0-0 + +# change to scratch directory, exit on failure +SCRDIR=/scratch/$USER/myjob +cd $SCRDIR || exit + +# load the mpi module +module load openmpi + +# execute the calculation +mpiexec ./mympiprog.x + +#exit +exit +` + +In this example, input and executable files are assumed preloaded +manually in /scratch/$USER/myjob directory. Note the **mpiprocs** and +ompthreads** qsub options, controlling behavior of the MPI execution. +The mympiprog.x is executed as one process per node, on all 100 +allocated nodes. If mympiprog.x implements OpenMP threads, it will run +16 threads per node. + +More information is found in the [Running +OpenMPI](../software/mpi-1/Running_OpenMPI.html) and +[Running MPICH2](../software/mpi-1/running-mpich2.html) +sections. + +### Example Jobscript for Single Node Calculation + +Local scratch directory is often useful for single node jobs. Local +scratch will be deleted immediately after the job ends. + +Example jobscript for single node calculation, using [local +scratch](../storage.html) on the node: + +` +#!/bin/bash + +# change to local scratch directory +cd /lscratch/$PBS_JOBID || exit + +# copy input file to scratch +cp $PBS_O_WORKDIR/input . +cp $PBS_O_WORKDIR/myprog.x . + +# execute the calculation +./myprog.x + +# copy output file to home +cp output $PBS_O_WORKDIR/. + +#exit +exit +` + +In this example, some directory on the home holds the input file input +and executable myprog.x . 
We copy input and executable files from the +home directory where the qsub was invoked ($PBS_O_WORKDIR) to local +scratch /lscratch/$PBS_JOBID, execute the myprog.x and copy the output +file back to the /home directory. The myprog.x runs on one node only and +may use threads. + +### Other Jobscript Examples + +Further jobscript examples may be found in the +[Software](../software.1.html) section and the [Capacity +computing](capacity-computing.html) section. + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job_sort_formula.png b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job_sort_formula.png new file mode 100644 index 0000000000000000000000000000000000000000..6078911559aa56effb4b342fa4ffd074cfaed46f Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job_sort_formula.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.md b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.md new file mode 100644 index 0000000000000000000000000000000000000000..58da4ca5bc6e08cd916173b5a71a0cfe291999d6 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.md @@ -0,0 +1,213 @@ +Resources Allocation Policy +=========================== + + + +Resources Allocation Policy +--------------------------- + +The resources are allocated to the job in a fairshare fashion, subject +to constraints set by the queue and resources available to the Project. +The Fairshare at Anselm ensures that individual users may consume +approximately equal amount of resources per week. Detailed information +in the [Job scheduling](job-priority.html) section. The +resources are accessible via several queues for queueing the jobs. The +queues provide prioritized and exclusive access to the computational +resources. Following table provides the queue partitioning overview:  + + + |queue |active project |project resources |nodes<th align="left">min ncpus*<th align="left">priority<th align="left">authorization<th align="left">walltime | + | --- | --- | + |<strong>qexp</strong>\ |no |none required |2 reserved, 31 totalincluding MIC, GPU and FAT nodes |1 |><em>150</em> |no |1h | + |<strong>qprod</strong>\ |yes |> 0 |><em>178 nodes w/o accelerator</em>\ |16 |0 |no |24/48h | + |<strong>qlong</strong>Long queue\ |yes |> 0 |60 nodes w/o accelerator |16 |0 |no |72/144h | + |<strong>qnvidia, qmic, qfat</strong>Dedicated queues\ |yes |<p>> 0\ |23 total qnvidia4 total qmic2 total qfat |16 |><em>200</em> |yes |24/48h | + |<strong>qfree</strong>\ |yes |none required |178 w/o accelerator |16 |-1024 |no |12h | + +The qfree queue is not free of charge**. [Normal +accounting](resources-allocation-policy.html#resources-accounting-policy) +applies. However, it allows for utilization of free resources, once a +Project exhausted all its allocated computational resources. This does +not apply for Directors Discreation's projects (DD projects) by default. +Usage of qfree after exhaustion of DD projects computational resources +is allowed after request for this queue. 
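For illustration, a job of a project with exhausted resources may still
be placed into the qfree queue like this (a minimal sketch; OPEN-0-0
stands for your Project ID, and the consumed resources are accounted as
usual):

`
$ qsub -A OPEN-0-0 -q qfree -l select=10:ncpus=16 ./myjob
`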
The qexp queue is equipped with nodes that do not all have the very same
CPU clock speed. Should you need the very same CPU speed, you have to
select the proper nodes during the PBS job submission.

- **qexp**, the Express queue: This queue is dedicated for testing and
  running very small jobs. It is not required to specify a project to
  enter the qexp. *There are 2 nodes always reserved for this queue (w/o
  accelerator); a maximum of 8 nodes is available via the qexp for a
  particular user, from a pool of nodes containing **Nvidia**
  accelerated nodes (cn181-203), **MIC** accelerated nodes (cn204-207)
  and **Fat** nodes with 512GB RAM (cn208-209). This makes it possible
  to test and tune accelerated code or code with higher RAM requirements
  as well.* The nodes may be allocated on a per core basis. No special
  authorization is required to use it. The maximum runtime in qexp is
  1 hour.
- **qprod**, the Production queue: This queue is intended for normal
  production runs. It is required that an active project with nonzero
  remaining resources is specified to enter the qprod. All nodes may be
  accessed via the qprod queue, except the reserved ones. *178 nodes
  without accelerator are included.* Full nodes, 16 cores per node, are
  allocated. The queue runs with medium priority and no special
  authorization is required to use it. The maximum runtime in qprod is
  48 hours.
- **qlong**, the Long queue: This queue is intended for long production
  runs. It is required that an active project with nonzero remaining
  resources is specified to enter the qlong. Only 60 nodes without
  acceleration may be accessed via the qlong queue. Full nodes, 16 cores
  per node, are allocated. The queue runs with medium priority and no
  special authorization is required to use it. *The maximum runtime in
  qlong is 144 hours (three times the standard qprod time - 3 * 48 h).*
- **qnvidia, qmic, qfat**, the Dedicated queues: The queue qnvidia is
  dedicated to access the Nvidia accelerated nodes, the qmic to access
  MIC nodes and qfat the Fat nodes. It is required that an active
  project with nonzero remaining resources is specified to enter these
  queues. 23 nvidia, 4 mic and 2 fat nodes are included. Full nodes, 16
  cores per node, are allocated. The queues run with *very high
  priority*; the jobs will be scheduled before the jobs coming from the
  *qexp* queue. A PI *needs to explicitly ask*
  [support](https://support.it4i.cz/rt/) for authorization to enter the
  dedicated queues for all users associated to her/his Project.
- **qfree**, the Free resource utilization queue: The queue qfree is
  intended for utilization of free resources, after a Project has
  exhausted all its allocated computational resources (this does not
  apply to DD projects by default; DD projects have to request
  permission to use qfree after exhaustion of their computational
  resources). It is required that an active project is specified to
  enter the queue, however no remaining resources are required. Consumed
  resources will be accounted to the Project. Only 178 nodes without
  accelerator may be accessed from this queue. Full nodes, 16 cores per
  node, are allocated. The queue runs with very low priority and no
  special authorization is required to use it. The maximum runtime in
  qfree is 12 hours.

### Notes

The job wall clock time defaults to **half the maximum time**, see table
above. Longer wall time limits can be [set manually, see
examples](job-submission-and-execution.html); a minimal example follows
below.
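For instance, a minimal sketch requesting the full 48 hour limit in the
qprod queue (PROJECT_ID is a placeholder):

`
$ qsub -A PROJECT_ID -q qprod -l select=4:ncpus=16,walltime=48:00:00 ./myjob
`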
+ +Jobs that exceed the reserved wall clock time (Req'd Time) get killed +automatically. Wall clock time limit can be changed for queuing jobs +(state Q) using the qalter command, however can not be changed for a +running job (state R). + +Anselm users may check current queue configuration at +<https://extranet.it4i.cz/anselm/queues>. + +### Queue status + +Check the status of jobs, queues and compute nodes at +<https://extranet.it4i.cz/anselm/> + + + +Display the queue status on Anselm: + +` +$ qstat -q +` + +The PBS allocation overview may be obtained also using the rspbs +command. + +` +$ rspbs +Usage: rspbs [options] + +Options: + --version            show program's version number and exit + -h, --help           show this help message and exit + --get-node-ncpu-chart +                       Print chart of allocated ncpus per node + --summary            Print summary + --get-server-details Print server + --get-queues         Print queues + --get-queues-details Print queues details + --get-reservations   Print reservations + --get-reservations-details +                       Print reservations details + --get-nodes          Print nodes of PBS complex + --get-nodeset        Print nodeset of PBS complex + --get-nodes-details  Print nodes details + --get-jobs           Print jobs + --get-jobs-details   Print jobs details + --get-jobs-check-params +                       Print jobid, job state, session_id, user, nodes + --get-users          Print users of jobs + --get-allocated-nodes +                       Print allocated nodes of jobs + --get-allocated-nodeset +                       Print allocated nodeset of jobs + --get-node-users     Print node users + --get-node-jobs      Print node jobs + --get-node-ncpus     Print number of ncpus per node + --get-node-allocated-ncpus +                       Print number of allocated ncpus per node + --get-node-qlist     Print node qlist + --get-node-ibswitch  Print node ibswitch + --get-user-nodes     Print user nodes + --get-user-nodeset   Print user nodeset + --get-user-jobs      Print user jobs + --get-user-jobc      Print number of jobs per user + --get-user-nodec     Print number of allocated nodes per user + --get-user-ncpus     Print number of allocated ncpus per user + --get-qlist-nodes    Print qlist nodes + --get-qlist-nodeset  Print qlist nodeset + --get-ibswitch-nodes Print ibswitch nodes + --get-ibswitch-nodeset +                       Print ibswitch nodeset + --state=STATE        Only for given job state + --jobid=JOBID        Only for given job ID + --user=USER          Only for given user + --node=NODE          Only for given node + --nodestate=NODESTATE +                       Only for given node state (affects only --get-node* +                       --get-qlist-* --get-ibswitch-* actions) + --incl-finished      Include finished jobs +` + +Resources Accounting Policy +------------------------------- + +### The Core-Hour + +The resources that are currently subject to accounting are the +core-hours. The core-hours are accounted on the wall clock basis. The +accounting runs whenever the computational cores are allocated or +blocked via the PBS Pro workload manager (the qsub command), regardless +of whether the cores are actually used for any calculation. 1 core-hour +is defined as 1 processor core allocated for 1 hour of wall clock time. +Allocating a full node (16 cores) for 1 hour accounts to 16 core-hours. +See example in the [Job submission and +execution](job-submission-and-execution.html) section. 
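For example, a job holding 4 full nodes (4 x 16 = 64 cores) for 6 hours
of wall clock time is accounted 64 * 6 = 384 core-hours, regardless of
how much computation the cores actually performed.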
+ +### Check consumed resources + +The **it4ifree** command is a part of it4i.portal.clients package, +located here: +<https://pypi.python.org/pypi/it4i.portal.clients> + +User may check at any time, how many core-hours have been consumed by +himself/herself and his/her projects. The command is available on +clusters' login nodes. + +` +$ it4ifree +Password: +    PID  Total Used ...by me Free +  -------- ------- ------ -------- ------- +  OPEN-0-0 1500000 400644  225265 1099356 +  DD-13-1   10000 2606 2606 7394 +` + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/rsweb.png b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/rsweb.png new file mode 100644 index 0000000000000000000000000000000000000000..568d23763fe2dacc3a86df3a2d4f37018bdf8399 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/rsweb.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/scheme.png b/converted/docs.it4i.cz/anselm-cluster-documentation/scheme.png new file mode 100644 index 0000000000000000000000000000000000000000..0f23978b92279535e42d93be79db253b03290c31 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/scheme.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys.md new file mode 100644 index 0000000000000000000000000000000000000000..e5130c0709a9ea0b0dccb4808bcf53472da91af8 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys.md @@ -0,0 +1,33 @@ +Overview of ANSYS Products +========================== + +[SVS FEM](http://www.svsfem.cz/)** as **[ANSYS +Channel partner](http://www.ansys.com/)** for Czech +Republic provided all ANSYS licenses for ANSELM cluster and supports of +all ANSYS Products (Multiphysics, Mechanical, MAPDL, CFX, Fluent, +Maxwell, LS-DYNA...) to IT staff and ANSYS users. If you are challenging +to problem of ANSYS functionality contact +please [hotline@svsfem.cz](mailto:hotline@svsfem.cz?subject=Ostrava%20-%20ANSELM) + +Anselm provides as commercial as academic variants. Academic variants +are distinguished by "**Academic...**" word in the name of  license or +by two letter preposition "**aa_**" in the license feature name. Change +of license is realized on command line respectively directly in user's +pbs file (see individual products). [ + More about +licensing +here](ansys/licensing.html) + +To load the latest version of any ANSYS product (Mechanical, Fluent, +CFX, MAPDL,...) load the module: + + $ module load ansys + +ANSYS supports interactive regime, but due to assumed solution of +extremely difficult tasks it is not recommended. + +If user needs to work in interactive regime we recommend to configure +the RSM service on the client machine which allows to forward the +solution to the Anselm directly from the client's Workbench project +(see ANSYS RSM service). 
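Where a specific ANSYS release rather than the default is required, the
available builds can be listed before loading. A minimal sketch; the
version shown is only an example of the naming scheme, not a guaranteed
installed release:

    $ module avail ansys
    $ module load ansys/15.0.7    # hypothetical version; pick one from the avail listing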
+ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_1.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..786e9ce1f0c190e5ef83bfe691596d298b9efc15 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_1.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_2.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..77dc3d7cfeddad08626234586cdf4228fdb69ee9 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_2.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_3.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9a9d57cba8fc61ee756b49ced8888a95da46737b Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_3.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_4.jpg b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3027bc33158c70fd3b2844de87dbd3e0649300c2 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/Fluent_Licence_4.jpg differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.md new file mode 100644 index 0000000000000000000000000000000000000000..5d50cda135ee04d574eb928dbb7b2aedf1a87013 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.md @@ -0,0 +1,87 @@ +ANSYS CFX +========= + +[ANSYS +CFX](http://www.ansys.com/Products/Simulation+Technology/Fluid+Dynamics/Fluid+Dynamics+Products/ANSYS+CFX) +software is a high-performance, general purpose fluid dynamics program +that has been applied to solve wide-ranging fluid flow problems for over +20 years. At the heart of ANSYS CFX is its advanced solver technology, +the key to achieving reliable and accurate solutions quickly and +robustly. The modern, highly parallelized solver is the foundation for +an abundant choice of physical models to capture virtually any type of +phenomena related to fluid flow. The solver and its many physical models +are wrapped in a modern, intuitive, and flexible GUI and user +environment, with extensive capabilities for customization and +automation using session files, scripting and a powerful expression +language. + +To run ANSYS CFX in batch mode you can utilize/modify the default +cfx.pbs script and execute it via the qsub command. + +` +#!/bin/bash +#PBS -l nodes=2:ppn=16 +#PBS -q qprod +#PBS -N $USER-CFX-Project +#PBS -A XX-YY-ZZ + +#! 
Mail to user when job terminate or abort +#PBS -m ae + +#!change the working directory (default is home directory) +#cd <working directory> (working directory must exists) +WORK_DIR="/scratch/$USER/work" +cd $WORK_DIR + +echo Running on host `hostname` +echo Time is `date` +echo Directory is `pwd` +echo This jobs runs on the following processors: +echo `cat $PBS_NODEFILE` + +module load ansys + +#### Set number of processors per host listing +#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2) +procs_per_host=1 +#### Create host list +hl="" +for host in `cat $PBS_NODEFILE` +do + if [ "$hl" = "" ] + then hl="$host:$procs_per_host" + else hl="${hl}:$host:$procs_per_host" + fi +done + +echo Machines: $hl + +#-dev input.def includes the input of CFX analysis in DEF format +#-P the name of prefered license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial)) +/ansys_inc/v145/CFX/bin/cfx5solve -def input.def -size 4 -size-ni 4x -part-large -start-method "Platform MPI Distributed Parallel" -par-dist $hl -P aa_r +` + +Header of the pbs file (above) is common and description can be find +on [this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html). +SVS FEM recommends to utilize sources by keywords: nodes, ppn. These +keywords allows to address directly the number of nodes (computers) and +cores (ppn) which will be utilized in the job. Also the rest of code +assumes such structure of allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. >Input file has to be defined by common +CFX def file which is attached to the cfx solver via parameter +-def + +License** should be selected by parameter -P (Big letter **P**). +Licensed products are the following: aa_r +(ANSYS **Academic Research), ane3fl (ANSYS +Multiphysics)-**Commercial. +[ More + about licensing +here](licensing.html) + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.md new file mode 100644 index 0000000000000000000000000000000000000000..ab675d3381dc6377e63df7b7a5db21ff7db72aff --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.md @@ -0,0 +1,228 @@ +ANSYS Fluent +============ + +[ANSYS +Fluent](http://www.ansys.com/Products/Simulation+Technology/Fluid+Dynamics/Fluid+Dynamics+Products/ANSYS+Fluent) +software contains the broad physical modeling capabilities needed to +model flow, turbulence, heat transfer, and reactions for industrial +applications ranging from air flow over an aircraft wing to combustion +in a furnace, from bubble columns to oil platforms, from blood flow to +semiconductor manufacturing, and from clean room design to wastewater +treatment plants. Special models that give the software the ability to +model in-cylinder combustion, aeroacoustics, turbomachinery, and +multiphase systems have served to broaden its reach. + +1. Common way to run Fluent over pbs file +------------------------------------------------------ + +To run ANSYS Fluent in batch mode you can utilize/modify the +default fluent.pbs script and execute it via the qsub command. + +` +#!/bin/bash +#PBS -S /bin/bash +#PBS -l nodes=2:ppn=16 +#PBS -q qprod +#PBS -N $USER-Fluent-Project +#PBS -A XX-YY-ZZ + +#! 
Mail to user when job terminate or abort +#PBS -m ae + +#!change the working directory (default is home directory) +#cd <working directory> (working directory must exists) +WORK_DIR="/scratch/$USER/work" +cd $WORK_DIR + +echo Running on host `hostname` +echo Time is `date` +echo Directory is `pwd` +echo This jobs runs on the following processors: +echo `cat $PBS_NODEFILE` + +#### Load ansys module so that we find the cfx5solve command +module load ansys + +# Use following line to specify MPI for message-passing instead +NCORES=`wc -l $PBS_NODEFILE |awk '{print $1}'` + +/ansys_inc/v145/fluent/bin/fluent 3d -t$NCORES -cnf=$PBS_NODEFILE -g -i fluent.jou +` + +Header of the pbs file (above) is common and description can be find +on [this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html). +[SVS FEM](http://www.svsfem.cz) recommends to utilize +sources by keywords: nodes, ppn. These keywords allows to address +directly the number of nodes (computers) and cores (ppn) which will be +utilized in the job. Also the rest of code assumes such structure of +allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. Input file has to be defined by common Fluent +journal file which is attached to the Fluent solver via parameter -i +fluent.jou + +Journal file with definition of the input geometry and boundary +conditions and defined process of solution has e.g. the following +structure: + + /file/read-case aircraft_2m.cas.gz + /solve/init + init + /solve/iterate + 10 + /file/write-case-dat aircraft_2m-solution + /exit yes + +The appropriate dimension of the problem has to be set by +parameter (2d/3d). + +2. Fast way to run Fluent from command line +-------------------------------------------------------- + +` +fluent solver_version [FLUENT_options] -i journal_file -pbs +` + +This syntax will start the ANSYS FLUENT job under PBS Professional using +the qsub command in a batch manner. When +resources are available, PBS Professional will start the job and return +a job ID, usually in the form of +*job_ID.hostname*. This job ID can then be used +to query, control, or stop the job using standard PBS Professional +commands, such as qstat or +qdel. The job will be run out of the current +working directory, and all output will be written to the file +fluent.o> +*job_ID*.     + +3. Running Fluent via user's config file +---------------------------------------- + +The sample script uses a configuration file called +pbs_fluent.conf  if no command line arguments +are present. This configuration file should be present in the directory +from which the jobs are submitted (which is also the directory in which +the jobs are executed). The following is an example of what the content +of pbs_fluent.conf can be: + +` +input="example_small.flin" +case="Small-1.65m.cas" +fluent_args="3d -pmyrinet" +outfile="fluent_test.out" +mpp="true" +` + +The following is an explanation of the parameters: + + input is the name of the input +file. + + case is the name of the +.cas file that the input file will utilize. + + fluent_args are extra ANSYS FLUENT +arguments. As shown in the previous example, you can specify the +interconnect by using the -p interconnect +command. The available interconnects include +ethernet (the default), +myrinet, class="monospace"> +infiniband, vendor, +altix>, and +crayx. The MPI is selected automatically, based +on the specified interconnect. 
+
+ outfile is the name of the file to which
+the standard output will be sent.
+
+ mpp="true" will tell the job script to
+execute the job across multiple processors.
+
+To run ANSYS Fluent in batch mode with a user's configuration file, you
+can utilize/modify the following script and execute it via the qsub
+command.
+
+`
+#!/bin/sh
+#PBS -l nodes=2:ppn=4
+#PBS -q qprod
+#PBS -N $USER-Fluent-Project
+#PBS -A XX-YY-ZZ
+
+ cd $PBS_O_WORKDIR
+
+ #We assume that if they didn't specify arguments then they should use the
+ #config file
+ if [ "xx${input}${case}${mpp}${fluent_args}zz" = "xxzz" ]; then
+   if [ -f pbs_fluent.conf ]; then
+     . pbs_fluent.conf
+   else
+     printf "No command line arguments specified, "
+     printf "and no configuration file found. Exiting\n"
+   fi
+ fi
+
+ #Augment the ANSYS FLUENT command line arguments
+ case "$mpp" in
+ true)
+   #MPI job execution scenario
+   num_nodes=`cat $PBS_NODEFILE | sort -u | wc -l`
+   cpus=`expr $num_nodes \* $NCPUS`
+   #Default arguments for mpp jobs, these should be changed to suit your
+   #needs.
+   fluent_args="-t${cpus} $fluent_args -cnf=$PBS_NODEFILE"
+   ;;
+ *)
+   #SMP case
+   #Default arguments for smp jobs, should be adjusted to suit your
+   #needs.
+   fluent_args="-t$NCPUS $fluent_args"
+   ;;
+ esac
+ #Default arguments for all jobs
+ fluent_args="-ssh -g -i $input $fluent_args"
+
+ echo "---------- Going to start a fluent job with the following settings:
+ Input: $input
+ Case: $case
+ Output: $outfile
+ Fluent arguments: $fluent_args"
+
+ #run the solver
+ /ansys_inc/v145/fluent/bin/fluent $fluent_args > $outfile
+`
+
+It runs the jobs out of the directory from which they are
+submitted (PBS_O_WORKDIR).
+
+4. Running Fluent in parallel
+-----------------------------
+
+Fluent can be run in parallel only under the Academic Research license.
+To do so, the ANSYS Academic Research license must be placed before the
+ANSYS CFD license in the user preferences. To make this change, run the
+anslic_admin utility:
+
+`
+/ansys_inc/shared_les/licensing/lic_admin/anslic_admin
+`
+
+The ANSLIC_ADMIN utility will start.
+
+The ANSYS Academic Research license should be moved up to the top of the
+list.
+
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.md
new file mode 100644
index 0000000000000000000000000000000000000000..397aa94adba08eb00f01dec704c3031739e8d110
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.md
@@ -0,0 +1,86 @@
+ANSYS LS-DYNA
+=============
+
+[ANSYS
+LS-DYNA](http://www.ansys.com/Products/Simulation+Technology/Structural+Mechanics/Explicit+Dynamics/ANSYS+LS-DYNA)
+software provides convenient and easy-to-use access to the
+technology-rich, time-tested explicit solver without the need to contend
+with the complex input requirements of this sophisticated program.
+Introduced in 1996, ANSYS LS-DYNA capabilities have helped customers in
+numerous industries to resolve highly intricate design
+issues. ANSYS Mechanical users have been able to take advantage of
+complex explicit solutions for a long time utilizing the traditional
+ANSYS Parametric Design Language (APDL) environment. These
+explicit capabilities are available to ANSYS Workbench users as well.
+The Workbench platform is a powerful, comprehensive, easy-to-use
+environment for engineering simulation. 
CAD import from all sources, +geometry cleanup, automatic meshing, solution, parametric optimization, +result visualization and comprehensive report generation are all +available within a single fully interactive modern graphical user +environment. + +To run ANSYS LS-DYNA in batch mode you can utilize/modify the +default ansysdyna.pbs script and execute it via the qsub command. + +` +#!/bin/bash +#PBS -l nodes=2:ppn=16 +#PBS -q qprod +#PBS -N $USER-DYNA-Project +#PBS -A XX-YY-ZZ + +#! Mail to user when job terminate or abort +#PBS -m ae + +#!change the working directory (default is home directory) +#cd <working directory> +WORK_DIR="/scratch/$USER/work" +cd $WORK_DIR + +echo Running on host `hostname` +echo Time is `date` +echo Directory is `pwd` +echo This jobs runs on the following processors: +echo `cat $PBS_NODEFILE` + +#! Counts the number of processors +NPROCS=`wc -l < $PBS_NODEFILE` + +echo This job has allocated $NPROCS nodes + +module load ansys + +#### Set number of processors per host listing +#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2) +procs_per_host=1 +#### Create host list +hl="" +for host in `cat $PBS_NODEFILE` +do + if [ "$hl" = "" ] + then hl="$host:$procs_per_host" + else hl="${hl}:$host:$procs_per_host" + fi +done + +echo Machines: $hl + +/ansys_inc/v145/ansys/bin/ansys145 -dis -lsdynampp i=input.k -machines $hl +` + +Header of the pbs file (above) is common and description can be +find on [this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html)>. +[SVS FEM](http://www.svsfem.cz) recommends to utilize +sources by keywords: nodes, ppn. These keywords allows to address +directly the number of nodes (computers) and cores (ppn) which will be +utilized in the job. Also the rest of code assumes such structure of +allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. Input file has to be defined by common LS-DYNA +.**k** file which is attached to the ansys solver via parameter i= + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.md new file mode 100644 index 0000000000000000000000000000000000000000..ac6357c2f9a6df62253546816739944985f0270e --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.md @@ -0,0 +1,81 @@ +ANSYS MAPDL +=========== + +**[ANSYS +Multiphysics](http://www.ansys.com/Products/Simulation+Technology/Structural+Mechanics/ANSYS+Multiphysics)** +software offers a comprehensive product solution for both multiphysics +and single-physics analysis. The product includes structural, thermal, +fluid and both high- and low-frequency electromagnetic analysis. The +product also contains solutions for both direct and sequentially coupled +physics problems including direct coupled-field elements and the ANSYS +multi-field solver. + +To run ANSYS MAPDL in batch mode you can utilize/modify the +default mapdl.pbs script and execute it via the qsub command. + +` +#!/bin/bash +#PBS -l nodes=2:ppn=16 +#PBS -q qprod +#PBS -N $USER-ANSYS-Project +#PBS -A XX-YY-ZZ + +#! 
Mail to user when job terminate or abort +#PBS -m ae + +#!change the working directory (default is home directory) +#cd <working directory> (working directory must exists) +WORK_DIR="/scratch/$USER/work" +cd $WORK_DIR + +echo Running on host `hostname` +echo Time is `date` +echo Directory is `pwd` +echo This jobs runs on the following processors: +echo `cat $PBS_NODEFILE` + +module load ansys + +#### Set number of processors per host listing +#### (set to 1 as $PBS_NODEFILE lists each node twice if :ppn=2) +procs_per_host=1 +#### Create host list +hl="" +for host in `cat $PBS_NODEFILE` +do + if [ "$hl" = "" ] + then hl="$host:$procs_per_host" + else hl="${hl}:$host:$procs_per_host" + fi +done + +echo Machines: $hl + +#-i input.dat includes the input of analysis in APDL format +#-o file.out is output file from ansys where all text outputs will be redirected +#-p the name of license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial), aa_r_dy=Academic AUTODYN) +/ansys_inc/v145/ansys/bin/ansys145 -b -dis -p aa_r -i input.dat -o file.out -machines $hl -dir $WORK_DIR +` + +Header of the pbs file (above) is common and description can be find on +[this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html). +[SVS FEM](http://www.svsfem.cz) recommends to utilize +sources by keywords: nodes, ppn. These keywords allows to address +directly the number of nodes (computers) and cores (ppn) which will be +utilized in the job. Also the rest of code assumes such structure of +allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. Input file has to be defined by common APDL +file which is attached to the ansys solver via parameter -i + +License** should be selected by parameter -p. Licensed products are +the following: aa_r (ANSYS **Academic Research), ane3fl (ANSYS +Multiphysics)-**Commercial**, aa_r_dy (ANSYS **Academic +AUTODYN)> +[ More + about licensing +here](licensing.html) + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.md new file mode 100644 index 0000000000000000000000000000000000000000..c2a86aa8574d0e7491af73766adce0e82c56bcd5 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.md @@ -0,0 +1,65 @@ +LS-DYNA +======= + +[LS-DYNA](http://www.lstc.com/) is a multi-purpose, +explicit and implicit finite element program used to analyze the +nonlinear dynamic response of structures. Its fully automated contact +analysis capability, a wide range of constitutive models to simulate a +whole range of engineering materials (steels, composites, foams, +concrete, etc.), error-checking features and the high scalability have +enabled users worldwide to solve successfully many complex +problems. >Additionally LS-DYNA is extensively used to simulate +impacts on structures from drop tests, underwater shock, explosions or +high-velocity impacts. Explosive forming, process engineering, accident +reconstruction, vehicle dynamics, thermal brake disc analysis or nuclear +safety are further areas in the broad range of possible applications. In +leading-edge research LS-DYNA is used to investigate the behaviour of +materials like composites, ceramics, concrete, or wood. 
Moreover, it is +used in biomechanics, human modelling, molecular structures, casting, +forging, or virtual testing. + +Anselm provides **1 commercial license of LS-DYNA without HPC** +support now. + +To run LS-DYNA in batch mode you can utilize/modify the +default lsdyna.pbs script and execute it via the qsub +command. + +` +#!/bin/bash +#PBS -l nodes=1:ppn=16 +#PBS -q qprod +#PBS -N $USER-LSDYNA-Project +#PBS -A XX-YY-ZZ + +#! Mail to user when job terminate or abort +#PBS -m ae + +#!change the working directory (default is home directory) +#cd <working directory> (working directory must exists) +WORK_DIR="/scratch/$USER/work" +cd $WORK_DIR + +echo Running on host `hostname` +echo Time is `date` +echo Directory is `pwd` + +module load lsdyna + +/apps/engineering/lsdyna/lsdyna700s i=input.k +` + +Header of the pbs file (above) is common and description can be find +on [this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html). +[SVS FEM](http://www.svsfem.cz) recommends to utilize +sources by keywords: nodes, ppn. These keywords allows to address +directly the number of nodes (computers) and cores (ppn) which will be +utilized in the job. Also the rest of code assumes such structure of +allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. Input file has to be defined by common LS-DYNA +.k** file which is attached to the LS-DYNA solver via parameter i= + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.md new file mode 100644 index 0000000000000000000000000000000000000000..ff0be71faecd1c1b6374e9b2d92fbf44bece4eda --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.md @@ -0,0 +1,91 @@ +Molpro +====== + +Molpro is a complete system of ab initio programs for molecular +electronic structure calculations. + +About Molpro +------------ + +Molpro is a software package used for accurate ab-initio quantum +chemistry calculations. More information can be found at the [official +webpage](http://www.molpro.net/). + +License +------- + +Molpro software package is available only to users that have a valid +license. Please contact support to enable access to Molpro if you have a +valid license appropriate for running on our cluster (eg. >academic +research group licence, parallel execution). + +To run Molpro, you need to have a valid license token present in +" $HOME/.molpro/token". You can +download the token from [Molpro +website](https://www.molpro.net/licensee/?portal=licensee). + +Installed version +----------------- + +Currently on Anselm is installed version 2010.1, patch level 45, +parallel version compiled with Intel compilers and Intel MPI. + +Compilation parameters are default : + + |Parameter|Value| + ------------------------------------------- |---|---|------------------- + |max number of atoms|200| + |max number of valence orbitals|300| + |max number of basis functions|4095| + |max number of states per symmmetry|20| + |max number of state symmetries|16| + |max number of records|200| + |max number of primitives|maxbfn x [2]| + + + +Running +------- + +Molpro is compiled for parallel execution using MPI and OpenMP. By +default, Molpro reads the number of allocated nodes from PBS and +launches a data server on one node. 
On the remaining allocated nodes, +compute processes are launched, one process per node, each with 16 +threads. You can modify this behavior by using -n, -t and helper-server +options. Please refer to the [Molpro +documentation](http://www.molpro.net/info/2010.1/doc/manual/node9.html) +for more details. + +The OpenMP parallelization in Molpro is limited and has been observed to +produce limited scaling. We therefore recommend to use MPI +parallelization only. This can be achieved by passing option +mpiprocs=16:ompthreads=1 to PBS. + +You are advised to use the -d option to point to a directory in [SCRATCH +filesystem](../../storage.html). Molpro can produce a +large amount of temporary data during its run, and it is important that +these are placed in the fast scratch filesystem. + +### Example jobscript + + #PBS -A IT4I-0-0 + #PBS -q qprod + #PBS -l select=1:ncpus=16:mpiprocs=16:ompthreads=1 + + cd $PBS_O_WORKDIR + + # load Molpro module + module add molpro + + # create a directory in the SCRATCH filesystem + mkdir -p /scratch/$USER/$PBS_JOBID + + # copy an example input + cp /apps/chem/molpro/2010.1/molprop_2010_1_Linux_x86_64_i8/examples/caffeine_opt_diis.com . + + # run Molpro with default options + molpro -d /scratch/$USER/$PBS_JOBID caffeine_opt_diis.com + + # delete scratch directory + rm -rf /scratch/$USER/$PBS_JOBID + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.md new file mode 100644 index 0000000000000000000000000000000000000000..d52644e00840ab4ec5be1f3d856e2e2b82a83d45 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.md @@ -0,0 +1,66 @@ +NWChem +====== + +High-Performance Computational Chemistry + +Introduction +------------------------- + +NWChem aims to provide its users with computational chemistry +tools that are scalable both in their ability to treat large scientific +computational chemistry problems efficiently, and in their use of +available parallel computing resources from high-performance parallel +supercomputers to conventional workstation clusters. + +[Homepage](http://www.nwchem-sw.org/index.php/Main_Page) + +Installed versions +------------------ + +The following versions are currently installed : + +- 6.1.1, not recommended, problems have been observed with this + version + +- 6.3-rev2-patch1, current release with QMD patch applied. Compiled + with Intel compilers, MKL and Intel MPI + +- 6.3-rev2-patch1-openmpi, same as above, but compiled with OpenMPI + and NWChem provided BLAS instead of MKL. This version is expected to + be slower + +- 6.3-rev2-patch1-venus, this version contains only libraries for + VENUS interface linking. Does not provide standalone NWChem + executable + +For a current list of installed versions, execute : + + module avail nwchem + +Running +------- + +NWChem is compiled for parallel MPI execution. Normal procedure for MPI +jobs applies. 
Sample jobscript : + + #PBS -A IT4I-0-0 + #PBS -q qprod + #PBS -l select=1:ncpus=16 + + module add nwchem/6.3-rev2-patch1 + mpirun -np 16 nwchem h2o.nw + +Options +-------------------- + +Please refer to [the +documentation](http://www.nwchem-sw.org/index.php/Release62:Top-level) and +in the input file set the following directives : + +- >MEMORY : controls the amount of memory NWChem will use +- >SCRATCH_DIR : set this to a directory in [SCRATCH + filesystem](../../storage.html#scratch) (or run the + calculation completely in a scratch directory). For certain + calculations, it might be advisable to reduce I/O by forcing + "direct" mode, eg. "scf direct" + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/compilers.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/compilers.md new file mode 100644 index 0000000000000000000000000000000000000000..7450d0e1457ceb3e6d5230031834c6284a68c1e5 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/compilers.md @@ -0,0 +1,163 @@ +Compilers +========= + +Available compilers, including GNU, INTEL and UPC compilers + + + +Currently there are several compilers for different programming +languages available on the Anselm cluster: + +- C/C++ +- Fortran 77/90/95 +- Unified Parallel C +- Java +- nVidia CUDA + + + +The C/C++ and Fortran compilers are divided into two main groups GNU and +Intel. + +Intel Compilers +--------------- + +For information about the usage of Intel Compilers and other Intel +products, please read the [Intel Parallel +studio](intel-suite.html) page. + +GNU C/C++ and Fortran Compilers +------------------------------- + +For compatibility reasons there are still available the original (old +4.4.6-4) versions of GNU compilers as part of the OS. These are +accessible in the search path by default. + +It is strongly recommended to use the up to date version (4.8.1) which +comes with the module gcc: + + $ module load gcc + $ gcc -v + $ g++ -v + $ gfortran -v + +With the module loaded two environment variables are predefined. One for +maximum optimizations on the Anselm cluster architecture, and the other +for debugging purposes: + + $ echo $OPTFLAGS + -O3 -march=corei7-avx + + $ echo $DEBUGFLAGS + -O0 -g + +For more informations about the possibilities of the compilers, please +see the man pages. + +Unified Parallel C +------------------ + +UPC is supported by two compiler/runtime implementations: + +- GNU - SMP/multi-threading support only +- Berkley - multi-node support as well as SMP/multi-threading support + +### GNU UPC Compiler + +To use the GNU UPC compiler and run the compiled binaries use the module +gupc + + $ module add gupc + $ gupc -v + $ g++ -v + +Simple program to test the compiler + + $ cat count.upc + + /* hello.upc - a simple UPC example */ + #include <upc.h> + #include <stdio.h> + + int main() { +  if (MYTHREAD == 0) { +    printf("Welcome to GNU UPC!!!n"); +  } +  upc_barrier; +  printf(" - Hello from thread %in", MYTHREAD); +  return 0; + } + +To compile the example use + + $ gupc -o count.upc.x count.upc + +To run the example with 5 threads issue + + $ ./count.upc.x -fupc-threads-5 + +For more informations see the man pages. + +### Berkley UPC Compiler + +To use the Berkley UPC compiler and runtime environment to run the +binaries use the module bupc + + $ module add bupc + $ upcc -version + +As default UPC network the "smp" is used. This is very quick and easy +way for testing/debugging, but limited to one node only. 
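+
+For example, a quick single-node test build against the default "smp"
+network might look like this (a minimal sketch; test.upc stands for any
+UPC source file, such as the hello.upc example shown below):
+
+    $ upcc -network=smp -o test.upc.x test.upc
+    $ upcrun -n 4 ./test.upc.x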
+ +For production runs, it is recommended to use the native Infiband +implementation of UPC network "ibv". For testing/debugging using +multiple nodes, the "mpi" UPC network is recommended. Please note, that +the selection of the network is done at the compile time** and not at +runtime (as expected)! + +Example UPC code: + + $ cat hello.upc + + /* hello.upc - a simple UPC example */ + #include <upc.h> + #include <stdio.h> + + int main() { +  if (MYTHREAD == 0) { +    printf("Welcome to Berkeley UPC!!!n"); +  } +  upc_barrier; +  printf(" - Hello from thread %in", MYTHREAD); +  return 0; + } + +To compile the example with the "ibv" UPC network use + + $ upcc -network=ibv -o hello.upc.x hello.upc + +To run the example with 5 threads issue + + $ upcrun -n 5 ./hello.upc.x + +To run the example on two compute nodes using all 32 cores, with 32 +threads, issue + + $ qsub -I -q qprod -A PROJECT_ID -l select=2:ncpus=16 + $ module add bupc + $ upcrun -n 32 ./hello.upc.x + + For more informations see the man pages. + +Java +---- + +For information how to use Java (runtime and/or compiler), please read +the [Java page](java.html). + +nVidia CUDA +----------- + +For information how to work with nVidia CUDA, please read the [nVidia +CUDA page](nvidia-cuda.html). + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.md new file mode 100644 index 0000000000000000000000000000000000000000..25f1c4d0fd1cf538124f230a2297a279820118ce --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.md @@ -0,0 +1,204 @@ +COMSOL Multiphysics® +==================== + + + +Introduction + +------------------------- + +[COMSOL](http://www.comsol.com) +is a powerful environment for modelling and solving various engineering +and scientific problems based on partial differential equations. COMSOL +is designed to solve coupled or multiphysics phenomena. For many +standard engineering problems COMSOL provides add-on products such as +electrical, mechanical, fluid flow, and chemical +applications. + +- >[Structural Mechanics + Module](http://www.comsol.com/structural-mechanics-module), + + +- >[Heat Transfer + Module](http://www.comsol.com/heat-transfer-module), + + +- >[CFD + Module](http://www.comsol.com/cfd-module), + + +- >[Acoustics + Module](http://www.comsol.com/acoustics-module), + + +- >and [many + others](http://www.comsol.com/products) + +COMSOL also allows an +interface support for +equation-based modelling of +partial differential +equations. + +Execution + +---------------------- + +On the Anselm cluster COMSOL is available in the latest +stable version. There are two variants of the release: + +- >**Non commercial** or so + called >**EDU + variant**>, which can be used for research + and educational purposes. + +- >**Commercial** or so called + >**COM variant**, + which can used also for commercial activities. + >**COM variant** + has only subset of features compared to the + >**EDU + variant**> available. + + More about + licensing will be posted here + soon. + + +To load the of COMSOL load the module + +` +$ module load comsol +` + +By default the **EDU +variant**> will be loaded. If user needs other +version or variant, load the particular version. 
To obtain the list of
+available versions use
+
+`
+$ module avail comsol
+`
+
+If you need to prepare COMSOL jobs in interactive mode, it is
+recommended to use COMSOL on the compute nodes via the PBS Pro
+scheduler. To run the COMSOL Desktop GUI on Windows, it is recommended
+to use the [Virtual Network Computing
+(VNC)](https://docs.it4i.cz/anselm-cluster-documentation/software/comsol/resolveuid/11e53ad0d2fd4c5187537f4baeedff33).
+
+`
+$ xhost +
+$ qsub -I -X -A PROJECT_ID -q qprod -l select=1:ncpus=16
+$ module load comsol
+$ comsol
+`
+
+To run COMSOL in batch mode, without the COMSOL Desktop GUI
+environment, you can utilize the default (comsol.pbs) job script and
+execute it via the qsub command.
+
+`
+#!/bin/bash
+#PBS -l select=3:ncpus=16
+#PBS -q qprod
+#PBS -N JOB_NAME
+#PBS -A PROJECT_ID

+cd /scratch/$USER/ || exit
+
+echo Time is `date`
+echo Directory is `pwd`
+echo '**PBS_NODEFILE***START*******'
+cat $PBS_NODEFILE
+echo '**PBS_NODEFILE***END*********'
+
+text_nodes < cat $PBS_NODEFILE
+
+module load comsol
+# module load comsol/43b-COM
+
+ntask=$(wc -l < $PBS_NODEFILE)
+
+comsol -nn ${ntask} batch -configuration /tmp -mpiarg -rmk -mpiarg pbs -tmpdir /scratch/$USER/ -inputfile name_input_f.mph -outputfile name_output_f.mph -batchlog name_log_f.log
+`
+
+The working directory has to be created before sending the
+(comsol.pbs) job script into the queue. The input file (name_input_f.mph)
+has to be in the working directory, or the full path to the input file
+has to be specified. The appropriate path to the temp directory of the
+job has to be set by the command option (-tmpdir).
+
+LiveLink™ for MATLAB®
+-------------------------
+
+COMSOL is a software package for the numerical solution of
+partial differential equations. LiveLink for MATLAB allows
+connection to the COMSOL®
+API (Application Programming Interface) with the benefits of the
+programming language and computing environment of MATLAB.
+
+LiveLink for MATLAB is available in both the
+**EDU** and
+**COM**
+variants of the
+COMSOL release. On Anselm, 1 commercial
+(**COM**) license
+and 5 educational
+(**EDU**) licenses
+of LiveLink for MATLAB are available (please see the [ISV
+Licenses](../isv_licenses.html)).
+The following example shows how to start a COMSOL model from MATLAB via
+LiveLink in interactive mode.
+
+`
+$ xhost +
+$ qsub -I -X -A PROJECT_ID -q qexp -l select=1:ncpus=16
+$ module load matlab
+$ module load comsol
+$ comsol server matlab
+`
+
+The first time LiveLink for MATLAB is launched
+(client-MATLAB/server-COMSOL connection), a login and password are
+requested; this information is not requested again.
+
+To run LiveLink for MATLAB in batch mode with the
+(comsol_matlab.pbs) job script, you can utilize/modify the following
+script and execute it via the qsub command. 
+ +` +#!/bin/bash +#PBS -l select=3:ncpus=16 +#PBS -q qprod +#PBS -N JOB_NAME +#PBS -A PROJECT_ID + +cd /scratch/$USER || exit + +echo Time is `date` +echo Directory is `pwd` +echo '**PBS_NODEFILE***START*******' +cat $PBS_NODEFILE +echo '**PBS_NODEFILE***END*********' + +text_nodes < cat $PBS_NODEFILE + +module load matlab +module load comsol/43b-EDU + +ntask=$(wc -l $PBS_NODEFILE) + +comsol -nn ${ntask} server -configuration /tmp -mpiarg -rmk -mpiarg pbs -tmpdir /scratch/$USER & +cd /apps/engineering/comsol/comsol43b/mli +matlab -nodesktop -nosplash -r "mphstart; addpath /scratch/$USER; test_job" +` + +This example shows how to run Livelink for MATLAB with following +configuration: 3 nodes and 16 cores per node. Working directory has to +be created before submitting (comsol_matlab.pbs) job script into the +queue. Input file (test_job.m) has to be in working directory or full +path to input file has to be specified. The Matlab command option (-r +”mphstart”) created a connection with a COMSOL server using the default +port number. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.md new file mode 100644 index 0000000000000000000000000000000000000000..f24688bd8fb51a85628a030f8f76379432820952 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.md @@ -0,0 +1,89 @@ +Debuggers and profilers summary +=============================== + + + +Introduction +------------ + +We provide state of the art programms and tools to develop, profile and +debug HPC codes at IT4Innovations. +On these pages, we provide an overview of the profiling and debugging +tools available on Anslem at IT4I. + +Intel debugger +-------------- + +The intel debugger version 13.0 is available, via module intel. The +debugger works for applications compiled with C and C++ compiler and the +ifort fortran 77/90/95 compiler. The debugger provides java GUI +environment. Use [X +display](https://docs.it4i.cz/anselm-cluster-documentation/software/debuggers/resolveuid/11e53ad0d2fd4c5187537f4baeedff33) +for running the GUI. + + $ module load intel + $ idb + +Read more at the [Intel +Debugger](intel-suite/intel-debugger.html) page. + +Allinea Forge (DDT/MAP) +----------------------- + +Allinea DDT, is a commercial debugger primarily for debugging parallel +MPI or OpenMP programs. It also has a support for GPU (CUDA) and Intel +Xeon Phi accelerators. DDT provides all the standard debugging features +(stack trace, breakpoints, watches, view variables, threads etc.) for +every thread running as part of your program, or for every process - +even if these processes are distributed across a cluster using an MPI +implementation. + + $ module load Forge + $ forge + +Read more at the [Allinea +DDT](debuggers/allinea-ddt.html) page. + +Allinea Performance Reports +--------------------------- + +Allinea Performance Reports characterize the performance of HPC +application runs. After executing your application through the tool, a +synthetic HTML report is generated automatically, containing information +about several metrics along with clear behavior statements and hints to +help you improve the efficiency of your runs. Our license is limited to +64 MPI processes. + + $ module load PerformanceReports/6.0 + $ perf-report mpirun -n 64 ./my_application argument01 argument02 + +Read more at the [Allinea Performance +Reports](debuggers/allinea-performance-reports.html) +page. 
+ +RougeWave Totalview +------------------- + +TotalView is a source- and machine-level debugger for multi-process, +multi-threaded programs. Its wide range of tools provides ways to +analyze, organize, and test programs, making it easy to isolate and +identify problems in individual threads and processes in programs of +great complexity. + + $ module load totalview + $ totalview + +Read more at the [Totalview](debuggers/total-view.html) +page. + +Vampir trace analyzer +--------------------- + +Vampir is a GUI trace analyzer for traces in OTF format. + + $ module load Vampir/8.5.0 + $ vampir + +Read more at +the [Vampir](../../salomon/software/debuggers/vampir.html) page. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/Snmekobrazovky20141204v12.56.36.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/Snmekobrazovky20141204v12.56.36.png new file mode 100644 index 0000000000000000000000000000000000000000..2d0009c34c6c861c018b7999079cbd5aa6e0401d Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/Snmekobrazovky20141204v12.56.36.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/Snmekobrazovky20160708v12.33.35.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/Snmekobrazovky20160708v12.33.35.png new file mode 100644 index 0000000000000000000000000000000000000000..d8ea15508f0714eeacfadff6d85fe8cafe5c406b Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/Snmekobrazovky20160708v12.33.35.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.md new file mode 100644 index 0000000000000000000000000000000000000000..237f66e9b50348ea499370f53427860f0c779afb --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.md @@ -0,0 +1,129 @@ +Allinea Forge (DDT,MAP) +======================= + + + +Allinea Forge consist of two tools - debugger DDT and profiler MAP. + +Allinea DDT, is a commercial debugger primarily for debugging parallel +MPI or OpenMP programs. It also has a support for GPU (CUDA) and Intel +Xeon Phi accelerators. DDT provides all the standard debugging features +(stack trace, breakpoints, watches, view variables, threads etc.) for +every thread running as part of your program, or for every process - +even if these processes are distributed across a cluster using an MPI +implementation. + +Allinea MAP is a profiler for C/C++/Fortran HPC codes. It is designed +for profiling parallel code, which uses pthreads, OpenMP or MPI. + +License and Limitations for Anselm Users +---------------------------------------- + +On Anselm users can debug OpenMP or MPI code that runs up to 64 parallel +processes. In case of debugging GPU or Xeon Phi accelerated codes the +limit is 8 accelerators. These limitation means that: + +- 1 user can debug up 64 processes, or +- 32 users can debug 2 processes, etc. + +In case of debugging on accelerators: + +- 1 user can debug on up to 8 accelerators, or +- 8 users can debug on single accelerator. + +Compiling Code to run with DDT +------------------------------ + +### Modules + +Load all necessary modules to compile the code. For example: + + $ module load intel + $ module load impi ... or ... 
module load openmpi/X.X.X-icc + +Load the Allinea DDT module: + + $ module load Forge + +Compile the code: + +` +$ mpicc -g -O0 -o test_debug test.c + +$ mpif90 -g -O0 -o test_debug test.f +` + + + +### Compiler flags + +Before debugging, you need to compile your code with theses flags: + +-g** : Generates extra debugging information usable by GDB. -g3** +includes even more debugging information. This option is available for +GNU and INTEL C/C++ and Fortran compilers. + +-O0** : Suppress all optimizations.** + + + +Starting a Job with DDT +----------------------- + +Be sure to log in with an X window +forwarding enabled. This could mean using the -X in the ssh:  + + $ ssh -X username@anselm.it4i.cz + +Other options is to access login node using VNC. Please see the detailed +information on how to [use graphic user interface on +Anselm](https://docs.it4i.cz/anselm-cluster-documentation/software/debuggers/resolveuid/11e53ad0d2fd4c5187537f4baeedff33) +. + +From the login node an interactive session **with X windows forwarding** +(-X option) can be started by following command: + + $ qsub -I -X -A NONE-0-0 -q qexp -lselect=1:ncpus=16:mpiprocs=16,walltime=01:00:00 + +Then launch the debugger with the ddt command followed by the name of +the executable to debug: + + $ ddt test_debug + +A submission window that appears have +a prefilled path to the executable to debug. You can select the number +of MPI processors and/or OpenMP threads on which to run and press run. +Command line arguments to a program can be entered to the +"Arguments " +box. + + + +To start the debugging directly without the submission window, user can +specify the debugging and execution parameters from the command line. +For example the number of MPI processes is set by option "-np 4". +Skipping the dialog is done by "-start" option. To see the list of the +"ddt" command line parameters, run "ddt --help".  + + ddt -start -np 4 ./hello_debug_impi + + + +Documentation +------------- + +Users can find original User Guide after loading the DDT module: + + $DDTPATH/doc/userguide.pdf + + + + + + [1] Discipline, Magic, Inspiration and Science: Best Practice +Debugging with Allinea DDT, Workshop conducted at LLNL by Allinea on May +10, 2013, +[link](https://computing.llnl.gov/tutorials/allineaDDT/index.html) + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.md new file mode 100644 index 0000000000000000000000000000000000000000..3c2e3ee645fc31b55e0cc6132c6a31f160674826 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.md @@ -0,0 +1,77 @@ +Allinea Performance Reports +=========================== + +quick application profiling + + + +Introduction +------------ + +Allinea Performance Reports characterize the performance of HPC +application runs. After executing your application through the tool, a +synthetic HTML report is generated automatically, containing information +about several metrics along with clear behavior statements and hints to +help you improve the efficiency of your runs. + +The Allinea Performance Reports is most useful in profiling MPI +programs. + +Our license is limited to 64 MPI processes. 
+ +Modules +------- + +Allinea Performance Reports version 6.0 is available + + $ module load PerformanceReports/6.0 + +The module sets up environment variables, required for using the Allinea +Performance Reports. This particular command loads the default module, +which is performance reports version 4.2. + +Usage +----- + +Use the the perf-report wrapper on your (MPI) program. + +Instead of [running your MPI program the usual +way](../mpi-1.html), use the the perf report wrapper: + + $ perf-report mpirun ./mympiprog.x + +The mpi program will run as usual. The perf-report creates two +additional files, in *.txt and *.html format, containing the +performance report. Note that [demanding MPI codes should be run within +the queue +system](../../resource-allocation-and-job-execution/job-submission-and-execution.html). + +Example +------- + +In this example, we will be profiling the mympiprog.x MPI program, using +Allinea performance reports. Assume that the code is compiled with intel +compilers and linked against intel MPI library: + +First, we allocate some nodes via the express queue: + + $ qsub -q qexp -l select=2:ncpus=16:mpiprocs=16:ompthreads=1 -I + qsub: waiting for job 262197.dm2 to start + qsub: job 262197.dm2 ready + +Then we load the modules and run the program the usual way: + + $ module load intel impi allinea-perf-report/4.2 + $ mpirun ./mympiprog.x + +Now lets profile the code: + + $ perf-report mpirun ./mympiprog.x + +Performance report files +[mympiprog_32p*.txt](mympiprog_32p_2014-10-15_16-56.txt) +and +[mympiprog_32p*.html](mympiprog_32p_2014-10-15_16-56.html) +were created. We can see that the code is very efficient on MPI and is +CPU bounded. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.md new file mode 100644 index 0000000000000000000000000000000000000000..008d86c04f18021fcc536afb7da083be182cd959 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.md @@ -0,0 +1,65 @@ +CUBE +==== + +Introduction +------------ + +CUBE is a graphical performance report explorer for displaying data from +Score-P and Scalasca (and other compatible tools). The name comes from +the fact that it displays performance data in a three-dimensions : + +- **performance metric**, where a number of metrics are available, + such as communication time or cache misses, +- **call path**, which contains the call tree of your program +- s**ystem resource**, which contains system's nodes, processes and + threads, depending on the parallel programming model. + +Each dimension is organized in a tree, for example the time performance +metric is divided into Execution time and Overhead time, call path +dimension is organized by files and routines in your source code etc. + + + +*Figure 1. Screenshot of CUBE displaying data from Scalasca.* + +* +*Each node in the tree is colored by severity (the color scheme is +displayed at the bottom of the window, ranging from the least severe +blue to the most severe being red). For example in Figure 1, we can see +that most of the point-to-point MPI communication happens in routine +exch_qbc, colored red. 
+
+Installed versions
+------------------
+
+Currently, there are two builds of CUBE 4.2.3 available as
+[modules](../../environment-and-modules.html):
+
+- cube/4.2.3-gcc, compiled with GCC
+
+- cube/4.2.3-icc, compiled with Intel compiler
+
+Usage
+-----
+
+CUBE is a graphical application. Refer to [Graphical User Interface
+documentation](https://docs.it4i.cz/anselm-cluster-documentation/software/debuggers/resolveuid/11e53ad0d2fd4c5187537f4baeedff33)
+for a list of methods to launch graphical applications on Anselm.
+
+Analyzing large data sets can consume a large amount of CPU and RAM. Do
+not perform large analyses on login nodes.
+
+After loading the appropriate module, simply launch the
+cube command, or alternatively you can use the
+scalasca -examine command to launch the
+GUI. Note that for Scalasca datasets, if you do not analyze the data
+with scalasca -examine before opening them with CUBE, not all
+performance data will be available.
+
+References
+
+1. <http://www.scalasca.org/software/cube-4.x/download.html>
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/ddt1.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/ddt1.png
new file mode 100644
index 0000000000000000000000000000000000000000..57a18f48908f0b8a4857bc14a820b450fcdd9652
Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/ddt1.png differ
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.md
new file mode 100644
index 0000000000000000000000000000000000000000..eeb8206b2c86d2cc09b8ae5f1e84409b700a597b
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.md
@@ -0,0 +1,299 @@
+Intel Performance Counter Monitor
+=================================
+
+Introduction
+------------
+
+Intel PCM (Performance Counter Monitor) is a tool to monitor performance
+hardware counters on Intel® processors, similar to
+[PAPI](papi.html). The difference between PCM and PAPI
+is that PCM supports only Intel hardware, but PCM can also monitor
+uncore metrics, like memory controllers and QuickPath Interconnect
+links.
+
+Installed version
+------------------------------
+
+Currently, version 2.6 is installed. To load the
+[module](../../environment-and-modules.html), issue :
+
+    $ module load intelpcm
+
+Command line tools
+------------------
+
+PCM provides a set of tools to monitor the system or an application.
+
+### pcm-memory
+
+Measures memory bandwidth of your application or the whole system.
+Usage:
+
+    $ pcm-memory.x <delay>|[external_program parameters]
+
+Specify either a delay of updates in seconds or an external program to
+monitor. If you get an error about PMU in use, respond "y" and relaunch
+the program. 
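+
+For instance, to watch the memory traffic of the whole system with a
+refresh interval of 2 seconds (the delay value here is only an example),
+one might run:
+
+    $ pcm-memory.x 2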
+ +Sample output: + + ---------------------------------------||--------------------------------------- + -- Socket 0 --||-- Socket 1 -- + ---------------------------------------||--------------------------------------- + ---------------------------------------||--------------------------------------- + ---------------------------------------||--------------------------------------- + -- Memory Performance Monitoring --||-- Memory Performance Monitoring -- + ---------------------------------------||--------------------------------------- + -- Mem Ch 0: Reads (MB/s): 2.44 --||-- Mem Ch 0: Reads (MB/s): 0.26 -- + -- Writes(MB/s): 2.16 --||-- Writes(MB/s): 0.08 -- + -- Mem Ch 1: Reads (MB/s): 0.35 --||-- Mem Ch 1: Reads (MB/s): 0.78 -- + -- Writes(MB/s): 0.13 --||-- Writes(MB/s): 0.65 -- + -- Mem Ch 2: Reads (MB/s): 0.32 --||-- Mem Ch 2: Reads (MB/s): 0.21 -- + -- Writes(MB/s): 0.12 --||-- Writes(MB/s): 0.07 -- + -- Mem Ch 3: Reads (MB/s): 0.36 --||-- Mem Ch 3: Reads (MB/s): 0.20 -- + -- Writes(MB/s): 0.13 --||-- Writes(MB/s): 0.07 -- + -- NODE0 Mem Read (MB/s): 3.47 --||-- NODE1 Mem Read (MB/s): 1.45 -- + -- NODE0 Mem Write (MB/s): 2.55 --||-- NODE1 Mem Write (MB/s): 0.88 -- + -- NODE0 P. Write (T/s) : 31506 --||-- NODE1 P. Write (T/s): 9099 -- + -- NODE0 Memory (MB/s): 6.02 --||-- NODE1 Memory (MB/s): 2.33 -- + ---------------------------------------||--------------------------------------- + -- System Read Throughput(MB/s): 4.93 -- + -- System Write Throughput(MB/s): 3.43 -- + -- System Memory Throughput(MB/s): 8.35 -- + ---------------------------------------||--------------------------------------- + +### pcm-msr + +Command pcm-msr.x can be used to +read/write model specific registers of the CPU. + +### pcm-numa + +NUMA monitoring utility does not work on Anselm. + +### pcm-pcie + +Can be used to monitor PCI Express bandwith. Usage: +pcm-pcie.x <delay> + +### pcm-power + +Displays energy usage and thermal headroom for CPU and DRAM sockets. +Usage: > pcm-power.x <delay> | +<external program> + +### pcm + +This command provides an overview of performance counters and memory +usage. >Usage: > pcm.x +<delay> | <external program> + +Sample output : + + $ pcm.x ./matrix + + Intel(r) Performance Counter Monitor V2.6 (2013-11-04 13:43:31 +0100 ID=db05e43) + + Copyright (c) 2009-2013 Intel Corporation + + Number of physical cores: 16 + Number of logical cores: 16 + Threads (logical cores) per physical core: 1 + Num sockets: 2 + Core PMU (perfmon) version: 3 + Number of core PMU generic (programmable) counters: 8 + Width of generic (programmable) counters: 48 bits + Number of core PMU fixed counters: 3 + Width of fixed counters: 48 bits + Nominal core frequency: 2400000000 Hz + Package thermal spec power: 115 Watt; Package minimum power: 51 Watt; Package maximum power: 180 Watt; + Socket 0: 1 memory controllers detected with total number of 4 channels. 2 QPI ports detected. + Socket 1: 1 memory controllers detected with total number of 4 channels. 2 QPI ports detected. 
+ Number of PCM instances: 2 + Max QPI link speed: 16.0 GBytes/second (8.0 GT/second) + + Detected Intel(R) Xeon(R) CPU E5-2665 0 @ 2.40GHz "Intel(r) microarchitecture codename Sandy Bridge-EP/Jaketown" + + Executing "./matrix" command: + + Exit code: 0 + + EXEC : instructions per nominal CPU cycle + IPC : instructions per CPU cycle + FREQ : relation to nominal CPU frequency='unhalted clock ticks'/'invariant timer ticks' (includes Intel Turbo Boost) + AFREQ : relation to nominal CPU frequency while in active state (not in power-saving C state)='unhalted clock ticks'/'invariant timer ticks while in C0-state' (includes Intel Turbo Boost) + L3MISS: L3 cache misses + L2MISS: L2 cache misses (including other core's L2 cache *hits*) + L3HIT : L3 cache hit ratio (0.00-1.00) + L2HIT : L2 cache hit ratio (0.00-1.00) + L3CLK : ratio of CPU cycles lost due to L3 cache misses (0.00-1.00), in some cases could be >1.0 due to a higher memory latency + L2CLK : ratio of CPU cycles lost due to missing L2 cache but still hitting L3 cache (0.00-1.00) + READ : bytes read from memory controller (in GBytes) + WRITE : bytes written to memory controller (in GBytes) + TEMP : Temperature reading in 1 degree Celsius relative to the TjMax temperature (thermal headroom): 0 corresponds to the max temperature + + Core (SKT) | EXEC | IPC | FREQ | AFREQ | L3MISS | L2MISS | L3HIT | L2HIT | L3CLK | L2CLK | READ | WRITE | TEMP + + 0 0 0.00 0.64 0.01 0.80 5592 11 K 0.49 0.13 0.32 0.06 N/A N/A 67 + 1 0 0.00 0.18 0.00 0.69 3086 5552 0.44 0.07 0.48 0.08 N/A N/A 68 + 2 0 0.00 0.23 0.00 0.81 300 562 0.47 0.06 0.43 0.08 N/A N/A 67 + 3 0 0.00 0.21 0.00 0.99 437 862 0.49 0.06 0.44 0.09 N/A N/A 73 + 4 0 0.00 0.23 0.00 0.93 293 559 0.48 0.07 0.42 0.09 N/A N/A 73 + 5 0 0.00 0.21 0.00 1.00 423 849 0.50 0.06 0.43 0.10 N/A N/A 69 + 6 0 0.00 0.23 0.00 0.94 285 558 0.49 0.06 0.41 0.09 N/A N/A 71 + 7 0 0.00 0.18 0.00 0.81 674 1130 0.40 0.05 0.53 0.08 N/A N/A 65 + 8 1 0.00 0.47 0.01 1.26 6371 13 K 0.51 0.35 0.31 0.07 N/A N/A 64 + 9 1 2.30 1.80 1.28 1.29 179 K 15 M 0.99 0.59 0.04 0.71 N/A N/A 60 + 10 1 0.00 0.22 0.00 1.26 315 570 0.45 0.06 0.43 0.08 N/A N/A 67 + 11 1 0.00 0.23 0.00 0.74 321 579 0.45 0.05 0.45 0.07 N/A N/A 66 + 12 1 0.00 0.22 0.00 1.25 305 570 0.46 0.05 0.42 0.07 N/A N/A 68 + 13 1 0.00 0.22 0.00 1.26 336 581 0.42 0.04 0.44 0.06 N/A N/A 69 + 14 1 0.00 0.22 0.00 1.25 314 565 0.44 0.06 0.43 0.07 N/A N/A 69 + 15 1 0.00 0.29 0.00 1.19 2815 6926 0.59 0.39 0.29 0.08 N/A N/A 69 + ------------------------------------------------------------------------------------------------------------------- + SKT 0 0.00 0.46 0.00 0.79 11 K 21 K 0.47 0.10 0.38 0.07 0.00 0.00 65 + SKT 1 0.29 1.79 0.16 1.29 190 K 15 M 0.99 0.59 0.05 0.70 0.01 0.01 61 + ------------------------------------------------------------------------------------------------------------------- + TOTAL * 0.14 1.78 0.08 1.28 201 K 15 M 0.99 0.59 0.05 0.70 0.01 0.01 N/A + + Instructions retired: 1345 M ; Active cycles: 755 M ; Time (TSC): 582 Mticks ; C0 (active,non-halted) core residency: 6.30 % + + C1 core residency: 0.14 %; C3 core residency: 0.20 %; C6 core residency: 0.00 %; C7 core residency: 93.36 %; + C2 package residency: 48.81 %; C3 package residency: 0.00 %; C6 package residency: 0.00 %; C7 package residency: 0.00 %; + + PHYSICAL CORE IPC : 1.78 => corresponds to 44.50 % utilization for cores in active state + Instructions per nominal CPU cycle: 0.14 => corresponds to 3.60 % core utilization over time interval + + Intel(r) QPI data traffic estimation in bytes (data traffic 
coming to CPU/socket through QPI links): + + QPI0 QPI1 | QPI0 QPI1 + ---------------------------------------------------------------------------------------------- + SKT 0 0 0 | 0% 0% + SKT 1 0 0 | 0% 0% + ---------------------------------------------------------------------------------------------- + Total QPI incoming data traffic: 0 QPI data traffic/Memory controller traffic: 0.00 + + Intel(r) QPI traffic estimation in bytes (data and non-data traffic outgoing from CPU/socket through QPI links): + + QPI0 QPI1 | QPI0 QPI1 + ---------------------------------------------------------------------------------------------- + SKT 0 0 0 | 0% 0% + SKT 1 0 0 | 0% 0% + ---------------------------------------------------------------------------------------------- + Total QPI outgoing data and non-data traffic: 0 + + ---------------------------------------------------------------------------------------------- + SKT 0 package consumed 4.06 Joules + SKT 1 package consumed 9.40 Joules + ---------------------------------------------------------------------------------------------- + TOTAL: 13.46 Joules + + ---------------------------------------------------------------------------------------------- + SKT 0 DIMMs consumed 4.18 Joules + SKT 1 DIMMs consumed 4.28 Joules + ---------------------------------------------------------------------------------------------- + TOTAL: 8.47 Joules + Cleaning up + + + +### pcm-sensor + +Can be used as a sensor for ksysguard GUI, which is currently not +installed on Anselm. + +API +--- + +In a similar fashion to PAPI, PCM provides a C++ API to access the +performance counter from within your application. Refer to the [doxygen +documentation](http://intel-pcm-api-documentation.github.io/classPCM.html) +for details of the API. + +Due to security limitations, using PCM API to monitor your applications +is currently not possible on Anselm. 
(The application must be run as +root user) + +Sample program using the API : + + #include <stdlib.h> + #include <stdio.h> + #include "cpucounters.h" + + #define SIZE 1000 + + using namespace std; + + int main(int argc, char **argv) { + float matrixa[SIZE][SIZE], matrixb[SIZE][SIZE], mresult[SIZE][SIZE]; + float real_time, proc_time, mflops; + long long flpins; + int retval; + int i,j,k; + + PCM * m = PCM::getInstance(); + + if (m->program() != PCM::Success) return 1; + + SystemCounterState before_sstate = getSystemCounterState(); + + /* Initialize the Matrix arrays */ + for ( i=0; i<SIZE*SIZE; i++ ){ + mresult[0][i] = 0.0; + matrixa[0][i] = matrixb[0][i] = rand()*(float)1.1; } + + /* A naive Matrix-Matrix multiplication */ + for (i=0;i<SIZE;i++) + for(j=0;j<SIZE;j++) + for(k=0;k<SIZE;k++) + mresult[i][j]=mresult[i][j] + matrixa[i][k]*matrixb[k][j]; + + SystemCounterState after_sstate = getSystemCounterState(); + + cout << "Instructions per clock:" << getIPC(before_sstate,after_sstate) + << "L3 cache hit ratio:" << getL3CacheHitRatio(before_sstate,after_sstate) + << "Bytes read:" << getBytesReadFromMC(before_sstate,after_sstate); + + for (i=0; i<SIZE;i++) + for (j=0; j<SIZE; j++) + if (mresult[i][j] == -1) printf("x"); + + return 0; + } + +Compile it with : + + $ icc matrix.cpp -o matrix -lpthread -lpcm + +Sample output : + + $ ./matrix + Number of physical cores: 16 + Number of logical cores: 16 + Threads (logical cores) per physical core: 1 + Num sockets: 2 + Core PMU (perfmon) version: 3 + Number of core PMU generic (programmable) counters: 8 + Width of generic (programmable) counters: 48 bits + Number of core PMU fixed counters: 3 + Width of fixed counters: 48 bits + Nominal core frequency: 2400000000 Hz + Package thermal spec power: 115 Watt; Package minimum power: 51 Watt; Package maximum power: 180 Watt; + Socket 0: 1 memory controllers detected with total number of 4 channels. 2 QPI ports detected. + Socket 1: 1 memory controllers detected with total number of 4 channels. 2 QPI ports detected. + Number of PCM instances: 2 + Max QPI link speed: 16.0 GBytes/second (8.0 GT/second) + Instructions per clock:1.7 + L3 cache hit ratio:1.0 + Bytes read:12513408 + +References +---------- + +1. <https://software.intel.com/en-us/articles/intel-performance-counter-monitor-a-better-way-to-measure-cpu-utilization> +2. <https://software.intel.com/sites/default/files/m/3/2/2/xeon-e5-2600-uncore-guide.pdf> Intel® + Xeon® Processor E5-2600 Product Family Uncore Performance + Monitoring Guide. +3. <http://intel-pcm-api-documentation.github.io/classPCM.html> API + Documentation + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.md new file mode 100644 index 0000000000000000000000000000000000000000..b22b73f2b336d267a0279892d251d20b30381838 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.md @@ -0,0 +1,109 @@ +Intel VTune Amplifier +===================== + + + +Introduction +------------ + +Intel*® *VTune™ >Amplifier, part of Intel Parallel studio, is a GUI +profiling tool designed for Intel processors. It offers a graphical +performance analysis of single core and multithreaded applications. 
A +highlight of the features: + +- Hotspot analysis +- Locks and waits analysis +- Low level specific counters, such as branch analysis and memory + bandwidth +- Power usage analysis - frequency and sleep states. + + + +Usage +----- + +To launch the GUI, first load the module: + + $ module add VTune/2016_update1 + + class="s1">and launch the GUI : + + $ amplxe-gui + +To profile an application with VTune Amplifier, special kernel +modules need to be loaded. The modules are not loaded on Anselm login +nodes, thus direct profiling on login nodes is not possible. Use VTune +on compute nodes and refer to the documentation on [using GUI +applications](https://docs.it4i.cz/anselm-cluster-documentation/software/debuggers/resolveuid/11e53ad0d2fd4c5187537f4baeedff33). + +The GUI will open in new window. Click on "*New Project...*" to +create a new project. After clicking *OK*, a new window with project +properties will appear.  At "*Application:*", select the bath to your +binary you want to profile (the binary should be compiled with -g flag). +Some additional options such as command line arguments can be selected. +At "*Managed code profiling mode:*" select "*Native*" (unless you want +to profile managed mode .NET/Mono applications). After clicking *OK*, +your project is created. + +To run a new analysis, click "*New analysis...*". You will see a list of +possible analysis. Some of them will not be possible on the current CPU +(eg. Intel Atom analysis is not possible on Sandy Bridge CPU), the GUI +will show an error box if you select the wrong analysis. For example, +select "*Advanced Hotspots*". Clicking on *Start *will start profiling +of the application. + +Remote Analysis +--------------- + +VTune Amplifier also allows a form of remote analysis. In this mode, +data for analysis is collected from the command line without GUI, and +the results are then loaded to GUI on another machine. This allows +profiling without interactive graphical jobs. To perform a remote +analysis, launch a GUI somewhere, open the new analysis window and then +click the button "*Command line*" in bottom right corner. It will show +the command line needed to perform the selected analysis. + +The command line will look like this: + + /apps/all/VTune/2016_update1/vtune_amplifier_xe_2016.1.1.434111/bin64/amplxe-cl -collect advanced-hotspots -knob collection-detail=stack-and-callcount -mrte-mode=native -target-duration-type=veryshort -app-working-dir /home/sta545/test -- /home/sta545/test_pgsesv + +Copy the line to clipboard and then you can paste it in your jobscript +or in command line. After the collection is run, open the GUI once +again, click the menu button in the upper right corner, and select +"*Open > Result...*". The GUI will load the results from the run. + +Xeon Phi +-------- + +This section is outdated. It will be updated with new information soon. + +It is possible to analyze both native and offload Xeon Phi applications. +For offload mode, just specify the path to the binary. For native mode, +you need to specify in project properties: + +Application: ssh + +Application parameters: mic0 source ~/.profile +&& /path/to/your/bin + +Note that we include source ~/.profile +in the command to setup environment paths [as described +here](../intel-xeon-phi.html). + +If the analysis is interrupted or aborted, further analysis on the card +might be impossible and you will get errors like "ERROR connecting to +MIC card". In this case please contact our support to reboot the MIC +card. 
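+
+Putting the native-mode settings above together, the project properties
+might look like this (a sketch only; /tmp/app.mic stands in for the path
+to your own binary):
+
+    Application:            ssh
+    Application parameters: mic0 source ~/.profile && /tmp/app.mic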
+ +You may also use remote analysis to collect data from the MIC and then +analyze it in the GUI later : + + $ amplxe-cl -collect knc-hotspots -no-auto-finalize -- ssh mic0 + "export LD_LIBRARY_PATH=/apps/intel/composer_xe_2015.2.164/compiler/lib/mic/:/apps/intel/composer_xe_2015.2.164/mkl/lib/mic/; export KMP_AFFINITY=compact; /tmp/app.mic" + +References +---------- + +1. ><https://www.rcac.purdue.edu/tutorials/phi/PerformanceTuningXeonPhi-Tullos.pdf> Performance + Tuning for Intel® Xeon Phi™ Coprocessors + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.md new file mode 100644 index 0000000000000000000000000000000000000000..df34e4232f4b6059c4522aa033e9e8bf0fc5c08f --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.md @@ -0,0 +1,267 @@ +PAPI +==== + + + +Introduction +------------ + + dir="auto">Performance Application Programming +Interface >(PAPI)  is a portable interface to access +hardware performance counters (such as instruction counts and cache +misses) found in most modern architectures. With the new component +framework, PAPI is not limited only to CPU counters, but offers also +components for CUDA, network, Infiniband etc. + +PAPI provides two levels of interface - a simpler, high level +interface and more detailed low level interface. + +PAPI can be used with parallel as well as serial programs. + +Usage +----- + +To use PAPI, load +[module](../../environment-and-modules.html) +papi : + + $ module load papi + +This will load the default version. Execute +module avail papi for a list of installed +versions. + +Utilites +-------- + +The bin directory of PAPI (which is +automatically added to $PATH upon +loading the module) contains various utilites. + +### papi_avail + +Prints which preset events are available on the current CPU. The third +column indicated whether the preset event is available on the current +CPU. + + $ papi_avail + Available events and hardware information. + -------------------------------------------------------------------------------- + PAPI Version : 5.3.2.0 + Vendor string and code : GenuineIntel (1) + Model string and code : Intel(R) Xeon(R) CPU E5-2670 0 @ 2.60GHz (45) + CPU Revision : 7.000000 + CPUID Info : Family: 6 Model: 45 Stepping: 7 + CPU Max Megahertz : 2601 + CPU Min Megahertz : 1200 + Hdw Threads per core : 1 + Cores per Socket : 8 + Sockets : 2 + NUMA Nodes : 2 + CPUs per Node : 8 + Total CPUs : 16 + Running in a VM : no + Number Hardware Counters : 11 + Max Multiplex Counters : 32 + -------------------------------------------------------------------------------- + Name Code Avail Deriv Description (Note) + PAPI_L1_DCM 0x80000000 Yes No Level 1 data cache misses + PAPI_L1_ICM 0x80000001 Yes No Level 1 instruction cache misses + PAPI_L2_DCM 0x80000002 Yes Yes Level 2 data cache misses + PAPI_L2_ICM 0x80000003 Yes No Level 2 instruction cache misses + PAPI_L3_DCM 0x80000004 No No Level 3 data cache misses + PAPI_L3_ICM 0x80000005 No No Level 3 instruction cache misses + PAPI_L1_TCM 0x80000006 Yes Yes Level 1 cache misses + PAPI_L2_TCM 0x80000007 Yes No Level 2 cache misses + PAPI_L3_TCM 0x80000008 Yes No Level 3 cache misses + .... + +### papi_native_avail + +Prints which native events are available on the current +CPU. + +### class="s1">papi_cost + +Measures the cost (in cycles) of basic PAPI operations. 
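+
+For example, after loading the module these utilities can be invoked
+directly (a minimal sketch; the timings printed by papi_cost depend on
+the CPU of the node you run it on):
+
+    $ module load papi
+    $ papi_native_avail | less
+    $ papi_cost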
+ +###papi_mem_info + +Prints information about the memory architecture of the current +CPU. + +PAPI API +-------- + +PAPI provides two kinds of events: + +- **Preset events** is a set of predefined common CPU events, + >standardized across platforms. +- **Native events **is a set of all events supported by the + current hardware. This is a larger set of features than preset. For + other components than CPU, only native events are usually available. + +To use PAPI in your application, you need to link the appropriate +include file. + +- papi.h for C +- f77papi.h for Fortran 77 +- f90papi.h for Fortran 90 +- fpapi.h for Fortran with preprocessor + +The include path is automatically added by papi module to +$INCLUDE. + +### High level API + +Please refer +to <http://icl.cs.utk.edu/projects/papi/wiki/PAPIC:High_Level> for a +description of the High level API. + +### Low level API + +Please refer +to <http://icl.cs.utk.edu/projects/papi/wiki/PAPIC:Low_Level> for a +description of the Low level API. + +### Timers + +PAPI provides the most accurate timers the platform can support. +See <http://icl.cs.utk.edu/projects/papi/wiki/PAPIC:Timers> + +### System information + +PAPI can be used to query some system infromation, such as CPU name and +MHz. +See <http://icl.cs.utk.edu/projects/papi/wiki/PAPIC:System_Information> + +Example +------- + +The following example prints MFLOPS rate of a naive matrix-matrix +multiplication : + + #include <stdlib.h> + #include <stdio.h> + #include "papi.h" + #define SIZE 1000 + + int main(int argc, char **argv) { + float matrixa[SIZE][SIZE], matrixb[SIZE][SIZE], mresult[SIZE][SIZE]; + float real_time, proc_time, mflops; + long long flpins; + int retval; + int i,j,k; + + /* Initialize the Matrix arrays */ + for ( i=0; i<SIZE*SIZE; i++ ){ + mresult[0][i] = 0.0; + matrixa[0][i] = matrixb[0][i] = rand()*(float)1.1; + } +  + /* Setup PAPI library and begin collecting data from the counters */ + if((retval=PAPI_flops( &real_time, &proc_time, &flpins, &mflops))<PAPI_OK) + printf("Error!"); + + /* A naive Matrix-Matrix multiplication */ + for (i=0;i<SIZE;i++) + for(j=0;j<SIZE;j++) + for(k=0;k<SIZE;k++) + mresult[i][j]=mresult[i][j] + matrixa[i][k]*matrixb[k][j]; + + /* Collect the data into the variables passed in */ + if((retval=PAPI_flops( &real_time, &proc_time, &flpins, &mflops))<PAPI_OK) + printf("Error!"); + + printf("Real_time:t%fnProc_time:t%fnTotal flpins:t%lldnMFLOPS:tt%fn", real_time, proc_time, flpins, mflops); + PAPI_shutdown(); + return 0; + } + + Now compile and run the example : + + $ gcc matrix.c -o matrix -lpapi + $ ./matrix + Real_time: 8.852785 + Proc_time: 8.850000 + Total flpins: 6012390908 + MFLOPS: 679.366211 + +Let's try with optimizations enabled : + + $ gcc -O3 matrix.c -o matrix -lpapi + $ ./matrix + Real_time: 0.000020 + Proc_time: 0.000000 + Total flpins: 6 + MFLOPS: inf + +Now we see a seemingly strange result - the multiplication took no time +and only 6 floating point instructions were issued. This is because the +compiler optimizations have completely removed the multiplication loop, +as the result is actually not used anywhere in the program. We can fix +this by adding some "dummy" code at the end of the Matrix-Matrix +multiplication routine : + + for (i=0; i<SIZE;i++) + for (j=0; j<SIZE; j++) + if (mresult[i][j] == -1.0) printf("x"); + +Now the compiler won't remove the multiplication loop. (However it is +still not that smart to see that the result won't ever be negative). 
Now +run the code again: + + $ gcc -O3 matrix.c -o matrix -lpapi + $ ./matrix + Real_time: 8.795956 + Proc_time: 8.790000 + Total flpins: 18700983160 + MFLOPS: 2127.529297 + +### Intel Xeon Phi + +PAPI currently supports only a subset of counters on the Intel Xeon Phi +processor compared to Intel Xeon, for example the floating point +operations counter is missing. + +To use PAPI in [Intel Xeon +Phi](../intel-xeon-phi.html) native applications, you +need to load module with " -mic" suffix, +for example " papi/5.3.2-mic" : + + $ module load papi/5.3.2-mic + +Then, compile your application in the following way: + + $ module load intel + $ icc -mmic -Wl,-rpath,/apps/intel/composer_xe_2013.5.192/compiler/lib/mic matrix-mic.c -o matrix-mic -lpapi -lpfm + +To execute the application on MIC, you need to manually set +LD_LIBRARY_PATH : + + $ qsub -q qmic -A NONE-0-0 -I + $ ssh mic0 + $ export LD_LIBRARY_PATH=/apps/tools/papi/5.4.0-mic/lib/ + $ ./matrix-mic + +Alternatively, you can link PAPI statically ( +-static flag), then +LD_LIBRARY_PATH does not need to be set. + +You can also execute the PAPI tools on MIC : + + $ /apps/tools/papi/5.4.0-mic/bin/papi_native_avail + +To use PAPI in offload mode, you need to provide both host and MIC +versions of PAPI: + + $ module load papi/5.4.0 + $ icc matrix-offload.c -o matrix-offload -offload-option,mic,compiler,"-L$PAPI_HOME-mic/lib -lpapi" -lpapi + +References +---------- + +1. <http://icl.cs.utk.edu/papi/> Main project page +2. <http://icl.cs.utk.edu/projects/papi/wiki/Main_Page> Wiki +3. <http://icl.cs.utk.edu/papi/docs/> API Documentation + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.md new file mode 100644 index 0000000000000000000000000000000000000000..df942821880a9b653310e3d01b1489cf13665e76 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.md @@ -0,0 +1,107 @@ +Scalasca +======== + +Introduction +------------------------- + +[Scalasca](http://www.scalasca.org/) is a software tool +that supports the performance optimization of parallel programs by +measuring and analyzing their runtime behavior. The analysis identifies +potential performance bottlenecks – in particular those concerning +communication and synchronization – and offers guidance in exploring +their causes. + +Scalasca supports profiling of MPI, OpenMP and hybrid MPI+OpenMP +applications. + +Installed versions +------------------ + +There are currently two versions of Scalasca 2.0 +[modules](../../environment-and-modules.html) installed +on Anselm: + +- class="s1"> + scalasca2/2.0-gcc-openmpi, for usage with + [GNU Compiler](../compilers.html) and + [OpenMPI](../mpi-1/Running_OpenMPI.html), + +- class="s1"> + scalasca2/2.0-icc-impi, for usage with + [Intel Compiler](../compilers.html) and [Intel + MPI](../mpi-1/running-mpich2.html). + +Usage +----- + +Profiling a parallel application with Scalasca consists of three steps: + +1. Instrumentation, compiling the application such way, that the + profiling data can be generated. +2. Runtime measurement, running the application with the Scalasca + profiler to collect performance data. +3. Analysis of reports + +### Instrumentation + +Instrumentation via " scalasca +-instrument" is discouraged. Use [Score-P +instrumentation](score-p.html). 
+ +### Runtime measurement + +After the application is instrumented, runtime measurement can be +performed with the " scalasca -analyze" +command. The syntax is : + + scalasca -analyze [scalasca options] +[launcher] [launcher options] [program] [program options] + +An example : + + $ scalasca -analyze mpirun -np 4 ./mympiprogram + +Some notable Scalsca options are: + +-t Enable trace data collection. By default, only summary data are +collected. +-e <directory> Specify a directory to save the collected data to. +By default, Scalasca saves the data to a directory with +prefix >scorep_, followed by name of the executable and launch +configuration. + +Scalasca can generate a huge amount of data, especially if tracing is +enabled. Please consider saving the data to a [scratch +directory](../../storage.html). + +### Analysis of reports + +For the analysis, you must have [Score-P](score-p.html) +and [CUBE](cube.html) modules loaded. The analysis is +done in two steps, first, the data is preprocessed and then CUBE GUI +tool is launched. + +To launch the analysis, run : + +` +scalasca -examine [options] <experiment_directory> +` + +If you do not wish to launch the GUI tool, use the "-s" option : + +` +scalasca -examine -s <experiment_directory> +` + +Alternatively you can open CUBE and load the data directly from here. +Keep in mind that in that case the preprocessing is not done and not all +metrics will be shown in the viewer. + +Refer to [CUBE documentation](cube.html) on usage of the +GUI viewer. + +References +---------- + +1. <http://www.scalasca.org/> + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.md new file mode 100644 index 0000000000000000000000000000000000000000..0462b82ff2e97f3e2c0078740dc241bae80272e2 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.md @@ -0,0 +1,148 @@ +Score-P +======= + +Introduction +------------ + +The [Score-P measurement +infrastructure](http://www.vi-hps.org/projects/score-p/) +is a highly scalable and easy-to-use tool suite for profiling, event +tracing, and online analysis of HPC applications. + +Score-P can be used as an instrumentation tool for +[Scalasca](scalasca.html). + +Installed versions +------------------ + +There are currently two versions of Score-P version 1.2.6 +[modules](../../environment-and-modules.html) installed +on Anselm : + +- class="s1">scorep/1.2.3-gcc-openmpi>, for usage + with [GNU + Compiler](../compilers.html)> and [OpenMPI](../mpi-1/Running_OpenMPI.html){.internal>, + +- class="s1">scorep/1.2.3-icc-impi>, for usage + with [Intel + Compiler](../compilers.html)> and [Intel + MPI](../mpi-1/running-mpich2.html)>. + +Instrumentation +--------------- + +There are three ways to instrument your parallel applications in +order to enable performance data collection : + +1. >Automated instrumentation using compiler +2. >Manual instrumentation using API calls +3. >Manual instrumentation using directives + +### Automated instrumentation + +is the easiest method. Score-P will automatically add instrumentation to +every routine entry and exit using compiler hooks, and will intercept +MPI calls and OpenMP regions. This method might, however, produce a +large number of data. If you want to focus on profiler a specific +regions of your code, consider using the manual instrumentation methods. 
+To use automated instrumentation, simply prepend +scorep to your compilation command. For +example, replace : + +` +$ mpif90 -c foo.f90 +$ mpif90 -c bar.f90 +$ mpif90 -o myapp foo.o bar.o +` + +with : + +` +$ scorep mpif90 -c foo.f90 +$ scorep mpif90 -c bar.f90 +$ scorep mpif90 -o myapp foo.o bar.o +` + +Usually your program is compiled using a Makefile or similar script, so +it advisable to add the scorep command to +your definition of variables CC, +CXX, class="monospace">FCC etc. + +It is important that scorep is prepended +also to the linking command, in order to link with Score-P +instrumentation libraries. + +###Manual instrumentation using API calls + +To use this kind of instrumentation, use +scorep with switch +--user. You will then mark regions to be +instrumented by inserting API calls. + +An example in C/C++ : + + #include <scorep/SCOREP_User.h> + void foo() + { + SCOREP_USER_REGION_DEFINE( my_region_handle ) + // more declarations + SCOREP_USER_REGION_BEGIN( my_region_handle, "foo", SCOREP_USER_REGION_TYPE_COMMON ) + // do something + SCOREP_USER_REGION_END( my_region_handle ) + } + + and Fortran : + + #include "scorep/SCOREP_User.inc" + subroutine foo + SCOREP_USER_REGION_DEFINE( my_region_handle ) + ! more declarations + SCOREP_USER_REGION_BEGIN( my_region_handle, "foo", SCOREP_USER_REGION_TYPE_COMMON ) + ! do something + SCOREP_USER_REGION_END( my_region_handle ) + end subroutine foo + +Please refer to the [documentation for description of the +API](https://silc.zih.tu-dresden.de/scorep-current/pdf/scorep.pdf). + +###Manual instrumentation using directives + +This method uses POMP2 directives to mark regions to be instrumented. To +use this method, use command scorep +--pomp. + +Example directives in C/C++ : + + void foo(...) + { + /* declarations */ + #pragma pomp inst begin(foo) + ... + if (<condition>) + { + #pragma pomp inst altend(foo) + return; + } + ... + #pragma pomp inst end(foo) + } + +and in Fortran : + + subroutine foo(...) + !declarations + !POMP$ INST BEGIN(foo) + ... + if (<condition>) then + !POMP$ INST ALTEND(foo) + return + end if + ... + !POMP$ INST END(foo) + end subroutine foo + +The directives are ignored if the program is compiled without Score-P. +Again, please refer to the +[documentation](https://silc.zih.tu-dresden.de/scorep-current/pdf/scorep.pdf) +for a more elaborate description. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.md new file mode 100644 index 0000000000000000000000000000000000000000..2b1f34c23030b3dc1f0e1ad95218e114964272ff --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.md @@ -0,0 +1,86 @@ +Debuggers and profilers summary +=============================== + + + +Introduction +------------ + +We provide state of the art programms and tools to develop, profile and +debug HPC codes at IT4Innovations. +On these pages, we provide an overview of the profiling and debugging +tools available on Anslem at IT4I. + +Intel debugger +-------------- + +The intel debugger version 13.0 is available, via module intel. The +debugger works for applications compiled with C and C++ compiler and the +ifort fortran 77/90/95 compiler. The debugger provides java GUI +environment. Use [X +display](https://docs.it4i.cz/anselm-cluster-documentation/software/debuggers/resolveuid/11e53ad0d2fd4c5187537f4baeedff33) +for running the GUI. 
+ + $ module load intel + $ idb + +Read more at the [Intel +Debugger](../intel-suite/intel-debugger.html) page. + +Allinea Forge (DDT/MAP) +----------------------- + +Allinea DDT, is a commercial debugger primarily for debugging parallel +MPI or OpenMP programs. It also has a support for GPU (CUDA) and Intel +Xeon Phi accelerators. DDT provides all the standard debugging features +(stack trace, breakpoints, watches, view variables, threads etc.) for +every thread running as part of your program, or for every process - +even if these processes are distributed across a cluster using an MPI +implementation. + + $ module load Forge + $ forge + +Read more at the [Allinea DDT](allinea-ddt.html) page. + +Allinea Performance Reports +--------------------------- + +Allinea Performance Reports characterize the performance of HPC +application runs. After executing your application through the tool, a +synthetic HTML report is generated automatically, containing information +about several metrics along with clear behavior statements and hints to +help you improve the efficiency of your runs. Our license is limited to +64 MPI processes. + + $ module load PerformanceReports/6.0 + $ perf-report mpirun -n 64 ./my_application argument01 argument02 + +Read more at the [Allinea Performance +Reports](allinea-performance-reports.html) page. + +RougeWave Totalview +------------------- + +TotalView is a source- and machine-level debugger for multi-process, +multi-threaded programs. Its wide range of tools provides ways to +analyze, organize, and test programs, making it easy to isolate and +identify problems in individual threads and processes in programs of +great complexity. + + $ module load totalview + $ totalview + +Read more at the [Totalview](total-view.html) page. + +Vampir trace analyzer +--------------------- + +Vampir is a GUI trace analyzer for traces in OTF format. + + $ module load Vampir/8.5.0 + $ vampir + +Read more at +the [Vampir](../../../salomon/software/debuggers/vampir.html) page. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.md new file mode 100644 index 0000000000000000000000000000000000000000..6299f5b9f8fcb19bc5381389b708af21b4b94ff3 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.md @@ -0,0 +1,165 @@ +Total View +========== + +TotalView is a GUI-based source code multi-process, multi-thread +debugger. + +License and Limitations for Anselm Users +---------------------------------------- + +On Anselm users can debug OpenMP or MPI code that runs up to 64 parallel +processes. These limitation means that: + +   1 user can debug up 64 processes, or +   32 users can debug 2 processes, etc. + +Debugging of GPU accelerated codes is also supported. + +You can check the status of the licenses here: + + cat /apps/user/licenses/totalview_features_state.txt + + # totalview + # ------------------------------------------------- + # FEATURE                      TOTAL  USED AVAIL + # ------------------------------------------------- + TotalView_Team                    64     0    64 + Replay                            64     0    64 + CUDA                              64     0    64 + +Compiling Code to run with TotalView +------------------------------------ + +### Modules + +Load all necessary modules to compile the code. For example: + + module load intel + + module load impi  ... or ... 
module load openmpi/X.X.X-icc + +Load the TotalView module: + + module load totalview/8.12 + +Compile the code: + + mpicc -g -O0 -o test_debug test.c + + mpif90 -g -O0 -o test_debug test.f + +### Compiler flags + +Before debugging, you need to compile your code with theses flags: + +-g** : Generates extra debugging information usable by GDB. -g3** +includes even more debugging information. This option is available for +GNU and INTEL C/C++ and Fortran compilers. + +-O0** : Suppress all optimizations.** + +Starting a Job with TotalView +----------------------------- + +Be sure to log in with an X window forwarding enabled. This could mean +using the -X in the ssh: + + ssh -X username@anselm.it4i.cz + +Other options is to access login node using VNC. Please see the detailed +information on how to use graphic user interface on Anselm +[here](https://docs.it4i.cz/anselm-cluster-documentation/software/debuggers/resolveuid/11e53ad0d2fd4c5187537f4baeedff33#VNC). + +From the login node an interactive session with X windows forwarding (-X +option) can be started by following command: + + qsub -I -X -A NONE-0-0 -q qexp -lselect=1:ncpus=16:mpiprocs=16,walltime=01:00:00 + +Then launch the debugger with the totalview command followed by the name +of the executable to debug. + +### Debugging a serial code + +To debug a serial code use: + + totalview test_debug + +### Debugging a parallel code - option 1 + +To debug a parallel code compiled with >**OpenMPI** you need +to setup your TotalView environment: + +Please note:** To be able to run parallel debugging procedure from the +command line without stopping the debugger in the mpiexec source code +you have to add the following function to your **~/.tvdrc** file: + + proc mpi_auto_run_starter {loaded_id} { +    set starter_programs {mpirun mpiexec orterun} +    set executable_name [TV::symbol get $loaded_id full_pathname] +    set file_component [file tail $executable_name] + +    if {[lsearch -exact $starter_programs $file_component] != -1} { +        puts "*************************************" +        puts "Automatically starting $file_component" +        puts "*************************************" +        dgo +    } + } + + # Append this function to TotalView's image load callbacks so that + # TotalView run this program automatically. + + dlappend TV::image_load_callbacks mpi_auto_run_starter + +The source code of this function can be also found in + + /apps/mpi/openmpi/intel/1.6.5/etc/openmpi-totalview.tcl + +You can also add only following line to you ~/.tvdrc file instead of +the entire function: + +source /apps/mpi/openmpi/intel/1.6.5/etc/openmpi-totalview.tcl** + +You need to do this step only once. + +Now you can run the parallel debugger using: + + mpirun -tv -n 5 ./test_debug + +When following dialog appears click on "Yes" + + + +At this point the main TotalView GUI window will appear and you can +insert the breakpoints and start debugging: + + + +### Debugging a parallel code - option 2 + +Other option to start new parallel debugging session from a command line +is to let TotalView to execute mpirun by itself. In this case user has +to specify a MPI implementation used to compile the source code. + +The following example shows how to start debugging session with Intel +MPI: + + module load intel/13.5.192 impi/4.1.1.036 totalview/8/13 + + totalview -mpi "Intel MPI-Hydra" -np 8 ./hello_debug_impi + +After running previous command you will see the same window as shown in +the screenshot above. 
+ +More information regarding the command line parameters of the TotalView +can be found TotalView Reference Guide, Chapter 7: TotalView Command +Syntax.  + +Documentation +------------- + +[1] The [TotalView +documentation](http://www.roguewave.com/support/product-documentation/totalview-family.aspx#totalview) +web page is a good resource for learning more about some of the advanced +TotalView features. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/totalview1.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/totalview1.png new file mode 100644 index 0000000000000000000000000000000000000000..09678f174b4f5d05ccda08cc11de059ee53e7e09 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/totalview1.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/totalview2.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/totalview2.png new file mode 100644 index 0000000000000000000000000000000000000000..9d26ffd97db190a6b8161dd6358fc759ad2cb2ca Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/totalview2.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.md new file mode 100644 index 0000000000000000000000000000000000000000..615b26a1fe671bd05d586143f70f955b30f09b93 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.md @@ -0,0 +1,292 @@ +Valgrind +======== + +Valgrind is a tool for memory debugging and profiling. + +About Valgrind +-------------- + +Valgrind is an open-source tool, used mainly for debuggig memory-related +problems, such as memory leaks, use of uninitalized memory etc. in C/C++ +applications. The toolchain was however extended over time with more +functionality, such as debugging of threaded applications, cache +profiling, not limited only to C/C++. + +Valgind is an extremely useful tool for debugging memory errors such as +[off-by-one](http://en.wikipedia.org/wiki/Off-by-one_error). +Valgrind uses a virtual machine and dynamic recompilation of binary +code, because of that, you can expect that programs being debugged by +Valgrind run 5-100 times slower. + +The main tools available in Valgrind are : + +- **Memcheck**, the original, must used and default tool. Verifies + memory access in you program and can detect use of unitialized + memory, out of bounds memory access, memory leaks, double free, etc. +- **Massif**, a heap profiler. +- **Hellgrind** and **DRD** can detect race conditions in + multi-threaded applications. +- **Cachegrind**, a cache profiler. +- **Callgrind**, a callgraph analyzer. +- For a full list and detailed documentation, please refer to the + [official Valgrind + documentation](http://valgrind.org/docs/). + +Installed versions +------------------ + +There are two versions of Valgrind available on Anselm. + +- >Version 3.6.0, installed by operating system vendor + in /usr/bin/valgrind. + >This version is available by default, without the need + to load any module. This version however does not provide additional + MPI support. +- >Version 3.9.0 with support for Intel MPI, available in + [module](../../environment-and-modules.html) + valgrind/3.9.0-impi. After loading the + module, this version replaces the default valgrind. 
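+
+For example, to switch to the MPI-enabled build and check which version
+is active (a minimal sketch; valgrind --version reports the version of
+the binary found first in your PATH):
+
+    $ module add valgrind/3.9.0-impi
+    $ valgrind --version
+    valgrind-3.9.0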
+ +Usage +----- + +Compile the application which you want to debug as usual. It is +advisable to add compilation flags -g (to +add debugging information to the binary so that you will see original +source code lines in the output) and -O0 +(to disable compiler optimizations). + +For example, lets look at this C code, which has two problems : + + #include <stdlib.h> + + void f(void) + { + int* x = malloc(10 * sizeof(int)); + x[10] = 0; // problem 1: heap block overrun + } // problem 2: memory leak -- x not freed + + int main(void) + { + f(); + return 0; + } + +Now, compile it with Intel compiler : + + $ module add intel + $ icc -g valgrind-example.c -o valgrind-example + +Now, lets run it with Valgrind. The syntax is : + + valgrind [valgrind options] <your program +binary> [your program options] + +If no Valgrind options are specified, Valgrind defaults to running +Memcheck tool. Please refer to the Valgrind documentation for a full +description of command line options. + + $ valgrind ./valgrind-example + ==12652== Memcheck, a memory error detector + ==12652== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. + ==12652== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==12652== Command: ./valgrind-example + ==12652== + ==12652== Invalid write of size 4 + ==12652== at 0x40053E: f (valgrind-example.c:6) + ==12652== by 0x40054E: main (valgrind-example.c:11) + ==12652== Address 0x5861068 is 0 bytes after a block of size 40 alloc'd + ==12652== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==12652== by 0x400528: f (valgrind-example.c:5) + ==12652== by 0x40054E: main (valgrind-example.c:11) + ==12652== + ==12652== + ==12652== HEAP SUMMARY: + ==12652== in use at exit: 40 bytes in 1 blocks + ==12652== total heap usage: 1 allocs, 0 frees, 40 bytes allocated + ==12652== + ==12652== LEAK SUMMARY: + ==12652== definitely lost: 40 bytes in 1 blocks + ==12652== indirectly lost: 0 bytes in 0 blocks + ==12652== possibly lost: 0 bytes in 0 blocks + ==12652== still reachable: 0 bytes in 0 blocks + ==12652== suppressed: 0 bytes in 0 blocks + ==12652== Rerun with --leak-check=full to see details of leaked memory + ==12652== + ==12652== For counts of detected and suppressed errors, rerun with: -v + ==12652== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 6 from 6) + +In the output we can see that Valgrind has detected both errors - the +off-by-one memory access at line 5 and a memory leak of 40 bytes. If we +want a detailed analysis of the memory leak, we need to run Valgrind +with --leak-check=full option : + + $ valgrind --leak-check=full ./valgrind-example + ==23856== Memcheck, a memory error detector + ==23856== Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al. 
+ ==23856== Using Valgrind-3.6.0 and LibVEX; rerun with -h for copyright info + ==23856== Command: ./valgrind-example + ==23856== + ==23856== Invalid write of size 4 + ==23856== at 0x40067E: f (valgrind-example.c:6) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== Address 0x66e7068 is 0 bytes after a block of size 40 alloc'd + ==23856== at 0x4C26FDE: malloc (vg_replace_malloc.c:236) + ==23856== by 0x400668: f (valgrind-example.c:5) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== + ==23856== + ==23856== HEAP SUMMARY: + ==23856== in use at exit: 40 bytes in 1 blocks + ==23856== total heap usage: 1 allocs, 0 frees, 40 bytes allocated + ==23856== + ==23856== 40 bytes in 1 blocks are definitely lost in loss record 1 of 1 + ==23856== at 0x4C26FDE: malloc (vg_replace_malloc.c:236) + ==23856== by 0x400668: f (valgrind-example.c:5) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== + ==23856== LEAK SUMMARY: + ==23856== definitely lost: 40 bytes in 1 blocks + ==23856== indirectly lost: 0 bytes in 0 blocks + ==23856== possibly lost: 0 bytes in 0 blocks + ==23856== still reachable: 0 bytes in 0 blocks + ==23856== suppressed: 0 bytes in 0 blocks + ==23856== + ==23856== For counts of detected and suppressed errors, rerun with: -v + ==23856== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 6 from 6) + +Now we can see that the memory leak is due to the +malloc() at line 6. + +Usage with MPI +--------------------------- + +Although Valgrind is not primarily a parallel debugger, it can be used +to debug parallel applications as well. When launching your parallel +applications, prepend the valgrind command. For example : + + $ mpirun -np 4 valgrind myapplication + +The default version without MPI support will however report a large +number of false errors in the MPI library, such as : + + ==30166== Conditional jump or move depends on uninitialised value(s) + ==30166== at 0x4C287E8: strlen (mc_replace_strmem.c:282) + ==30166== by 0x55443BD: I_MPI_Processor_model_number (init_interface.c:427) + ==30166== by 0x55439E0: I_MPI_Processor_arch_code (init_interface.c:171) + ==30166== by 0x558D5AE: MPID_nem_impi_init_shm_configuration (mpid_nem_impi_extensions.c:1091) + ==30166== by 0x5598F4C: MPID_nem_init_ckpt (mpid_nem_init.c:566) + ==30166== by 0x5598B65: MPID_nem_init (mpid_nem_init.c:489) + ==30166== by 0x539BD75: MPIDI_CH3_Init (ch3_init.c:64) + ==30166== by 0x5578743: MPID_Init (mpid_init.c:193) + ==30166== by 0x554650A: MPIR_Init_thread (initthread.c:539) + ==30166== by 0x553369F: PMPI_Init (init.c:195) + ==30166== by 0x4008BD: main (valgrind-example-mpi.c:18) + +so it is better to use the MPI-enabled valgrind from module. The MPI +version requires library +/apps/tools/valgrind/3.9.0/impi/lib/valgrind/libmpiwrap-amd64-linux.so, +which must be included in the LD_PRELOAD +environment variable. + +Lets look at this MPI example : + + #include <stdlib.h> + #include <mpi.h> + + int main(int argc, char *argv[]) + { +      int *data = malloc(sizeof(int)*99); + +      MPI_Init(&argc, &argv); +     MPI_Bcast(data, 100, MPI_INT, 0, MPI_COMM_WORLD); +      MPI_Finalize(); + +        return 0; + } + +There are two errors - use of uninitialized memory and invalid length of +the buffer. 
Lets debug it with valgrind : + + $ module add intel impi + $ mpicc -g valgrind-example-mpi.c -o valgrind-example-mpi + $ module add valgrind/3.9.0-impi + $ mpirun -np 2 -env LD_PRELOAD /apps/tools/valgrind/3.9.0/impi/lib/valgrind/libmpiwrap-amd64-linux.so valgrind ./valgrind-example-mpi + +Prints this output : (note that there is output printed for every +launched MPI process) + + ==31318== Memcheck, a memory error detector + ==31318== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. + ==31318== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==31318== Command: ./valgrind-example-mpi + ==31318== + ==31319== Memcheck, a memory error detector + ==31319== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. + ==31319== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==31319== Command: ./valgrind-example-mpi + ==31319== + valgrind MPI wrappers 31319: Active for pid 31319 + valgrind MPI wrappers 31319: Try MPIWRAP_DEBUG=help for possible options + valgrind MPI wrappers 31318: Active for pid 31318 + valgrind MPI wrappers 31318: Try MPIWRAP_DEBUG=help for possible options + ==31319== Unaddressable byte(s) found during client check request + ==31319== at 0x4E35974: check_mem_is_addressable_untyped (libmpiwrap.c:960) + ==31319== by 0x4E5D0FE: PMPI_Bcast (libmpiwrap.c:908) + ==31319== by 0x400911: main (valgrind-example-mpi.c:20) + ==31319== Address 0x69291cc is 0 bytes after a block of size 396 alloc'd + ==31319== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31319== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31319== + ==31318== Uninitialised byte(s) found during client check request + ==31318== at 0x4E3591D: check_mem_is_defined_untyped (libmpiwrap.c:952) + ==31318== by 0x4E5D06D: PMPI_Bcast (libmpiwrap.c:908) + ==31318== by 0x400911: main (valgrind-example-mpi.c:20) + ==31318== Address 0x6929040 is 0 bytes inside a block of size 396 alloc'd + ==31318== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31318== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31318== + ==31318== Unaddressable byte(s) found during client check request + ==31318== at 0x4E3591D: check_mem_is_defined_untyped (libmpiwrap.c:952) + ==31318== by 0x4E5D06D: PMPI_Bcast (libmpiwrap.c:908) + ==31318== by 0x400911: main (valgrind-example-mpi.c:20) + ==31318== Address 0x69291cc is 0 bytes after a block of size 396 alloc'd + ==31318== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31318== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31318== + ==31318== + ==31318== HEAP SUMMARY: + ==31318== in use at exit: 3,172 bytes in 67 blocks + ==31318== total heap usage: 191 allocs, 124 frees, 81,203 bytes allocated + ==31318== + ==31319== + ==31319== HEAP SUMMARY: + ==31319== in use at exit: 3,172 bytes in 67 blocks + ==31319== total heap usage: 175 allocs, 108 frees, 48,435 bytes allocated + ==31319== + ==31318== LEAK SUMMARY: + ==31318== definitely lost: 408 bytes in 3 blocks + ==31318== indirectly lost: 256 bytes in 1 blocks + ==31318== possibly lost: 0 bytes in 0 blocks + ==31318== still reachable: 2,508 bytes in 63 blocks + ==31318== suppressed: 0 bytes in 0 blocks + ==31318== Rerun with --leak-check=full to see details of leaked memory + ==31318== + ==31318== For counts of detected and suppressed errors, rerun with: -v + ==31318== Use --track-origins=yes to see where uninitialised values come from + ==31318== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 4 from 4) + ==31319== LEAK SUMMARY: + ==31319== definitely lost: 408 bytes in 3 blocks + 
==31319== indirectly lost: 256 bytes in 1 blocks + ==31319== possibly lost: 0 bytes in 0 blocks + ==31319== still reachable: 2,508 bytes in 63 blocks + ==31319== suppressed: 0 bytes in 0 blocks + ==31319== Rerun with --leak-check=full to see details of leaked memory + ==31319== + ==31319== For counts of detected and suppressed errors, rerun with: -v + ==31319== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4) + +We can see that Valgrind has reported use of unitialised memory on the +master process (which reads the array to be broadcasted) and use of +unaddresable memory on both processes. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.md new file mode 100644 index 0000000000000000000000000000000000000000..25fb484c6bf776ca15594007823269357514b8d3 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.md @@ -0,0 +1,33 @@ +Vampir +====== + +Vampir is a commercial trace analysis and visualisation tool. It can +work with traces in OTF and OTF2 formats. It does not have the +functionality to collect traces, you need to use a trace collection tool +(such +as [Score-P](../../../salomon/software/debuggers/score-p.html)) +first to collect the traces. + + +------------------------------------- + +Installed versions +------------------ + +Version 8.5.0 is currently installed as module +Vampir/8.5.0 : + + $ module load Vampir/8.5.0 + $ vampir & + +User manual +----------- + +You can find the detailed user manual in PDF format in +$EBROOTVAMPIR/doc/vampir-manual.pdf + +References +---------- + +1. <https://www.vampir.eu> + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vtune-amplifier.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vtune-amplifier.png new file mode 100644 index 0000000000000000000000000000000000000000..75ee99d84b87649151f22edad65de021ec348f1c Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vtune-amplifier.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.md new file mode 100644 index 0000000000000000000000000000000000000000..79cf34786cbae1912747eb9dc563d189cc9c8df7 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.md @@ -0,0 +1,182 @@ +GPI-2 +===== + +A library that implements the GASPI specification + + + +Introduction +------------ + +Programming Next Generation Supercomputers: GPI-2 is an API library for +asynchronous interprocess, cross-node communication. It provides a +flexible, scalable and fault tolerant interface for parallel +applications. + +The GPI-2 library +([www.gpi-site.com/gpi2/](http://www.gpi-site.com/gpi2/)) +implements the GASPI specification (Global Address Space Programming +Interface, +[www.gaspi.de](http://www.gaspi.de/en/project.html)). +GASPI is a Partitioned Global Address Space (PGAS) API. It aims at +scalable, flexible and failure tolerant computing in massively parallel +environments. + +Modules +------- + +The GPI-2, version 1.0.2 is available on Anselm via module gpi2: + + $ module load gpi2 + +The module sets up environment variables, required for linking and +running GPI-2 enabled applications. 
This particular command loads the +default module, which is gpi2/1.0.2 + +Linking +------- + +Link with -lGPI2 -libverbs + +Load the gpi2 module. Link using **-lGPI2** and *** **-libverbs** +switches to link your code against GPI-2. The GPI-2 requires the OFED +infinband communication library ibverbs. + +### Compiling and linking with Intel compilers + + $ module load intel + $ module load gpi2 + $ icc myprog.c -o myprog.x -Wl,-rpath=$LIBRARY_PATH -lGPI2 -libverbs + +### Compiling and linking with GNU compilers + + $ module load gcc + $ module load gpi2 + $ gcc myprog.c -o myprog.x -Wl,-rpath=$LIBRARY_PATH -lGPI2 -libverbs + +Running the GPI-2 codes +----------------------- + +gaspi_run + +gaspi_run starts the GPI-2 application + +The gaspi_run utility is used to start and run GPI-2 applications: + + $ gaspi_run -m machinefile ./myprog.x + +A machine file (**machinefile**) with the hostnames of nodes where the +application will run, must be provided. The*** machinefile lists all +nodes on which to run, one entry per node per process. This file may be +hand created or obtained from standard $PBS_NODEFILE: + + $ cut -f1 -d"." $PBS_NODEFILE > machinefile + +machinefile: + + cn79 + cn80 + +This machinefile will run 2 GPI-2 processes, one on node cn79 other on +node cn80. + +machinefle: + + cn79 + cn79 + cn80 + cn80 + +This machinefile will run 4 GPI-2 processes, 2 on node cn79 o 2 on node +cn80. + +Use the **mpiprocs** to control how many GPI-2 processes will run per +node + +Example: + + $ qsub -A OPEN-0-0 -q qexp -l select=2:ncpus=16:mpiprocs=16 -I + +This example will produce $PBS_NODEFILE with 16 entries per node. + +### gaspi_logger + +gaspi_logger views the output form GPI-2 application ranks + +The gaspi_logger utility is used to view the output from all nodes +except the master node (rank 0). The gaspi_logger is started, on +another session, on the master node - the node where the gaspi_run is +executed. The output of the application, when called with +gaspi_printf(), will be redirected to the gaspi_logger. Other I/O +routines (e.g. printf) will not. + +Example +------- + +Following is an example GPI-2 enabled code: + + #include <GASPI.h> + #include <stdlib.h> + + void success_or_exit ( const char* file, const int line, const int ec) + { + if (ec != GASPI_SUCCESS) + { + gaspi_printf ("Assertion failed in %s[%i]:%dn", file, line, ec); + exit (1); + } + } + + #define ASSERT(ec) success_or_exit (__FILE__, __LINE__, ec); + + int main(int argc, char *argv[]) + { + gaspi_rank_t rank, num; + gaspi_return_t ret; + + /* Initialize GPI-2 */ + ASSERT( gaspi_proc_init(GASPI_BLOCK) ); + + /* Get ranks information */ + ASSERT( gaspi_proc_rank(&rank) ); + ASSERT( gaspi_proc_num(&num) ); + + gaspi_printf("Hello from rank %d of %dn", + rank, num); + + /* Terminate */ + ASSERT( gaspi_proc_term(GASPI_BLOCK) ); + + return 0; + } + +Load modules and compile: + + $ module load gcc gpi2 + $ gcc helloworld_gpi.c -o helloworld_gpi.x -Wl,-rpath=$LIBRARY_PATH -lGPI2 -libverbs + +Submit the job and run the GPI-2 application + + $ qsub -q qexp -l select=2:ncpus=1:mpiprocs=1,place=scatter,walltime=00:05:00 -I + qsub: waiting for job 171247.dm2 to start + qsub: job 171247.dm2 ready + + cn79 $ module load gpi2 + cn79 $ cut -f1 -d"." 
$PBS_NODEFILE > machinefile + cn79 $ gaspi_run -m machinefile ./helloworld_gpi.x + Hello from rank 0 of 2 + +At the same time, in another session, you may start the gaspi logger: + + $ ssh cn79 + cn79 $ gaspi_logger + GASPI Logger (v1.1) + [cn80:0] Hello from rank 1 of 2 + +In this example, we compile the helloworld_gpi.c code using the **gnu +compiler** (gcc) and link it to the GPI-2 and ibverbs library. The +library search path is compiled in. For execution, we use the qexp +queue, 2 nodes 1 core each. The GPI module must be loaded on the master +compute node (in this example the cn79), gaspi_logger is used from +different session to view the output of the second process. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.md new file mode 100644 index 0000000000000000000000000000000000000000..7c41a4badd18bd8868a96cf81a5bfd780f2daef5 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.md @@ -0,0 +1,93 @@ +Intel Parallel Studio +===================== + + + +The Anselm cluster provides following elements of the Intel Parallel +Studio XE + + Intel Parallel Studio XE + ------------------------------------------------- + Intel Compilers + Intel Debugger + Intel MKL Library + Intel Integrated Performance Primitives Library + Intel Threading Building Blocks Library + +Intel compilers +--------------- + +The Intel compilers version 13.1.3 are available, via module intel. The +compilers include the icc C and C++ compiler and the ifort fortran +77/90/95 compiler. + + $ module load intel + $ icc -v + $ ifort -v + +Read more at the [Intel +Compilers](intel-suite/intel-compilers.html) page. + +Intel debugger +-------------- + + The intel debugger version 13.0 is available, via module intel. The +debugger works for applications compiled with C and C++ compiler and the +ifort fortran 77/90/95 compiler. The debugger provides java GUI +environment. Use [X +display](https://docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/resolveuid/11e53ad0d2fd4c5187537f4baeedff33) +for running the GUI. + + $ module load intel + $ idb + +Read more at the [Intel +Debugger](intel-suite/intel-debugger.html) page. + +Intel Math Kernel Library +------------------------- + +Intel Math Kernel Library (Intel MKL) is a library of math kernel +subroutines, extensively threaded and optimized for maximum performance. +Intel MKL unites and provides these basic components: BLAS, LAPACK, +ScaLapack, PARDISO, FFT, VML, VSL, Data fitting, Feast Eigensolver and +many more. + + $ module load mkl + +Read more at the [Intel MKL](intel-suite/intel-mkl.html) +page. + +Intel Integrated Performance Primitives +--------------------------------------- + +Intel Integrated Performance Primitives, version 7.1.1, compiled for AVX +is available, via module ipp. The IPP is a library of highly optimized +algorithmic building blocks for media and data applications. This +includes signal, image and frame processing algorithms, such as FFT, +FIR, Convolution, Optical Flow, Hough transform, Sum, MinMax and many +more. + + $ module load ipp + +Read more at the [Intel +IPP](intel-suite/intel-integrated-performance-primitives.html) +page. + +Intel Threading Building Blocks +------------------------------- + +Intel Threading Building Blocks (Intel TBB) is a library that supports +scalable parallel programming using standard ISO C++ code. It does not +require special languages or compilers. 
It is designed to promote +scalable data parallel programming. Additionally, it fully supports +nested parallelism, so you can build larger parallel components from +smaller parallel components. To use the library, you specify tasks, not +threads, and let the library map tasks onto threads in an efficient +manner. + + $ module load tbb + +Read more at the [Intel TBB](intel-suite/intel-tbb.html) +page. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.md new file mode 100644 index 0000000000000000000000000000000000000000..14ce867f6134899a2dc91b51f5b932d6b752a424 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.md @@ -0,0 +1,66 @@ +Intel Compilers +=============== + + + +The Intel compilers version 13.1.1 are available, via module intel. The +compilers include the icc C and C++ compiler and the ifort fortran +77/90/95 compiler. + + $ module load intel + $ icc -v + $ ifort -v + +The intel compilers provide for vectorization of the code, via the AVX +instructions and support threading parallelization via OpenMP + +For maximum performance on the Anselm cluster, compile your programs +using the AVX instructions, with reporting where the vectorization was +used. We recommend following compilation options for high performance + + $ icc -ipo -O3 -vec -xAVX -vec-report1 myprog.c mysubroutines.c -o myprog.x + $ ifort -ipo -O3 -vec -xAVX -vec-report1 myprog.f mysubroutines.f -o myprog.x + +In this example, we compile the program enabling interprocedural +optimizations between source files (-ipo), aggresive loop optimizations +(-O3) and vectorization (-vec -xAVX) + +The compiler recognizes the omp, simd, vector and ivdep pragmas for +OpenMP parallelization and AVX vectorization. Enable the OpenMP +parallelization by the **-openmp** compiler switch. + + $ icc -ipo -O3 -vec -xAVX -vec-report1 -openmp myprog.c mysubroutines.c -o myprog.x + $ ifort -ipo -O3 -vec -xAVX -vec-report1 -openmp myprog.f mysubroutines.f -o myprog.x + +Read more at +<http://software.intel.com/sites/products/documentation/doclib/stdxe/2013/composerxe/compiler/cpp-lin/index.htm> + +Sandy Bridge/Haswell binary compatibility +----------------------------------------- + +Anselm nodes are currently equipped with Sandy Bridge CPUs, while +Salomon will use Haswell architecture. >The new processors are +backward compatible with the Sandy Bridge nodes, so all programs that +ran on the Sandy Bridge processors, should also run on the new Haswell +nodes. >To get optimal performance out of the Haswell +processors a program should make use of the special >AVX2 +instructions for this processor. One can do this by recompiling codes +with the compiler flags >designated to invoke these +instructions. For the Intel compiler suite, there are two ways of +doing >this: + +- >Using compiler flag (both for Fortran and C): + -xCORE-AVX2. This will create a + binary class="s1">with AVX2 instructions, specifically + for the Haswell processors. Note that the + executable >will not run on Sandy Bridge nodes. +- >Using compiler flags (both for Fortran and C): + -xAVX -axCORE-AVX2. This + will >generate multiple, feature specific auto-dispatch + code paths for Intel® processors, if there is >a + performance benefit. So this binary will run both on Sandy Bridge + and Haswell >processors. 
During runtime it will be + decided which path to follow, dependent on + which >processor you are running on. In general this + will result in larger binaries. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.md new file mode 100644 index 0000000000000000000000000000000000000000..35ba0de033b074f26a5a2b1a455f3b3245e012c4 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.md @@ -0,0 +1,97 @@ +Intel Debugger +============== + + + +Debugging serial applications +----------------------------- + + The intel debugger version 13.0 is available, via module intel. The +debugger works for applications compiled with C and C++ compiler and the +ifort fortran 77/90/95 compiler. The debugger provides java GUI +environment. Use [X +display](https://docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/resolveuid/11e53ad0d2fd4c5187537f4baeedff33) +for running the GUI. + + $ module load intel + $ idb + +The debugger may run in text mode. To debug in text mode, use + + $ idbc + +To debug on the compute nodes, module intel must be loaded. +The GUI on compute nodes may be accessed using the same way as in [the +GUI +section](https://docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/resolveuid/11e53ad0d2fd4c5187537f4baeedff33) + +Example: + + $ qsub -q qexp -l select=1:ncpus=16 -X -I + qsub: waiting for job 19654.srv11 to start + qsub: job 19654.srv11 ready + + $ module load intel + $ module load java + $ icc -O0 -g myprog.c -o myprog.x + $ idb ./myprog.x + +In this example, we allocate 1 full compute node, compile program +myprog.c with debugging options -O0 -g and run the idb debugger +interactively on the myprog.x executable. The GUI access is via X11 port +forwarding provided by the PBS workload manager. + +Debugging parallel applications +------------------------------- + +Intel debugger is capable of debugging multithreaded and MPI parallel +programs as well. + +### Small number of MPI ranks + +For debugging small number of MPI ranks, you may execute and debug each +rank in separate xterm terminal (do not forget the [X +display](https://docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/resolveuid/11e53ad0d2fd4c5187537f4baeedff33)). +Using Intel MPI, this may be done in following way: + + $ qsub -q qexp -l select=2:ncpus=16 -X -I + qsub: waiting for job 19654.srv11 to start + qsub: job 19655.srv11 ready + + $ module load intel impi + $ mpirun -ppn 1 -hostfile $PBS_NODEFILE --enable-x xterm -e idbc ./mympiprog.x + +In this example, we allocate 2 full compute node, run xterm on each node +and start idb debugger in command line mode, debugging two ranks of +mympiprog.x application. The xterm will pop up for each rank, with idb +prompt ready. The example is not limited to use of Intel MPI + +### Large number of MPI ranks + +Run the idb debugger from within the MPI debug option. This will cause +the debugger to bind to all ranks and provide aggregated outputs across +the ranks, pausing execution automatically just after startup. You may +then set break points and step the execution manually. Using Intel MPI: + + $ qsub -q qexp -l select=2:ncpus=16 -X -I + qsub: waiting for job 19654.srv11 to start + qsub: job 19655.srv11 ready + + $ module load intel impi + $ mpirun -n 32 -idb ./mympiprog.x + +### Debugging multithreaded application + +Run the idb debugger in GUI mode. 
The menu Parallel contains number of +tools for debugging multiple threads. One of the most useful tools is +the **Serialize Execution** tool, which serializes execution of +concurrent threads for easy orientation and identification of +concurrency related bugs. + +Further information +------------------- + +Exhaustive manual on idb features and usage is published at Intel +website, +<http://software.intel.com/sites/products/documentation/doclib/stdxe/2013/composerxe/debugger/user_guide/index.htm> + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.md new file mode 100644 index 0000000000000000000000000000000000000000..5cef1f8f67d5d7759953e5579300adeff1c21af1 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.md @@ -0,0 +1,94 @@ +Intel IPP +========= + + + +Intel Integrated Performance Primitives +--------------------------------------- + +Intel Integrated Performance Primitives, version 7.1.1, compiled for AVX +vector instructions is available, via module ipp. The IPP is a very rich +library of highly optimized algorithmic building blocks for media and +data applications. This includes signal, image and frame processing +algorithms, such as FFT, FIR, Convolution, Optical Flow, Hough +transform, Sum, MinMax, as well as cryptographic functions, linear +algebra functions and many more. + +Check out IPP before implementing own math functions for data +processing, it is likely already there. + + $ module load ipp + +The module sets up environment variables, required for linking and +running ipp enabled applications. + +IPP example +----------- + + #include "ipp.h" + #include <stdio.h> + int main(int argc, char* argv[]) + { + const IppLibraryVersion *lib; + Ipp64u fm; + IppStatus status; + + status= ippInit(); //IPP initialization with the best optimization layer + if( status != ippStsNoErr ) { + printf("IppInit() Error:n"); + printf("%sn", ippGetStatusString(status) ); + return -1; + } + + //Get version info + lib = ippiGetLibVersion(); + printf("%s %sn", lib->Name, lib->Version); + + //Get CPU features enabled with selected library level + fm=ippGetEnabledCpuFeatures(); + printf("SSE :%cn",(fm>1)&1?'Y':'N'); + printf("SSE2 :%cn",(fm>2)&1?'Y':'N'); + printf("SSE3 :%cn",(fm>3)&1?'Y':'N'); + printf("SSSE3 :%cn",(fm>4)&1?'Y':'N'); + printf("SSE41 :%cn",(fm>6)&1?'Y':'N'); + printf("SSE42 :%cn",(fm>7)&1?'Y':'N'); + printf("AVX :%cn",(fm>8)&1 ?'Y':'N'); + printf("AVX2 :%cn", (fm>15)&1 ?'Y':'N' ); + printf("----------n"); + printf("OS Enabled AVX :%cn", (fm>9)&1 ?'Y':'N'); + printf("AES :%cn", (fm>10)&1?'Y':'N'); + printf("CLMUL :%cn", (fm>11)&1?'Y':'N'); + printf("RDRAND :%cn", (fm>13)&1?'Y':'N'); + printf("F16C :%cn", (fm>14)&1?'Y':'N'); + + return 0; + } + + Compile above example, using any compiler and the ipp module. + + $ module load intel + $ module load ipp + + $ icc testipp.c -o testipp.x -lippi -lipps -lippcore + +You will need the ipp module loaded to run the ipp enabled executable. 
+This may be avoided, by compiling library search paths into the +executable + + $ module load intel + $ module load ipp + + $ icc testipp.c -o testipp.x -Wl,-rpath=$LIBRARY_PATH -lippi -lipps -lippcore + +Code samples and documentation +------------------------------ + +Intel provides number of [Code Samples for +IPP](https://software.intel.com/en-us/articles/code-samples-for-intel-integrated-performance-primitives-library), +illustrating use of IPP. + +Read full documentation on IPP [on Intel +website,](http://software.intel.com/sites/products/search/search.php?q=&x=15&y=6&product=ipp&version=7.1&docos=lin) +in particular the [IPP Reference +manual.](http://software.intel.com/sites/products/documentation/doclib/ipp_sa/71/ipp_manual/index.htm) + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.md new file mode 100644 index 0000000000000000000000000000000000000000..935f78fcce4c90447fcd259318490f69f03fced7 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.md @@ -0,0 +1,190 @@ +Intel MKL +========= + + + +Intel Math Kernel Library +------------------------- + +Intel Math Kernel Library (Intel MKL) is a library of math kernel +subroutines, extensively threaded and optimized for maximum performance. +Intel MKL provides these basic math kernels: + +- + + + + BLAS (level 1, 2, and 3) and LAPACK linear algebra routines, + offering vector, vector-matrix, and matrix-matrix operations. +- + + + + The PARDISO direct sparse solver, an iterative sparse solver, + and supporting sparse BLAS (level 1, 2, and 3) routines for solving + sparse systems of equations. +- + + + + ScaLAPACK distributed processing linear algebra routines for + Linux* and Windows* operating systems, as well as the Basic Linear + Algebra Communications Subprograms (BLACS) and the Parallel Basic + Linear Algebra Subprograms (PBLAS). +- + + + + Fast Fourier transform (FFT) functions in one, two, or three + dimensions with support for mixed radices (not limited to sizes that + are powers of 2), as well as distributed versions of + these functions. +- + + + + Vector Math Library (VML) routines for optimized mathematical + operations on vectors. +- + + + + Vector Statistical Library (VSL) routines, which offer + high-performance vectorized random number generators (RNG) for + several probability distributions, convolution and correlation + routines, and summary statistics functions. +- + + + + Data Fitting Library, which provides capabilities for + spline-based approximation of functions, derivatives and integrals + of functions, and search. +- Extended Eigensolver, a shared memory version of an eigensolver + based on the Feast Eigenvalue Solver. + +For details see the [Intel MKL Reference +Manual](http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mklman/index.htm). + +Intel MKL version 13.5.192 is available on Anselm + + $ module load mkl + +The module sets up environment variables, required for linking and +running mkl enabled applications. The most important variables are the +$MKLROOT, $MKL_INC_DIR, $MKL_LIB_DIR and $MKL_EXAMPLES + +The MKL library may be linked using any compiler. +With intel compiler use -mkl option to link default threaded MKL. + +### Interfaces + +The MKL library provides number of interfaces. The fundamental once are +the LP64 and ILP64. 
The Intel MKL ILP64 libraries use the 64-bit integer +type (necessary for indexing large arrays, with more than 231^-1 +elements), whereas the LP64 libraries index arrays with the 32-bit +integer type. + + |Interface|Integer type| + ----- |---|---|------------------------------------- + |LP64|32-bit, int, integer(kind=4), MPI_INT| + ILP64 64-bit, long int, integer(kind=8), MPI_INT64 + +### Linking + +Linking MKL libraries may be complex. Intel [mkl link line +advisor](http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor) +helps. See also [examples](intel-mkl.html#examples) below. + +You will need the mkl module loaded to run the mkl enabled executable. +This may be avoided, by compiling library search paths into the +executable. Include rpath on the compile line: + + $ icc .... -Wl,-rpath=$LIBRARY_PATH ... + +### Threading + +Advantage in using the MKL library is that it brings threaded +parallelization to applications that are otherwise not parallel. + +For this to work, the application must link the threaded MKL library +(default). Number and behaviour of MKL threads may be controlled via the +OpenMP environment variables, such as OMP_NUM_THREADS and +KMP_AFFINITY. MKL_NUM_THREADS takes precedence over OMP_NUM_THREADS + + $ export OMP_NUM_THREADS=16 + $ export KMP_AFFINITY=granularity=fine,compact,1,0 + +The application will run with 16 threads with affinity optimized for +fine grain parallelization. + +Examples +------------ + +Number of examples, demonstrating use of the MKL library and its linking +is available on Anselm, in the $MKL_EXAMPLES directory. In the +examples below, we demonstrate linking MKL to Intel and GNU compiled +program for multi-threaded matrix multiplication. + +### Working with examples + + $ module load intel + $ module load mkl + $ cp -a $MKL_EXAMPLES/cblas /tmp/ + $ cd /tmp/cblas + + $ make sointel64 function=cblas_dgemm + +In this example, we compile, link and run the cblas_dgemm example, +demonstrating use of MKL example suite installed on Anselm. + +### Example: MKL and Intel compiler + + $ module load intel + $ module load mkl + $ cp -a $MKL_EXAMPLES/cblas /tmp/ + $ cd /tmp/cblas + $ + $ icc -w source/cblas_dgemmx.c source/common_func.c -mkl -o cblas_dgemmx.x + $ ./cblas_dgemmx.x data/cblas_dgemmx.d + +In this example, we compile, link and run the cblas_dgemm example, +demonstrating use of MKL with icc -mkl option. Using the -mkl option is +equivalent to: + + $ icc -w source/cblas_dgemmx.c source/common_func.c -o cblas_dgemmx.x + -I$MKL_INC_DIR -L$MKL_LIB_DIR -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 + +In this example, we compile and link the cblas_dgemm example, using +LP64 interface to threaded MKL and Intel OMP threads implementation. + +### Example: MKL and GNU compiler + + $ module load gcc + $ module load mkl + $ cp -a $MKL_EXAMPLES/cblas /tmp/ + $ cd /tmp/cblas + + $ gcc -w source/cblas_dgemmx.c source/common_func.c -o cblas_dgemmx.x + -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lm + + $ ./cblas_dgemmx.x data/cblas_dgemmx.d + +In this example, we compile, link and run the cblas_dgemm example, +using LP64 interface to threaded MKL and gnu OMP threads implementation. + +MKL and MIC accelerators +------------------------ + +The MKL is capable to automatically offload the computations o the MIC +accelerator. See section [Intel Xeon +Phi](../intel-xeon-phi.html) for details. 
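As a quick sanity check of MKL linking, independent of the bundled $MKL_EXAMPLES suite, you may try a minimal self-contained sketch like the following (the file name testmkl.c and the tiny 2x2 matrices are our own choice for illustration, not part of the installation). It multiplies two matrices through the LP64 CBLAS interface:

    #include <stdio.h>
    #include "mkl.h"

    int main(void)
    {
        /* 2x2 matrices stored in row-major order */
        double A[4] = { 1.0, 2.0, 3.0, 4.0 };
        double B[4] = { 5.0, 6.0, 7.0, 8.0 };
        double C[4] = { 0.0, 0.0, 0.0, 0.0 };

        /* C = 1.0*A*B + 0.0*C via the threaded LP64 CBLAS interface */
        cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                    2, 2, 2, 1.0, A, 2, B, 2, 0.0, C, 2);

        printf("%f %f\n%f %f\n", C[0], C[1], C[2], C[3]);
        return 0;
    }

Compile and run it with the intel and mkl modules loaded, linking the default threaded MKL via the -mkl switch:

    $ module load intel mkl
    $ icc -mkl testmkl.c -o testmkl.x
    $ ./testmkl.x

The expected output is the product matrix 19 22 / 43 50.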
+ +Further reading +--------------- + +Read more on [Intel +website](http://software.intel.com/en-us/intel-mkl), in +particular the [MKL users +guide](https://software.intel.com/en-us/intel-mkl/documentation/linux). + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..26077232e61fd20ffac4312e58be70dcc12c7934 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.md @@ -0,0 +1,90 @@ +Intel Parallel Studio +===================== + + + +The Anselm cluster provides following elements of the Intel Parallel +Studio XE + + Intel Parallel Studio XE + ------------------------------------------------- + Intel Compilers + Intel Debugger + Intel MKL Library + Intel Integrated Performance Primitives Library + Intel Threading Building Blocks Library + +Intel compilers +--------------- + +The Intel compilers version 13.1.3 are available, via module intel. The +compilers include the icc C and C++ compiler and the ifort fortran +77/90/95 compiler. + + $ module load intel + $ icc -v + $ ifort -v + +Read more at the [Intel Compilers](intel-compilers.html) +page. + +Intel debugger +-------------- + + The intel debugger version 13.0 is available, via module intel. The +debugger works for applications compiled with C and C++ compiler and the +ifort fortran 77/90/95 compiler. The debugger provides java GUI +environment. Use [X +display](https://docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/resolveuid/11e53ad0d2fd4c5187537f4baeedff33) +for running the GUI. + + $ module load intel + $ idb + +Read more at the [Intel Debugger](intel-debugger.html) +page. + +Intel Math Kernel Library +------------------------- + +Intel Math Kernel Library (Intel MKL) is a library of math kernel +subroutines, extensively threaded and optimized for maximum performance. +Intel MKL unites and provides these basic components: BLAS, LAPACK, +ScaLapack, PARDISO, FFT, VML, VSL, Data fitting, Feast Eigensolver and +many more. + + $ module load mkl + +Read more at the [Intel MKL](intel-mkl.html) page. + +Intel Integrated Performance Primitives +--------------------------------------- + +Intel Integrated Performance Primitives, version 7.1.1, compiled for AVX +is available, via module ipp. The IPP is a library of highly optimized +algorithmic building blocks for media and data applications. This +includes signal, image and frame processing algorithms, such as FFT, +FIR, Convolution, Optical Flow, Hough transform, Sum, MinMax and many +more. + + $ module load ipp + +Read more at the [Intel +IPP](intel-integrated-performance-primitives.html) page. + +Intel Threading Building Blocks +------------------------------- + +Intel Threading Building Blocks (Intel TBB) is a library that supports +scalable parallel programming using standard ISO C++ code. It does not +require special languages or compilers. It is designed to promote +scalable data parallel programming. Additionally, it fully supports +nested parallelism, so you can build larger parallel components from +smaller parallel components. To use the library, you specify tasks, not +threads, and let the library map tasks onto threads in an efficient +manner. + + $ module load tbb + +Read more at the [Intel TBB](intel-tbb.html) page. 
+ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.md new file mode 100644 index 0000000000000000000000000000000000000000..29c0fa654de6a6bfe8bf4f34b0ba73c756b28a5b --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.md @@ -0,0 +1,54 @@ +Intel TBB +========= + + + +Intel Threading Building Blocks +------------------------------- + +Intel Threading Building Blocks (Intel TBB) is a library that supports +scalable parallel programming using standard ISO C++ code. It does not +require special languages or compilers. To use the library, you specify +tasks, not threads, and let the library map tasks onto threads in an +efficient manner. The tasks are executed by a runtime scheduler and may +be offloaded to [MIC +accelerator](../intel-xeon-phi.html). + +Intel TBB version 4.1 is available on Anselm + + $ module load tbb + +The module sets up environment variables, required for linking and +running tbb enabled applications. + +Link the tbb library, using -ltbb + +Examples +-------- + +Number of examples, demonstrating use of TBB and its built-in scheduler +is available on Anselm, in the $TBB_EXAMPLES directory. + + $ module load intel + $ module load tbb + $ cp -a $TBB_EXAMPLES/common $TBB_EXAMPLES/parallel_reduce /tmp/ + $ cd /tmp/parallel_reduce/primes + $ icc -O2 -DNDEBUG -o primes.x main.cpp primes.cpp -ltbb + $ ./primes.x + +In this example, we compile, link and run the primes example, +demonstrating use of parallel task-based reduce in computation of prime +numbers. + +You will need the tbb module loaded to run the tbb enabled executable. +This may be avoided, by compiling library search paths into the +executable. + + $ icc -O2 -o primes.x main.cpp primes.cpp -Wl,-rpath=$LIBRARY_PATH -ltbb + +Further reading +--------------- + +Read more on Intel website, +<http://software.intel.com/sites/products/documentation/doclib/tbb_sa/help/index.htm> + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.md new file mode 100644 index 0000000000000000000000000000000000000000..aba81d0dd10865fbf1eae0bcc866efa394615fe4 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.md @@ -0,0 +1,928 @@ +Intel Xeon Phi +============== + +A guide to Intel Xeon Phi usage + + + +Intel Xeon Phi can be programmed in several modes. The default mode on +Anselm is offload mode, but all modes described in this document are +supported. + +Intel Utilities for Xeon Phi +---------------------------- + +To get access to a compute node with Intel Xeon Phi accelerator, use the +PBS interactive session + + $ qsub -I -q qmic -A NONE-0-0 + +To set up the environment module "Intel" has to be loaded + + $ module load intel/13.5.192 + +Information about the hardware can be obtained by running +the micinfo program on the host. + + $ /usr/bin/micinfo + +The output of the "micinfo" utility executed on one of the Anselm node +is as follows. 
(note: to get PCIe related details the command has to be +run with root privileges) + + MicInfo Utility Log + + Created Mon Jul 22 00:23:50 2013 + +        System Info +                HOST OS                : Linux +                OS Version             : 2.6.32-279.5.2.bl6.Bull.33.x86_64 +                Driver Version         : 6720-15 +                MPSS Version           : 2.1.6720-15 +                Host Physical Memory   : 98843 MB + + Device No: 0, Device Name: mic0 + +        Version +                Flash Version           : 2.1.03.0386 +                SMC Firmware Version    : 1.15.4830 +                SMC Boot Loader Version : 1.8.4326 +                uOS Version             : 2.6.38.8-g2593b11 +                Device Serial Number    : ADKC30102482 + +        Board +                Vendor ID               : 0x8086 +                Device ID               : 0x2250 +                Subsystem ID            : 0x2500 +                Coprocessor Stepping ID : 3 +                PCIe Width              : x16 +                PCIe Speed              : 5 GT/s +                PCIe Max payload size   : 256 bytes +                PCIe Max read req size  : 512 bytes +                Coprocessor Model       : 0x01 +                Coprocessor Model Ext   : 0x00 +                Coprocessor Type        : 0x00 +                Coprocessor Family      : 0x0b +                Coprocessor Family Ext  : 0x00 +                Coprocessor Stepping    : B1 +                Board SKU               : B1PRQ-5110P/5120D +                ECC Mode                : Enabled +                SMC HW Revision         : Product 225W Passive CS + +        Cores +                Total No of Active Cores : 60 +                Voltage                 : 1032000 uV +                Frequency               : 1052631 kHz + +        Thermal +                Fan Speed Control       : N/A +                Fan RPM                 : N/A +                Fan PWM                 : N/A +                Die Temp                : 49 C + +        GDDR +                GDDR Vendor             : Elpida +                GDDR Version            : 0x1 +                GDDR Density            : 2048 Mb +                GDDR Size               : 7936 MB +                GDDR Technology         : GDDR5 +                GDDR Speed              : 5.000000 GT/s +                GDDR Frequency          : 2500000 kHz +                GDDR Voltage            : 1501000 uV + +Offload Mode +------------ + +To compile a code for Intel Xeon Phi a MPSS stack has to be installed on +the machine where compilation is executed. Currently the MPSS stack is +only installed on compute nodes equipped with accelerators. + + $ qsub -I -q qmic -A NONE-0-0 + $ module load intel/13.5.192 + +For debugging purposes it is also recommended to set environment +variable "OFFLOAD_REPORT". Value can be set from 0 to 3, where higher +number means more debugging information. + + export OFFLOAD_REPORT=3 + +A very basic example of code that employs offload programming technique +is shown in the next listing. Please note that this code is sequential +and utilizes only single core of the accelerator. 
+ + $ vim source-offload.cpp + + #include <iostream> + + int main(int argc, char* argv[]) + { +    const int niter = 100000; +    double result = 0; + +  #pragma offload target(mic) +    for (int i = 0; i < niter; ++i) { +        const double t = (i + 0.5) / niter; +        result += 4.0 / (t * t + 1.0); +    } +    result /= niter; +    std::cout << "Pi ~ " << result << 'n'; + } + +To compile a code using Intel compiler run + + $ icc source-offload.cpp -o bin-offload + +To execute the code, run the following command on the host + + ./bin-offload + +### Parallelization in Offload Mode Using OpenMP + +One way of paralelization a code for Xeon Phi is using OpenMP +directives. The following example shows code for parallel vector +addition. + + $ vim ./vect-add + + #include <stdio.h> + + typedef int T; + + #define SIZE 1000 + + #pragma offload_attribute(push, target(mic)) + T in1[SIZE]; + T in2[SIZE]; + T res[SIZE]; + #pragma offload_attribute(pop) + + // MIC function to add two vectors + __attribute__((target(mic))) add_mic(T *a, T *b, T *c, int size) { +  int i = 0; +  #pragma omp parallel for +    for (i = 0; i < size; i++) +      c[i] = a[i] + b[i]; + } + + // CPU function to add two vectors + void add_cpu (T *a, T *b, T *c, int size) { +  int i; +  for (i = 0; i < size; i++) +    c[i] = a[i] + b[i]; + } + + // CPU function to generate a vector of random numbers + void random_T (T *a, int size) { +  int i; +  for (i = 0; i < size; i++) +    a[i] = rand() % 10000; // random number between 0 and 9999 + } + + // CPU function to compare two vectors + int compare(T *a, T *b, T size ){ +  int pass = 0; +  int i; +  for (i = 0; i < size; i++){ +    if (a[i] != b[i]) { +      printf("Value mismatch at location %d, values %d and %dn",i, a[i], b[i]); +      pass = 1; +    } +  } +  if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn"); +  return pass; + } + + int main() + { +  int i; +  random_T(in1, SIZE); +  random_T(in2, SIZE); + +  #pragma offload target(mic) in(in1,in2) inout(res) +  { + +    // Parallel loop from main function +    #pragma omp parallel for +    for (i=0; i<SIZE; i++) +      res[i] = in1[i] + in2[i]; + +    // or parallel loop is called inside the function +    add_mic(in1, in2, res, SIZE); + +  } + +  //Check the results with CPU implementation +  T res_cpu[SIZE]; +  add_cpu(in1, in2, res_cpu, SIZE); +  compare(res, res_cpu, SIZE); + + } + +During the compilation Intel compiler shows which loops have been +vectorized in both host and accelerator. This can be enabled with +compiler option "-vec-report2". To compile and execute the code run + + $ icc vect-add.c -openmp_report2 -vec-report2 -o vect-add + + $ ./vect-add + +Some interesting compiler flags useful not only for code debugging are: + +Debugging + openmp_report[0|1|2] - controls the compiler based vectorization +diagnostic level + vec-report[0|1|2] - controls the OpenMP parallelizer diagnostic +level + +Performance ooptimization + xhost - FOR HOST ONLY - to generate AVX (Advanced Vector Extensions) +instructions. + +Automatic Offload using Intel MKL Library +----------------------------------------- + +Intel MKL includes an Automatic Offload (AO) feature that enables +computationally intensive MKL functions called in user code to benefit +from attached Intel Xeon Phi coprocessors automatically and +transparently. + +Behavioral of automatic offload mode is controlled by functions called +within the program or by environmental variables. 
Complete list of +controls is listed [ +here](http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_userguide_lnx/GUID-3DC4FC7D-A1E4-423D-9C0C-06AB265FFA86.htm). + +The Automatic Offload may be enabled by either an MKL function call +within the code: + + mkl_mic_enable(); + +or by setting environment variable + + $ export MKL_MIC_ENABLE=1 + +To get more information about automatic offload please refer to "[Using +Intel® MKL Automatic Offload on Intel ® Xeon Phi™ +Coprocessors](http://software.intel.com/sites/default/files/11MIC42_How_to_Use_MKL_Automatic_Offload_0.pdf)" +white paper or [ Intel MKL +documentation](https://software.intel.com/en-us/articles/intel-math-kernel-library-documentation). + +### Automatic offload example + +At first get an interactive PBS session on a node with MIC accelerator +and load "intel" module that automatically loads "mkl" module as well. + + $ qsub -I -q qmic -A OPEN-0-0 -l select=1:ncpus=16 + $ module load intel + +Following example show how to automatically offload an SGEMM (single +precision - g dir="auto">eneral matrix multiply) function to +MIC coprocessor. The code can be copied to a file and compiled without +any necessary modification. + + $ vim sgemm-ao-short.c + + #include <stdio.h> + #include <stdlib.h> + #include <malloc.h> + #include <stdint.h> + + #include "mkl.h" + + int main(int argc, char **argv) + { +        float *A, *B, *C; /* Matrices */ + +        MKL_INT N = 2560; /* Matrix dimensions */ +        MKL_INT LD = N; /* Leading dimension */ +        int matrix_bytes; /* Matrix size in bytes */ +        int matrix_elements; /* Matrix size in elements */ + +        float alpha = 1.0, beta = 1.0; /* Scaling factors */ +        char transa = 'N', transb = 'N'; /* Transposition options */ + +        int i, j; /* Counters */ + +        matrix_elements = N * N; +        matrix_bytes = sizeof(float) * matrix_elements; + +        /* Allocate the matrices */ +        A = malloc(matrix_bytes); B = malloc(matrix_bytes); C = malloc(matrix_bytes); + +        /* Initialize the matrices */ +        for (i = 0; i < matrix_elements; i++) { +                A[i] = 1.0; B[i] = 2.0; C[i] = 0.0; +        } + +        printf("Computing SGEMM on the hostn"); +        sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); + +        printf("Enabling Automatic Offloadn"); +        /* Alternatively, set environment variable MKL_MIC_ENABLE=1 */ +        mkl_mic_enable(); +        +        int ndevices = mkl_mic_get_device_count(); /* Number of MIC devices */ +        printf("Automatic Offload enabled: %d MIC devices presentn",  ndevices); + +        printf("Computing SGEMM with automatic workdivisionn"); +        sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); + +        /* Free the matrix memory */ +        free(A); free(B); free(C); + +        printf("Donen"); + +    return 0; + } + +Please note: This example is simplified version of an example from MKL. +The expanded version can be found here: +$MKL_EXAMPLES/mic_ao/blasc/source/sgemm.c** + +To compile a code using Intel compiler use: + + $ icc -mkl sgemm-ao-short.c -o sgemm + +For debugging purposes enable the offload report to see more information +about automatic offloading. 
+ + $ export OFFLOAD_REPORT=2 + +The output of a code should look similar to following listing, where +lines starting with [MKL] are generated by offload reporting: + + Computing SGEMM on the host + Enabling Automatic Offload + Automatic Offload enabled: 1 MIC devices present + Computing SGEMM with automatic workdivision + [MKL] [MIC --] [AO Function]   SGEMM + [MKL] [MIC --] [AO SGEMM Workdivision] 0.00 1.00 + [MKL] [MIC 00] [AO SGEMM CPU Time]     0.463351 seconds + [MKL] [MIC 00] [AO SGEMM MIC Time]     0.179608 seconds + [MKL] [MIC 00] [AO SGEMM CPU->MIC Data] 52428800 bytes + [MKL] [MIC 00] [AO SGEMM MIC->CPU Data] 26214400 bytes + Done + + + +Native Mode +----------- + +In the native mode a program is executed directly on Intel Xeon Phi +without involvement of the host machine. Similarly to offload mode, the +code is compiled on the host computer with Intel compilers. + +To compile a code user has to be connected to a compute with MIC and +load Intel compilers module. To get an interactive session on a compute +node with an Intel Xeon Phi and load the module use following commands: + + $ qsub -I -q qmic -A NONE-0-0 + + $ module load intel/13.5.192 + +Please note that particular version of the Intel module is specified. +This information is used later to specify the correct library paths. + +To produce a binary compatible with Intel Xeon Phi architecture user has +to specify "-mmic" compiler flag. Two compilation examples are shown +below. The first example shows how to compile OpenMP parallel code +"vect-add.c" for host only: + + $ icc -xhost -no-offload -fopenmp vect-add.c -o vect-add-host + +To run this code on host, use: + + $ ./vect-add-host + +The second example shows how to compile the same code for Intel Xeon +Phi: + + $ icc -mmic -fopenmp vect-add.c -o vect-add-mic + +### Execution of the Program in Native Mode on Intel Xeon Phi + +The user access to the Intel Xeon Phi is through the SSH. Since user +home directories are mounted using NFS on the accelerator, users do not +have to copy binary files or libraries between the host and accelerator. + + +To connect to the accelerator run: + + $ ssh mic0 + +If the code is sequential, it can be executed directly: + + mic0 $ ~/path_to_binary/vect-add-seq-mic + +If the code is parallelized using OpenMP a set of additional libraries +is required for execution. To locate these libraries new path has to be +added to the LD_LIBRARY_PATH environment variable prior to the +execution: + + mic0 $ export LD_LIBRARY_PATH=/apps/intel/composer_xe_2013.5.192/compiler/lib/mic:$LD_LIBRARY_PATH + +Please note that the path exported in the previous example contains path +to a specific compiler (here the version is 5.192). This version number +has to match with the version number of the Intel compiler module that +was used to compile the code on the host computer. + +For your information the list of libraries and their location required +for execution of an OpenMP parallel code on Intel Xeon Phi is: + +/apps/intel/composer_xe_2013.5.192/compiler/lib/mic + +libiomp5.so +libimf.so +libsvml.so +libirng.so +libintlc.so.5 + +Finally, to run the compiled code use: + + $ ~/path_to_binary/vect-add-mic + +OpenCL +------------------- + +OpenCL (Open Computing Language) is an open standard for +general-purpose parallel programming for diverse mix of multi-core CPUs, +GPU coprocessors, and other parallel processors. 
OpenCL provides a +flexible execution model and uniform programming environment for +software developers to write portable code for systems running on both +the CPU and graphics processors or accelerators like the Intel® Xeon +Phi. + +On Anselm OpenCL is installed only on compute nodes with MIC +accelerator, therefore OpenCL code can be compiled only on these nodes. + + module load opencl-sdk opencl-rt + +Always load "opencl-sdk" (providing devel files like headers) and +"opencl-rt" (providing dynamic library libOpenCL.so) modules to compile +and link OpenCL code. Load "opencl-rt" for running your compiled code. + +There are two basic examples of OpenCL code in the following +directory: + + /apps/intel/opencl-examples/ + +First example "CapsBasic" detects OpenCL compatible hardware, here +CPU and MIC, and prints basic information about the capabilities of +it. + + /apps/intel/opencl-examples/CapsBasic/capsbasic + +To compile and run the example copy it to your home directory, get +a PBS interactive session on of the nodes with MIC and run make for +compilation. Make files are very basic and shows how the OpenCL code can +be compiled on Anselm. + + $ cp /apps/intel/opencl-examples/CapsBasic/* . + $ qsub -I -q qmic -A NONE-0-0 + $ make + +The compilation command for this example is: + + $ g++ capsbasic.cpp -lOpenCL -o capsbasic -I/apps/intel/opencl/include/ + +After executing the complied binary file, following output should +be displayed. + + ./capsbasic + + Number of available platforms: 1 + Platform names: +    [0] Intel(R) OpenCL [Selected] + Number of devices available for each type: +    CL_DEVICE_TYPE_CPU: 1 +    CL_DEVICE_TYPE_GPU: 0 +    CL_DEVICE_TYPE_ACCELERATOR: 1 + + ** Detailed information for each device *** + + CL_DEVICE_TYPE_CPU[0] +    CL_DEVICE_NAME:       Intel(R) Xeon(R) CPU E5-2470 0 @ 2.30GHz +    CL_DEVICE_AVAILABLE: 1 + + ... + + CL_DEVICE_TYPE_ACCELERATOR[0] +    CL_DEVICE_NAME: Intel(R) Many Integrated Core Acceleration Card +    CL_DEVICE_AVAILABLE: 1 + + ... + +More information about this example can be found on Intel website: +<http://software.intel.com/en-us/vcsource/samples/caps-basic/> + +The second example that can be found in +"/apps/intel/opencl-examples" >directory is General Matrix +Multiply. You can follow the the same procedure to download the example +to your directory and compile it. + + $ cp -r /apps/intel/opencl-examples/* . + $ qsub -I -q qmic -A NONE-0-0 + $ cd GEMM + $ make + +The compilation command for this example is: + + $ g++ cmdoptions.cpp gemm.cpp ../common/basic.cpp ../common/cmdparser.cpp ../common/oclobject.cpp -I../common -lOpenCL -o gemm -I/apps/intel/opencl/include/ + +To see the performance of Intel Xeon Phi performing the DGEMM run +the example as follows: + + ./gemm -d 1 + Platforms (1): + [0] Intel(R) OpenCL [Selected] + Devices (2): + [0] Intel(R) Xeon(R) CPU E5-2470 0 @ 2.30GHz + [1] Intel(R) Many Integrated Core Acceleration Card [Selected] + Build program options: "-DT=float -DTILE_SIZE_M=1 -DTILE_GROUP_M=16 -DTILE_SIZE_N=128 -DTILE_GROUP_N=1 -DTILE_SIZE_K=8" + Running gemm_nn kernel with matrix size: 3968x3968 + Memory row stride to ensure necessary alignment: 15872 bytes + Size of memory region for one matrix: 62980096 bytes + Using alpha = 0.57599 and beta = 0.872412 + ... + Host time: 0.292953 sec. + Host perf: 426.635 GFLOPS + Host time: 0.293334 sec. + Host perf: 426.081 GFLOPS + ... + +Please note: GNU compiler is used to compile the OpenCL codes for +Intel MIC. You do not need to load Intel compiler module. 
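If you prefer to start from a minimal piece of your own code rather than the provided samples, the following sketch (the file name listdevices.cpp is our own choice for illustration) simply enumerates the available OpenCL platforms and devices, similarly to the CapsBasic example, and can be compiled with the same command pattern shown above:

    #include <stdio.h>
    #include <CL/cl.h>

    int main(void)
    {
        cl_uint nplat = 0;
        clGetPlatformIDs(0, NULL, &nplat);      /* query number of platforms */
        if (nplat > 8) nplat = 8;

        cl_platform_id plat[8];
        clGetPlatformIDs(nplat, plat, NULL);

        for (cl_uint i = 0; i < nplat; i++) {
            char name[256];
            cl_uint ndev = 0;
            clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof(name), name, NULL);
            clGetDeviceIDs(plat[i], CL_DEVICE_TYPE_ALL, 0, NULL, &ndev);
            printf("Platform %u: %s, %u device(s)\n", i, name, ndev);
        }
        return 0;
    }

To build and run it on a node with MIC accelerator:

    $ module load opencl-sdk opencl-rt
    $ g++ listdevices.cpp -lOpenCL -o listdevices -I/apps/intel/opencl/include/
    $ ./listdevices

On a MIC equipped compute node the listing should include both the Intel Xeon CPU and the Many Integrated Core Acceleration Card, as in the CapsBasic output above.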
+ +MPI +----------------- + +### Environment setup and compilation + +Again an MPI code for Intel Xeon Phi has to be compiled on a compute +node with accelerator and MPSS software stack installed. To get to a +compute node with accelerator use: + + $ qsub -I -q qmic -A NONE-0-0 + +The only supported implementation of MPI standard for Intel Xeon Phi is +Intel MPI. To setup a fully functional development environment a +combination of Intel compiler and Intel MPI has to be used. On a host +load following modules before compilation: + + $ module load intel/13.5.192 impi/4.1.1.036 + +To compile an MPI code for host use: + + $ mpiicc -xhost -o mpi-test mpi-test.c + +To compile the same code for Intel Xeon Phi architecture use: + + $ mpiicc -mmic -o mpi-test-mic mpi-test.c + +An example of basic MPI version of "hello-world" example in C language, +that can be executed on both host and Xeon Phi is (can be directly copy +and pasted to a .c file) + + #include <stdio.h> + #include <mpi.h> + + int main (argc, argv) +     int argc; +     char *argv[]; + { +  int rank, size; + +  int len; +  char node[MPI_MAX_PROCESSOR_NAME]; + +  MPI_Init (&argc, &argv);     /* starts MPI */ +  MPI_Comm_rank (MPI_COMM_WORLD, &rank);       /* get current process id */ +  MPI_Comm_size (MPI_COMM_WORLD, &size);       /* get number of processes */ + +  MPI_Get_processor_name(node,&len); + +  printf( "Hello world from process %d of %d on host %s n", rank, size, node ); +  MPI_Finalize(); +  return 0; + } + +### MPI programming models + +Intel MPI for the Xeon Phi coprocessors offers different MPI +programming models: + +Host-only model** - all MPI ranks reside on the host. The coprocessors +can be used by using offload pragmas. (Using MPI calls inside offloaded +code is not supported.)** + +Coprocessor-only model** - all MPI ranks reside only on the +coprocessors. + +Symmetric model** - the MPI ranks reside on both the host and the +coprocessor. Most general MPI case. + +###Host-only model + +In this case all environment variables are set by modules, +so to execute the compiled MPI program on a single node, use: + + $ mpirun -np 4 ./mpi-test + +The output should be similar to: + + Hello world from process 1 of 4 on host cn207 + Hello world from process 3 of 4 on host cn207 + Hello world from process 2 of 4 on host cn207 + Hello world from process 0 of 4 on host cn207 + +### Coprocessor-only model + +There are two ways how to execute an MPI code on a single +coprocessor: 1.) lunch the program using "**mpirun**" from the +coprocessor; or 2.) lunch the task using "**mpiexec.hydra**" from a +host. + +Execution on coprocessor** + +Similarly to execution of OpenMP programs in native mode, since the +environmental module are not supported on MIC, user has to setup paths +to Intel MPI libraries and binaries manually. One time setup can be done +by creating a "**.profile**" file in user's home directory. This file +sets up the environment on the MIC automatically once user access to the +accelerator through the SSH. + + $ vim ~/.profile + + PS1='[u@h W]$ ' + export PATH=/usr/bin:/usr/sbin:/bin:/sbin + + #OpenMP + export LD_LIBRARY_PATH=/apps/intel/composer_xe_2013.5.192/compiler/lib/mic:$LD_LIBRARY_PATH + + #Intel MPI + export LD_LIBRARY_PATH=/apps/intel/impi/4.1.1.036/mic/lib/:$LD_LIBRARY_PATH + export PATH=/apps/intel/impi/4.1.1.036/mic/bin/:$PATH + +Please note: + - this file sets up both environmental variable for both MPI and OpenMP +libraries. 
+ - this file sets up the paths to a particular version of Intel MPI +library and particular version of an Intel compiler. These versions have +to match with loaded modules. + +To access a MIC accelerator located on a node that user is currently +connected to, use: + + $ ssh mic0 + +or in case you need specify a MIC accelerator on a particular node, use: + + $ ssh cn207-mic0 + +To run the MPI code in parallel on multiple core of the accelerator, +use: + + $ mpirun -np 4 ./mpi-test-mic + +The output should be similar to: + + Hello world from process 1 of 4 on host cn207-mic0 + Hello world from process 2 of 4 on host cn207-mic0 + Hello world from process 3 of 4 on host cn207-mic0 + Hello world from process 0 of 4 on host cn207-mic0 + +**Execution on host** + +If the MPI program is launched from host instead of the coprocessor, the +environmental variables are not set using the ".profile" file. Therefore +user has to specify library paths from the command line when calling +"mpiexec". + +First step is to tell mpiexec that the MPI should be executed on a local +accelerator by setting up the environmental variable "I_MPI_MIC" + + $ export I_MPI_MIC=1 + +Now the MPI program can be executed as: + + $ mpiexec.hydra -genv LD_LIBRARY_PATH /apps/intel/impi/4.1.1.036/mic/lib/ -host mic0 -n 4 ~/mpi-test-mic + +or using mpirun + + $ mpirun -genv LD_LIBRARY_PATH /apps/intel/impi/4.1.1.036/mic/lib/ -host mic0 -n 4 ~/mpi-test-mic + +Please note: + - the full path to the binary has to specified (here: +"**>~/mpi-test-mic**") + - the LD_LIBRARY_PATH has to match with Intel MPI module used to +compile the MPI code + +The output should be again similar to: + + Hello world from process 1 of 4 on host cn207-mic0 + Hello world from process 2 of 4 on host cn207-mic0 + Hello world from process 3 of 4 on host cn207-mic0 + Hello world from process 0 of 4 on host cn207-mic0 + +Please note that the "mpiexec.hydra" requires a file +"**>pmi_proxy**" from Intel MPI library to be copied to the +MIC filesystem. If the file is missing please contact the system +administrators. A simple test to see if the file is present is to +execute: + +   $ ssh mic0 ls /bin/pmi_proxy +  /bin/pmi_proxy + +**Execution on host - MPI processes distributed over multiple +accelerators on multiple nodes** + +To get access to multiple nodes with MIC accelerator, user has to +use PBS to allocate the resources. To start interactive session, that +allocates 2 compute nodes = 2 MIC accelerators run qsub command with +following parameters: + + $ qsub -I -q qmic -A NONE-0-0 -l select=2:ncpus=16 + + $ module load intel/13.5.192 impi/4.1.1.036 + +This command connects user through ssh to one of the nodes +immediately. To see the other nodes that have been allocated use: + + $ cat $PBS_NODEFILE + +For example: + + cn204.bullx + cn205.bullx + +This output means that the PBS allocated nodes cn204 and cn205, +which means that user has direct access to "**cn204-mic0**" and +"**cn-205-mic0**" accelerators. + +Please note: At this point user can connect to any of the +allocated nodes or any of the allocated MIC accelerators using ssh: +- to connect to the second node : ** $ ssh +cn205** +- to connect to the accelerator on the first node from the first +node: **$ ssh cn204-mic0** or + $ ssh mic0** +-** to connect to the accelerator on the second node from the first +node: **$ ssh cn205-mic0** + +At this point we expect that correct modules are loaded and binary +is compiled. For parallel execution the mpiexec.hydra is used. 
+Again the first step is to tell mpiexec that the MPI can be executed on +MIC accelerators by setting up the environmental variable "I_MPI_MIC" + + $ export I_MPI_MIC=1 + +The launch the MPI program use: + + $ mpiexec.hydra -genv LD_LIBRARY_PATH /apps/intel/impi/4.1.1.036/mic/lib/ + -genv I_MPI_FABRICS_LIST tcp +  -genv I_MPI_FABRICS shm:tcp +  -genv I_MPI_TCP_NETMASK=10.1.0.0/16 + -host cn204-mic0 -n 4 ~/mpi-test-mic + : -host cn205-mic0 -n 6 ~/mpi-test-mic + +or using mpirun: + + $ mpirun -genv LD_LIBRARY_PATH /apps/intel/impi/4.1.1.036/mic/lib/ + -genv I_MPI_FABRICS_LIST tcp +  -genv I_MPI_FABRICS shm:tcp +  -genv I_MPI_TCP_NETMASK=10.1.0.0/16 + -host cn204-mic0 -n 4 ~/mpi-test-mic + : -host cn205-mic0 -n 6 ~/mpi-test-mic + +In this case four MPI processes are executed on accelerator cn204-mic +and six processes are executed on accelerator cn205-mic0. The sample +output (sorted after execution) is: + + Hello world from process 0 of 10 on host cn204-mic0 + Hello world from process 1 of 10 on host cn204-mic0 + Hello world from process 2 of 10 on host cn204-mic0 + Hello world from process 3 of 10 on host cn204-mic0 + Hello world from process 4 of 10 on host cn205-mic0 + Hello world from process 5 of 10 on host cn205-mic0 + Hello world from process 6 of 10 on host cn205-mic0 + Hello world from process 7 of 10 on host cn205-mic0 + Hello world from process 8 of 10 on host cn205-mic0 + Hello world from process 9 of 10 on host cn205-mic0 + +The same way MPI program can be executed on multiple hosts: + + $ mpiexec.hydra -genv LD_LIBRARY_PATH /apps/intel/impi/4.1.1.036/mic/lib/ + -genv I_MPI_FABRICS_LIST tcp +  -genv I_MPI_FABRICS shm:tcp +  -genv I_MPI_TCP_NETMASK=10.1.0.0/16 + -host cn204 -n 4 ~/mpi-test + : -host cn205 -n 6 ~/mpi-test + +###Symmetric model + +In a symmetric mode MPI programs are executed on both host +computer(s) and MIC accelerator(s). Since MIC has a different +architecture and requires different binary file produced by the Intel +compiler two different files has to be compiled before MPI program is +executed. + +In the previous section we have compiled two binary files, one for +hosts "**mpi-test**" and one for MIC accelerators "**mpi-test-mic**". +These two binaries can be executed at once using mpiexec.hydra: + + $ mpiexec.hydra + -genv I_MPI_FABRICS_LIST tcp + -genv I_MPI_FABRICS shm:tcp +  -genv I_MPI_TCP_NETMASK=10.1.0.0/16 + -genv LD_LIBRARY_PATH /apps/intel/impi/4.1.1.036/mic/lib/ + -host cn205 -n 2 ~/mpi-test + : -host cn205-mic0 -n 2 ~/mpi-test-mic + +In this example the first two parameters (line 2 and 3) sets up required +environment variables for execution. The third line specifies binary +that is executed on host (here cn205) and the last line specifies the +binary that is execute on the accelerator (here cn205-mic0). + +The output of the program is: + + Hello world from process 0 of 4 on host cn205 + Hello world from process 1 of 4 on host cn205 + Hello world from process 2 of 4 on host cn205-mic0 + Hello world from process 3 of 4 on host cn205-mic0 + +The execution procedure can be simplified by using the mpirun +command with the machine file a a parameter. Machine file contains list +of all nodes and accelerators that should used to execute MPI processes. 
+ +An example of a machine file that uses 2 >hosts (**cn205** +and **cn206**) and 2 accelerators **(cn205-mic0** and **cn206-mic0**) to +run 2 MPI processes on each of them: + + $ cat hosts_file_mix + cn205:2 + cn205-mic0:2 + cn206:2 + cn206-mic0:2 + +In addition if a naming convention is set in a way that the name +of the binary for host is **"bin_name"** and the name of the binary +for the accelerator is **"bin_name-mic"** then by setting up the +environment variable **I_MPI_MIC_POSTFIX** to **"-mic"** user do not +have to specify the names of booth binaries. In this case mpirun needs +just the name of the host binary file (i.e. "mpi-test") and uses the +suffix to get a name of the binary for accelerator (i..e. +"mpi-test-mic"). + + $ export I_MPI_MIC_POSTFIX=-mic + + >To run the MPI code using mpirun and the machine file +"hosts_file_mix" use: + + $ mpirun + -genv I_MPI_FABRICS shm:tcp + -genv LD_LIBRARY_PATH /apps/intel/impi/4.1.1.036/mic/lib/ + -genv I_MPI_FABRICS_LIST tcp +  -genv I_MPI_FABRICS shm:tcp +  -genv I_MPI_TCP_NETMASK=10.1.0.0/16 + -machinefile hosts_file_mix + ~/mpi-test + +A possible output of the MPI "hello-world" example executed on two +hosts and two accelerators is: + + Hello world from process 0 of 8 on host cn204 + Hello world from process 1 of 8 on host cn204 + Hello world from process 2 of 8 on host cn204-mic0 + Hello world from process 3 of 8 on host cn204-mic0 + Hello world from process 4 of 8 on host cn205 + Hello world from process 5 of 8 on host cn205 + Hello world from process 6 of 8 on host cn205-mic0 + Hello world from process 7 of 8 on host cn205-mic0 + +Please note: At this point the MPI communication between MIC +accelerators on different nodes uses 1Gb Ethernet only. + +Using the PBS automatically generated node-files + +PBS also generates a set of node-files that can be used instead of +manually creating a new one every time. Three node-files are genereated: + +**Host only node-file:** + - /lscratch/${PBS_JOBID}/nodefile-cn +MIC only node-file: + - /lscratch/${PBS_JOBID}/nodefile-mic +Host and MIC node-file: + - /lscratch/${PBS_JOBID}/nodefile-mix + +Please note each host or accelerator is listed only per files. User has +to specify how many jobs should be executed per node using "-n" +parameter of the mpirun command. + +Optimization +------------ + +For more details about optimization techniques please read Intel +document [Optimization and Performance Tuning for Intel® Xeon Phi™ +Coprocessors](http://software.intel.com/en-us/articles/optimization-and-performance-tuning-for-intel-xeon-phi-coprocessors-part-1-optimization "http://software.intel.com/en-us/articles/optimization-and-performance-tuning-for-intel-xeon-phi-coprocessors-part-1-optimization") + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.md new file mode 100644 index 0000000000000000000000000000000000000000..719fa3fd918379f0dd6564387b74270fdc5be2bd --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.md @@ -0,0 +1,149 @@ +ISV Licenses +============ + +A guide to managing Independent Software Vendor licences + + + +On Anselm cluster there are also installed commercial software +applications, also known as ISV (Independent Software Vendor), which are +subjects to licensing. The licenses are limited and their usage may be +restricted only to some users or user groups. 
+ +Currently Flex License Manager based licensing is supported on the +cluster for products Ansys, Comsol and Matlab. More information about +the applications can be found in the general +[Software](../software.1.html) section. + +If an ISV application was purchased for educational (research) purposes +and also for commercial purposes, then there are always two separate +versions maintained and suffix "edu" is used in the name of the +non-commercial version. + +Overview of the licenses usage +------------------------------ + +The overview is generated every minute and is accessible from web or +command line interface. + +### Web interface + +For each license there is a table, which provides the information about +the name, number of available (purchased/licensed), number of used and +number of free license features + +<https://extranet.it4i.cz/anselm/licenses> + +### Text interface + +For each license there is a unique text file, which provides the +information about the name, number of available (purchased/licensed), +number of used and number of free license features. The text files are +accessible from the Anselm command prompt. + + Product File with license state Note + ------ |---|---|------------------------------------------- --------------------- + ansys /apps/user/licenses/ansys_features_state.txt Commercial + comsol /apps/user/licenses/comsol_features_state.txt Commercial + comsol-edu /apps/user/licenses/comsol-edu_features_state.txt Non-commercial only + matlab /apps/user/licenses/matlab_features_state.txt Commercial + matlab-edu /apps/user/licenses/matlab-edu_features_state.txt Non-commercial only + +The file has a header which serves as a legend. All the info in the +legend starts with a hash (#) so it can be easily filtered when parsing +the file via a script. + +Example of the Commercial Matlab license state: + + $ cat /apps/user/licenses/matlab_features_state.txt + # matlab + # ------------------------------------------------- + # FEATURE TOTAL USED AVAIL + # ------------------------------------------------- + MATLAB 1 1 0 + SIMULINK 1 0 1 + Curve_Fitting_Toolbox 1 0 1 + Signal_Blocks 1 0 1 + GADS_Toolbox 1 0 1 + Image_Toolbox 1 0 1 + Compiler 1 0 1 + Neural_Network_Toolbox 1 0 1 + Optimization_Toolbox 1 0 1 + Signal_Toolbox 1 0 1 + Statistics_Toolbox 1 0 1 + +License tracking in PBS Pro scheduler and users usage +----------------------------------------------------- + +Each feature of each license is accounted and checked by the scheduler +of PBS Pro. If you ask for certain licences, the scheduler won't start +the job until the asked licenses are free (available). This prevents to +crash batch jobs, just because of + unavailability of the +needed licenses. 
+ +The general format of the name is: + +feature__APP__FEATURE** + +Names of applications (APP): + +- ansys +- comsol +- comsol-edu +- matlab +- matlab-edu + + + +To get the FEATUREs of a license take a look into the corresponding +state file ([see above](isv_licenses.html#Licence)), or +use: + + |Application |List of provided features | + | --- | --- | + |ansys |<pre><code>$ grep -v "#" /apps/user/licenses/ansys_features_state.txt | cut -f1 -d' '</code></pre> | + |comsol |<pre><code>$ grep -v "#" /apps/user/licenses/comsol_features_state.txt | cut -f1 -d' '</code></pre> | + |comsol-edu |<pre><code>$ grep -v "#" /apps/user/licenses/comsol-edu_features_state.txt | cut -f1 -d' '</code></pre> | + |matlab |<pre><code>$ grep -v "#" /apps/user/licenses/matlab_features_state.txt | cut -f1 -d' '</code></pre> | + |matlab-edu |<pre><code>$ grep -v "#" /apps/user/licenses/matlab-edu_features_state.txt | cut -f1 -d' '</code></pre> | + + + +Example of PBS Pro resource name, based on APP and FEATURE name: + +<col width="33%" /> +<col width="33%" /> +<col width="33%" /> + |Application |Feature |PBS Pro resource name | + | --- | --- | + |ansys |acfd |feature__ansys__acfd | + |ansys |aa_r |feature__ansys__aa_r | + |comsol |COMSOL |feature__comsol__COMSOL | + |comsol |HEATTRANSFER |feature__comsol__HEATTRANSFER | + |comsol-edu |COMSOLBATCH |feature__comsol-edu__COMSOLBATCH | + |comsol-edu |STRUCTURALMECHANICS |feature__comsol-edu__STRUCTURALMECHANICS | + |matlab |MATLAB |feature__matlab__MATLAB | + |matlab |Image_Toolbox |feature__matlab__Image_Toolbox | + |matlab-edu |MATLAB_Distrib_Comp_Engine |feature__matlab-edu__MATLAB_Distrib_Comp_Engine | + |matlab-edu |Image_Acquisition_Toolbox |feature__matlab-edu__Image_Acquisition_Toolbox\ | + +Be aware, that the resource names in PBS Pro are CASE SENSITIVE!** + +### Example of qsub statement + +Run an interactive PBS job with 1 Matlab EDU license, 1 Distributed +Computing Toolbox and 32 Distributed Computing Engines (running on 32 +cores): + + $ qsub -I -q qprod -A PROJECT_ID -l select=2:ncpus=16 -l feature__matlab-edu__MATLAB=1 -l feature__matlab-edu__Distrib_Computing_Toolbox=1 -l feature__matlab-edu__MATLAB_Distrib_Comp_Engine=32 + +The license is used and accounted only with the real usage of the +product. So in this example, the general Matlab is used after Matlab is +run vy the user and not at the time, when the shell of the interactive +job is started. Also the Distributed Computing licenses are used at the +time, when the user uses the distributed parallel computation in Matlab +(e. g. issues pmode start, matlabpool, etc.). + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/java.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/java.md new file mode 100644 index 0000000000000000000000000000000000000000..9094578fb82ad669d7ec1cd25caaf132bc73fc22 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/java.md @@ -0,0 +1,33 @@ +Java +==== + +Java on ANSELM + + + +Java is available on Anselm cluster. Activate java by loading the java +module + + $ module load java + +Note that the java module must be loaded on the compute nodes as well, +in order to run java on compute nodes. + +Check for java version and path + + $ java -version + $ which java + +With the module loaded, not only the runtime environment (JRE), but also +the development environment (JDK) with the compiler is available. 
+ + $ javac -version + $ which javac + +Java applications may use MPI for interprocess communication, in +conjunction with OpenMPI. Read more +on <http://www.open-mpi.org/faq/?category=java>. +This functionality is currently not supported on Anselm cluster. In case +you require the java interface to MPI, please contact [Anselm +support](https://support.it4i.cz/rt/). + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.md new file mode 100644 index 0000000000000000000000000000000000000000..803b896d1f2dc44638431b0e9c4d24efd4699d7e --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.md @@ -0,0 +1,484 @@ +Virtualization +============== + +Running virtual machines on compute nodes + + + +Introduction +------------ + +There are situations when Anselm's environment is not suitable for user +needs. + +- Application requires different operating system (e.g Windows), + application is not available for Linux +- Application requires different versions of base system libraries and + tools +- Application requires specific setup (installation, configuration) of + complex software stack +- Application requires privileged access to operating system +- ... and combinations of above cases + + We offer solution for these cases - **virtualization**. Anselm's +environment gives the possibility to run virtual machines on compute +nodes. Users can create their own images of operating system with +specific software stack and run instances of these images as virtual +machines on compute nodes. Run of virtual machines is provided by +standard mechanism of [Resource Allocation and Job +Execution](../../resource-allocation-and-job-execution/introduction.html). + +Solution is based on QEMU-KVM software stack and provides +hardware-assisted x86 virtualization. + +Limitations +----------- + +Anselm's infrastructure was not designed for virtualization. Anselm's +environment is not intended primary for virtualization, compute nodes, +storages and all infrastructure of Anselm is intended and optimized for +running HPC jobs, this implies suboptimal configuration of +virtualization and limitations. + +Anselm's virtualization does not provide performance and all features of +native environment. There is significant performance hit (degradation) +in I/O performance (storage, network). Anselm's virtualization is not +suitable for I/O (disk, network) intensive workloads. + +Virtualization has also some drawbacks, it is not so easy to setup +efficient solution. + +Solution described in chapter +[HOWTO](virtualization.html#howto) + is suitable for single node tasks, does not +introduce virtual machine clustering. + +Please consider virtualization as last resort solution for your needs. + +Please consult use of virtualization with IT4Innovation's support. + +For running Windows application (when source code and Linux native +application are not available) consider use of Wine, Windows +compatibility layer. Many Windows applications can be run using Wine +with less effort and better performance than when using virtualization. + +Licensing +--------- + +IT4Innovations does not provide any licenses for operating systems and +software of virtual machines. Users are ( > +in accordance with [Acceptable use policy +document](http://www.it4i.cz/acceptable-use-policy.pdf)) +fully responsible for licensing all software running in virtual machines +on Anselm. 
Be aware of complex conditions of licensing software in +virtual environments. + +Users are responsible for licensing OS e.g. MS Windows and all software +running in their virtual machines. + + HOWTO +---------- + +### Virtual Machine Job Workflow + +We propose this job workflow: + +Workflow](virtualization-job-workflow "Virtualization Job Workflow") + +Our recommended solution is that job script creates distinct shared job +directory, which makes a central point for data exchange between +Anselm's environment, compute node (host) (e.g HOME, SCRATCH, local +scratch and other local or cluster filesystems) and virtual machine +(guest). Job script links or copies input data and instructions what to +do (run script) for virtual machine to job directory and virtual machine +process input data according instructions in job directory and store +output back to job directory. We recommend, that virtual machine is +running in so called [snapshot +mode](virtualization.html#snapshot-mode), image is +immutable - image does not change, so one image can be used for many +concurrent jobs. + +### Procedure + +1. Prepare image of your virtual machine +2. Optimize image of your virtual machine for Anselm's virtualization +3. Modify your image for running jobs +4. Create job script for executing virtual machine +5. Run jobs + +### Prepare image of your virtual machine + +You can either use your existing image or create new image from scratch. + +QEMU currently supports these image types or formats: + +- raw +- cloop +- cow +- qcow +- qcow2 +- vmdk - VMware 3 & 4, or 6 image format, for exchanging images with + that product +- vdi - VirtualBox 1.1 compatible image format, for exchanging images + with VirtualBox. + +You can convert your existing image using qemu-img convert command. +Supported formats of this command are: blkdebug blkverify bochs cloop +cow dmg file ftp ftps host_cdrom host_device host_floppy http https +nbd parallels qcow qcow2 qed raw sheepdog tftp vdi vhdx vmdk vpc vvfat. + +We recommend using advanced QEMU native image format qcow2. + +[More about QEMU +Images](http://en.wikibooks.org/wiki/QEMU/Images) + +### Optimize image of your virtual machine + +Use virtio devices (for disk/drive and network adapter) and install +virtio drivers (paravirtualized drivers) into virtual machine. There is +significant performance gain when using virtio drivers. For more +information see [Virtio +Linux](http://www.linux-kvm.org/page/Virtio) and [Virtio +Windows](http://www.linux-kvm.org/page/WindowsGuestDrivers/Download_Drivers). + +Disable all +unnecessary services +and tasks. Restrict all unnecessary operating system operations. + +Remove all +unnecessary software and +files. + + +Remove all paging +space, swap files, partitions, etc. + +Shrink your image. (It is recommended to zero all free space and +reconvert image using qemu-img.) + +### Modify your image for running jobs + +Your image should run some kind of operating system startup script. +Startup script should run application and when application exits run +shutdown or quit virtual machine. + +We recommend, that startup script + +maps Job Directory from host (from compute node) +runs script (we call it "run script") from Job Directory and waits for +application's exit +- for management purposes if run script does not exist wait for some + time period (few minutes) + +shutdowns/quits OS +For Windows operating systems we suggest using Local Group Policy +Startup script, for Linux operating systems rc.local, runlevel init +script or similar service. 
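+
+For a Linux guest, an analogous startup script might look like the
+following minimal sketch (an illustration only, e.g. placed in
+/etc/rc.local; it assumes the job directory is exported as the SMB
+share //10.0.2.4/qemu by the QEMU user network backend, as in the
+Windows example below, and that the run script is named run.sh):
+
+    #!/bin/sh
+    # mount the shared job directory exported by the QEMU user network backend
+    mkdir -p /mnt/job
+    mount -t cifs //10.0.2.4/qemu /mnt/job -o guest
+
+    # run the job's run script if present, otherwise wait a few minutes
+    if [ -f /mnt/job/run.sh ]; then
+        sh /mnt/job/run.sh
+    else
+        sleep 300
+    fi
+
+    # shut the virtual machine down when the work is done
+    poweroff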
+ +Example startup script for Windows virtual machine: + + @echo off + set LOG=c:startup.log + set MAPDRIVE=z: + set SCRIPT=%MAPDRIVE%run.bat + set TIMEOUT=300 + + echo %DATE% %TIME% Running startup script>%LOG% + + rem Mount share + echo %DATE% %TIME% Mounting shared drive>%LOG% + net use z: 10.0.2.4qemu >%LOG% 2>&1 + dir z: >%LOG% 2>&1 + echo. >%LOG% + + if exist %MAPDRIVE% ( +  echo %DATE% %TIME% The drive "%MAPDRIVE%" exists>%LOG% + +  if exist %SCRIPT% ( +    echo %DATE% %TIME% The script file "%SCRIPT%"exists>%LOG% +    echo %DATE% %TIME% Running script %SCRIPT%>%LOG% +    set TIMEOUT=0 +    call %SCRIPT% +  ) else ( +    echo %DATE% %TIME% The script file "%SCRIPT%"does not exist>%LOG% +  ) + + ) else ( +  echo %DATE% %TIME% The drive "%MAPDRIVE%" does not exist>%LOG% + ) + echo. >%LOG% + + timeout /T %TIMEOUT% + + echo %DATE% %TIME% Shut down>%LOG% + shutdown /s /t 0 + +Example startup script maps shared job script as drive z: and looks for +run script called run.bat. If run script is found it is run else wait +for 5 minutes, then shutdown virtual machine. + +### Create job script for executing virtual machine + +Create job script according recommended + +[Virtual Machine Job +Workflow](virtualization.html#virtual-machine-job-workflow). + +Example job for Windows virtual machine: + + #/bin/sh + + JOB_DIR=/scratch/$USER/win/${PBS_JOBID} + + #Virtual machine settings + VM_IMAGE=~/work/img/win.img + VM_MEMORY=49152 + VM_SMP=16 + + # Prepare job dir + mkdir -p ${JOB_DIR} && cd ${JOB_DIR} || exit 1 + ln -s ~/work/win . + ln -s /scratch/$USER/data . + ln -s ~/work/win/script/run/run-appl.bat run.bat + + # Run virtual machine + export TMPDIR=/lscratch/${PBS_JOBID} + module add qemu + qemu-system-x86_64 +  -enable-kvm +  -cpu host +  -smp ${VM_SMP} +  -m ${VM_MEMORY} +  -vga std +  -localtime +  -usb -usbdevice tablet +  -device virtio-net-pci,netdev=net0 +  -netdev user,id=net0,smb=${JOB_DIR},hostfwd=tcp::3389-:3389 +  -drive file=${VM_IMAGE},media=disk,if=virtio +  -snapshot +  -nographic + +Job script links application data (win), input data (data) and run +script (run.bat) into job directory and runs virtual machine. + +Example run script (run.bat) for Windows virtual machine: + + z: + cd winappl + call application.bat z:data z:output + +Run script runs application from shared job directory (mapped as drive +z:), process input data (z:data) from job directory and store output +to job directory (z:output). + +### Run jobs + +Run jobs as usual, see [Resource Allocation and Job +Execution](../../resource-allocation-and-job-execution/introduction.html). +Use only full node allocation for virtualization jobs. + +### Running Virtual Machines + +Virtualization is enabled only on compute nodes, virtualization does not +work on login nodes. + +Load QEMU environment module: + + $ module add qemu + +Get help + + $ man qemu + +Run virtual machine (simple) + + $ qemu-system-x86_64 -hda linux.img -enable-kvm -cpu host -smp 16 -m 32768 -vga std -vnc :0 + + $ qemu-system-x86_64 -hda win.img -enable-kvm -cpu host -smp 16 -m 32768 -vga std -localtime -usb -usbdevice tablet -vnc :0 + +You can access virtual machine by VNC viewer (option -vnc) connecting to +IP address of compute node. For VNC you must use [VPN +network](../../accessing-the-cluster/vpn-access.html). 
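+
+To find out which compute node runs your job (and therefore which
+address to point the VNC viewer at), you can for example use qstat; the
+viewer command below is only an illustration and depends on the VNC
+client installed on your machine:
+
+    $ qstat -n -u $USER        # lists the compute nodes assigned to your jobs
+    $ vncviewer cn17:0         # cn17 is an example node name, display :0 matches -vnc :0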
+ +Install virtual machine from iso file + + $ qemu-system-x86_64 -hda linux.img -enable-kvm -cpu host -smp 16 -m 32768 -vga std -cdrom linux-install.iso -boot d -vnc :0 + + $ qemu-system-x86_64 -hda win.img -enable-kvm -cpu host -smp 16 -m 32768 -vga std -localtime -usb -usbdevice tablet -cdrom win-install.iso -boot d -vnc :0 + +Run virtual machine using optimized devices, user network backend with +sharing and port forwarding, in snapshot mode + + $ qemu-system-x86_64 -drive file=linux.img,media=disk,if=virtio -enable-kvm -cpu host -smp 16 -m 32768 -vga std -device virtio-net-pci,netdev=net0 -netdev user,id=net0,smb=/scratch/$USER/tmp,hostfwd=tcp::2222-:22 -vnc :0 -snapshot + + $ qemu-system-x86_64 -drive file=win.img,media=disk,if=virtio -enable-kvm -cpu host -smp 16 -m 32768 -vga std -localtime -usb -usbdevice tablet -device virtio-net-pci,netdev=net0 -netdev user,id=net0,smb=/scratch/$USER/tmp,hostfwd=tcp::3389-:3389 -vnc :0 -snapshot + +Thanks to port forwarding you can access virtual machine via SSH (Linux) +or RDP (Windows) connecting to IP address of compute node (and port 2222 +for SSH). You must use [VPN +network](../../accessing-the-cluster/vpn-access.html). + +Keep in mind, that if you use virtio devices, you must have virtio +drivers installed on your virtual machine. + +### Networking and data sharing + +For networking virtual machine we suggest to use (default) user network +backend (sometimes called slirp). This network backend NATs virtual +machines and provides useful services for virtual machines as DHCP, DNS, +SMB sharing, port forwarding. + +In default configuration IP network 10.0.2.0/24 is used, host has IP +address 10.0.2.2, DNS server 10.0.2.3, SMB server 10.0.2.4 and virtual +machines obtain address from range 10.0.2.15-10.0.2.31. Virtual machines +have access to Anselm's network via NAT on compute node (host). + +Simple network setup + + $ qemu-system-x86_64 ... -net nic -net user + +(It is default when no -net options are given.) + +Simple network setup with sharing and port forwarding (obsolete but +simpler syntax, lower performance) + + $ qemu-system-x86_64 ... -net nic -net user,smb=/scratch/$USER/tmp,hostfwd=tcp::3389-:3389 + +Optimized network setup with sharing and port forwarding + + $ qemu-system-x86_64 ... -device virtio-net-pci,netdev=net0 -netdev user,id=net0,smb=/scratch/$USER/tmp,hostfwd=tcp::2222-:22 + +### Advanced networking + +Internet access** + +Sometime your virtual machine needs access to internet (install +software, updates, software activation, etc). We suggest solution using +Virtual Distributed Ethernet (VDE) enabled QEMU with SLIRP running on +login node tunnelled to compute node. Be aware, this setup has very low +performance, the worst performance of all described solutions. + +Load VDE enabled QEMU environment module (unload standard QEMU module +first if necessary). + + $ module add qemu/2.1.2-vde2 + +Create virtual network switch. + + $ vde_switch -sock /tmp/sw0 -mgmt /tmp/sw0.mgmt -daemon + +Run SLIRP daemon over SSH tunnel on login node and connect it to virtual +network switch. + + $ dpipe vde_plug /tmp/sw0 = ssh login1 $VDE2_DIR/bin/slirpvde -s - --dhcp & + +Run qemu using vde network backend, connect to created virtual switch. + +Basic setup (obsolete syntax) + + $ qemu-system-x86_64 ... -net nic -net vde,sock=/tmp/sw0 + +Setup using virtio device (obsolete syntax) + + $ qemu-system-x86_64 ... -net nic,model=virtio -net vde,sock=/tmp/sw0 + +Optimized setup + + $ qemu-system-x86_64 ... 
-device virtio-net-pci,netdev=net0 -netdev vde,id=net0,sock=/tmp/sw0 + +TAP interconnect** + +Both user and vde network backend have low performance. For fast +interconnect (10Gbps and more) of compute node (host) and virtual +machine (guest) we suggest using Linux kernel TAP device. + +Cluster Anselm provides TAP device tap0 for your job. TAP interconnect +does not provide any services (like NAT, DHCP, DNS, SMB, etc.) just raw +networking, so you should provide your services if you need them. + +Run qemu with TAP network backend: + + $ qemu-system-x86_64 ... -device virtio-net-pci,netdev=net1 + -netdev tap,id=net1,ifname=tap0,script=no,downscript=no + +Interface tap0 has IP address 192.168.1.1 and network mask 255.255.255.0 +(/24). In virtual machine use IP address from range +192.168.1.2-192.168.1.254. For your convenience some ports on tap0 +interface are redirected to higher numbered ports, so you as +non-privileged user can provide services on these ports. + +Redirected ports: + +- DNS udp/53->udp/3053, tcp/53->tcp3053 +- DHCP udp/67->udp3067 +- SMB tcp/139->tcp3139, tcp/445->tcp3445). + +You can configure IP address of virtual machine statically or +dynamically. For dynamic addressing provide your DHCP server on port +3067 of tap0 interface, you can also provide your DNS server on port +3053 of tap0 interface for example: + + $ dnsmasq --interface tap0 --bind-interfaces -p 3053 --dhcp-alternate-port=3067,68 --dhcp-range=192.168.1.15,192.168.1.32 --dhcp-leasefile=/tmp/dhcp.leasefile + +You can also provide your SMB services (on ports 3139, 3445) to obtain +high performance data sharing. + +Example smb.conf (not optimized) + + [global] + socket address=192.168.1.1 + smb ports = 3445 3139 + + private dir=/tmp/qemu-smb + pid directory=/tmp/qemu-smb + lock directory=/tmp/qemu-smb + state directory=/tmp/qemu-smb + ncalrpc dir=/tmp/qemu-smb/ncalrpc + log file=/tmp/qemu-smb/log.smbd + smb passwd file=/tmp/qemu-smb/smbpasswd + security = user + map to guest = Bad User + unix extensions = no + load printers = no + printing = bsd + printcap name = /dev/null + disable spoolss = yes + log level = 1 + guest account = USER + [qemu] + path=/scratch/USER/tmp + read only=no + guest ok=yes + writable=yes + follow symlinks=yes + wide links=yes + force user=USER + +(Replace USER with your login name.) + +Run SMB services + + smbd -s /tmp/qemu-smb/smb.conf + + + +Virtual machine can of course have more than one network interface +controller, virtual machine can use more than one network backend. So, +you can combine for example use network backend and TAP interconnect. + +### Snapshot mode + +In snapshot mode image is not written, changes are written to temporary +file (and discarded after virtual machine exits). **It is strongly +recommended mode for running your jobs.** Set TMPDIR environment +variable to local scratch directory for placement temporary files. + + $ export TMPDIR=/lscratch/${PBS_JOBID} + $ qemu-system-x86_64 ... -snapshot + +### Windows guests + +For Windows guests we recommend these options, life will be easier: + + $ qemu-system-x86_64 ... 
-localtime -usb -usbdevice tablet + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.md new file mode 100644 index 0000000000000000000000000000000000000000..03477ba6e3b3ecd0b61b5086adb72f431dfc91b1 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.md @@ -0,0 +1,242 @@ +Running OpenMPI +=============== + + + +OpenMPI program execution +------------------------- + +The OpenMPI programs may be executed only via the PBS Workload manager, +by entering an appropriate queue. On Anselm, the **bullxmpi-1.2.4.1** +and **OpenMPI 1.6.5** are OpenMPI based MPI implementations. + +### Basic usage + +Use the mpiexec to run the OpenMPI code. + +Example: + + $ qsub -q qexp -l select=4:ncpus=16 -I + qsub: waiting for job 15210.srv11 to start + qsub: job 15210.srv11 ready + + $ pwd + /home/username + + $ module load openmpi + $ mpiexec -pernode ./helloworld_mpi.x + Hello world! from rank 0 of 4 on host cn17 + Hello world! from rank 1 of 4 on host cn108 + Hello world! from rank 2 of 4 on host cn109 + Hello world! from rank 3 of 4 on host cn110 + +Please be aware, that in this example, the directive **-pernode** is +used to run only **one task per node**, which is normally an unwanted +behaviour (unless you want to run hybrid code with just one MPI and 16 +OpenMP tasks per node). In normal MPI programs **omit the -pernode +directive** to run up to 16 MPI tasks per each node. + +In this example, we allocate 4 nodes via the express queue +interactively. We set up the openmpi environment and interactively run +the helloworld_mpi.x program. +Note that the executable +helloworld_mpi.x must be available within the +same path on all nodes. This is automatically fulfilled on the /home and +/scratch filesystem. + +You need to preload the executable, if running on the local scratch +/lscratch filesystem + + $ pwd + /lscratch/15210.srv11 + + $ mpiexec -pernode --preload-binary ./helloworld_mpi.x + Hello world! from rank 0 of 4 on host cn17 + Hello world! from rank 1 of 4 on host cn108 + Hello world! from rank 2 of 4 on host cn109 + Hello world! from rank 3 of 4 on host cn110 + +In this example, we assume the executable +helloworld_mpi.x is present on compute node +cn17 on local scratch. We call the mpiexec whith the +--preload-binary** argument (valid for openmpi). The mpiexec will copy +the executable from cn17 to the +/lscratch/15210.srv11 directory on cn108, cn109 +and cn110 and execute the program. + +MPI process mapping may be controlled by PBS parameters. + +The mpiprocs and ompthreads parameters allow for selection of number of +running MPI processes per node as well as number of OpenMP threads per +MPI process. + +### One MPI process per node + +Follow this example to run one MPI process per node, 16 threads per +process. + + $ qsub -q qexp -l select=4:ncpus=16:mpiprocs=1:ompthreads=16 -I + + $ module load openmpi + + $ mpiexec --bind-to-none ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 1 MPI processes per node and 16 threads per socket, +on 4 nodes. + +### Two MPI processes per node + +Follow this example to run two MPI processes per node, 8 threads per +process. Note the options to mpiexec. 
+ + $ qsub -q qexp -l select=4:ncpus=16:mpiprocs=2:ompthreads=8 -I + + $ module load openmpi + + $ mpiexec -bysocket -bind-to-socket ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 2 MPI processes per node and 8 threads per socket, +each process and its threads bound to a separate processor socket of the +node, on 4 nodes + +### 16 MPI processes per node + +Follow this example to run 16 MPI processes per node, 1 thread per +process. Note the options to mpiexec. + + $ qsub -q qexp -l select=4:ncpus=16:mpiprocs=16:ompthreads=1 -I + + $ module load openmpi + + $ mpiexec -bycore -bind-to-core ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 16 MPI processes per node, single threaded. Each +process is bound to separate processor core, on 4 nodes. + +### OpenMP thread affinity + +Important! Bind every OpenMP thread to a core! + +In the previous two examples with one or two MPI processes per node, the +operating system might still migrate OpenMP threads between cores. You +might want to avoid this by setting these environment variable for GCC +OpenMP: + + $ export GOMP_CPU_AFFINITY="0-15" + +or this one for Intel OpenMP: + + $ export KMP_AFFINITY=granularity=fine,compact,1,0 + +As of OpenMP 4.0 (supported by GCC 4.9 and later and Intel 14.0 and +later) the following variables may be used for Intel or GCC: + + $ export OMP_PROC_BIND=true + $ export OMP_PLACES=cores + +OpenMPI Process Mapping and Binding +------------------------------------------------ + +The mpiexec allows for precise selection of how the MPI processes will +be mapped to the computational nodes and how these processes will bind +to particular processor sockets and cores. + +MPI process mapping may be specified by a hostfile or rankfile input to +the mpiexec program. Altough all implementations of MPI provide means +for process mapping and binding, following examples are valid for the +openmpi only. + +### Hostfile + +Example hostfile + + cn110.bullx + cn109.bullx + cn108.bullx + cn17.bullx + +Use the hostfile to control process placement + + $ mpiexec -hostfile hostfile ./helloworld_mpi.x + Hello world! from rank 0 of 4 on host cn110 + Hello world! from rank 1 of 4 on host cn109 + Hello world! from rank 2 of 4 on host cn108 + Hello world! from rank 3 of 4 on host cn17 + +In this example, we see that ranks have been mapped on nodes according +to the order in which nodes show in the hostfile + +### Rankfile + +Exact control of MPI process placement and resource binding is provided +by specifying a rankfile + +Appropriate binding may boost performance of your application. + +Example rankfile + + rank 0=cn110.bullx slot=1:0,1 + rank 1=cn109.bullx slot=0:* + rank 2=cn108.bullx slot=1:1-2 + rank 3=cn17.bullx slot=0:1,1:0-2 + rank 4=cn109.bullx slot=0:*,1:* + +This rankfile assumes 5 ranks will be running on 4 nodes and provides +exact mapping and binding of the processes to the processor sockets and +cores + +Explanation: +rank 0 will be bounded to cn110, socket1 core0 and core1 +rank 1 will be bounded to cn109, socket0, all cores +rank 2 will be bounded to cn108, socket1, core1 and core2 +rank 3 will be bounded to cn17, socket0 core1, socket1 core0, core1, +core2 +rank 4 will be bounded to cn109, all cores on both sockets + + $ mpiexec -n 5 -rf rankfile --report-bindings ./helloworld_mpi.x + [cn17:11180] MCW rank 3 bound to socket 0[core 1] socket 1[core 0-2]: [. B . . . . . .][B B B . . . . .] 
(slot list 0:1,1:0-2)
+    [cn110:09928] MCW rank 0 bound to socket 1[core 0-1]: [. . . . . . . .][B B . . . . . .] (slot list 1:0,1)
+    [cn109:10395] MCW rank 1 bound to socket 0[core 0-7]: [B B B B B B B B][. . . . . . . .] (slot list 0:*)
+    [cn108:10406] MCW rank 2 bound to socket 1[core 1-2]: [. . . . . . . .][. B B . . . . .] (slot list 1:1-2)
+    [cn109:10406] MCW rank 4 bound to socket 0[core 0-7] socket 1[core 0-7]: [B B B B B B B B][B B B B B B B B] (slot list 0:*,1:*)
+    Hello world! from rank 3 of 5 on host cn17
+    Hello world! from rank 1 of 5 on host cn109
+    Hello world! from rank 0 of 5 on host cn110
+    Hello world! from rank 4 of 5 on host cn109
+    Hello world! from rank 2 of 5 on host cn108
+
+In this example, we run 5 MPI processes (5 ranks) on four nodes. The
+rankfile defines how the processes are mapped to the nodes, sockets and
+cores. The **--report-bindings** option was used to print out the
+actual process locations and bindings. Note that ranks 1 and 4 run on
+the same node and their core bindings overlap.
+
+It is the user's responsibility to provide the correct number of ranks,
+sockets and cores.
+
+### Bindings verification
+
+In all cases, binding and threading may be verified by executing, for
+example:
+
+    $ mpiexec -bysocket -bind-to-socket --report-bindings echo
+    $ mpiexec -bysocket -bind-to-socket numactl --show
+    $ mpiexec -bysocket -bind-to-socket echo $OMP_NUM_THREADS
+
+Changes in OpenMPI 1.8
+----------------------
+
+Some options have changed in OpenMPI version 1.8.
+
+ |version 1.6.5 |version 1.8.1 |
+ | --- | --- |
+ |--bind-to-none |--bind-to none |
+ |--bind-to-core |--bind-to core |
+ |--bind-to-socket |--bind-to socket |
+ |-bysocket |--map-by socket |
+ |-bycore |--map-by core |
+ |-pernode |--map-by ppr:1:node |
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.md
new file mode 100644
index 0000000000000000000000000000000000000000..fb348bb0091106d09774641c992a2f6ecca9cd56
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.md
@@ -0,0 +1,171 @@
+MPI
+===
+
+
+
+Setting up MPI Environment
+--------------------------
+
+The Anselm cluster provides several implementations of the MPI library:
+
+ |MPI Library |Thread support |
+ | --- | --- |
+ |The highly optimized and stable **bullxmpi 1.2.4.1** |Partial thread support up to MPI_THREAD_SERIALIZED |
+ |The **Intel MPI 4.1** |Full thread support up to MPI_THREAD_MULTIPLE |
+ |The [**OpenMPI 1.6.5**](http://www.open-mpi.org/) |Full thread support up to MPI_THREAD_MULTIPLE, BLCR c/r support |
+ |The **OpenMPI 1.8.1** |Full thread support up to MPI_THREAD_MULTIPLE, MPI-3.0 support |
+ |The **mpich2 1.9** |Full thread support up to MPI_THREAD_MULTIPLE, BLCR c/r support |
+
+MPI libraries are activated via the environment modules.
+
+Look up the section modulefiles/mpi in the module avail listing:
+
+    $ module avail
+    ------------------------- /opt/modules/modulefiles/mpi -------------------------
+    bullxmpi/bullxmpi-1.2.4.1 mvapich2/1.9-icc
+    impi/4.0.3.008 openmpi/1.6.5-gcc(default)
+    impi/4.1.0.024 openmpi/1.6.5-gcc46
+    impi/4.1.0.030 openmpi/1.6.5-icc
+    impi/4.1.1.036(default) openmpi/1.8.1-gcc
+    openmpi/1.8.1-gcc46
+    mvapich2/1.9-gcc(default) openmpi/1.8.1-gcc49
+    mvapich2/1.9-gcc46 openmpi/1.8.1-icc
+
+There are default compilers associated with each particular MPI
+implementation. The defaults may be changed; the MPI libraries may be
+used in conjunction with any compiler. The defaults are selected via
+the modules in the following way:
+
+ |Module |MPI |Compiler suite |
+ | --- | --- | --- |
+ |PrgEnv-gnu |bullxmpi-1.2.4.1 |bullx GNU 4.4.6 |
+ |PrgEnv-intel |Intel MPI 4.1.1 |Intel 13.1.1 |
+ |bullxmpi |bullxmpi-1.2.4.1 |none, select via module |
+ |impi |Intel MPI 4.1.1 |none, select via module |
+ |openmpi |OpenMPI 1.6.5 |GNU compilers 4.8.1, GNU compilers 4.4.6, Intel Compilers |
+ |openmpi |OpenMPI 1.8.1 |GNU compilers 4.8.1, GNU compilers 4.4.6, GNU compilers 4.9.0, Intel Compilers |
+ |mvapich2 |MPICH2 1.9 |GNU compilers 4.8.1, GNU compilers 4.4.6, Intel Compilers |
+
+Examples:
+
+    $ module load openmpi
+
+In this example, we activate the latest openmpi with the latest GNU
+compilers.
+
+To use openmpi with the Intel compiler suite, use:
+
+    $ module load intel
+    $ module load openmpi/1.6.5-icc
+
+In this example, the openmpi 1.6.5 built with the Intel compilers is
+activated.
+
+Compiling MPI Programs
+----------------------
+
+After setting up your MPI environment, compile your program using one
+of the MPI wrappers:
+
+    $ mpicc -v
+    $ mpif77 -v
+    $ mpif90 -v
+
+Example program:
+
+    // helloworld_mpi.c
+    #include <stdio.h>
+    #include <mpi.h>
+
+    int main(int argc, char **argv) {
+
+      int len;
+      int rank, size;
+      char node[MPI_MAX_PROCESSOR_NAME];
+
+      // Initiate MPI
+      MPI_Init(&argc, &argv);
+      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+      MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+      // Get hostname and print
+      MPI_Get_processor_name(node, &len);
+      printf("Hello world! from rank %d of %d on host %s\n", rank, size, node);
+
+      // Finalize and exit
+      MPI_Finalize();
+
+      return 0;
+    }
+
+Compile the above example with:
+
+    $ mpicc helloworld_mpi.c -o helloworld_mpi.x
+
+Running MPI Programs
+--------------------
+
+The MPI program executable must be compatible with the loaded MPI
+module.
+Always compile and execute using the very same MPI module.
+
+It is strongly discouraged to mix MPI implementations. Linking an
+application with one MPI implementation and running mpirun/mpiexec from
+another implementation may result in unexpected errors.
+
+The MPI program executable must be available within the same path on
+all nodes. This is automatically fulfilled on the /home and /scratch
+filesystems. You need to preload the executable if running on the local
+scratch /lscratch filesystem.
+
+### Ways to run MPI programs
+
+The optimal way to run an MPI program depends on its memory
+requirements, memory access pattern and communication pattern.
+
+Consider these ways to run an MPI program:
+
+1. One MPI process per node, 16 threads per process
+2. Two MPI processes per node, 8 threads per process
+3. 16 MPI processes per node, 1 thread per process.
+
+**One MPI** process per node, using 16 threads, is most useful for
+memory-demanding applications that make good use of processor cache
+memory and are not memory bound.
This is also a preferred way for +communication intensive applications as one process per node enjoys full +bandwidth access to the network interface. + +Two MPI** processes per node, using 8 threads each, bound to processor +socket is most useful for memory bandwidth bound applications such as +BLAS1 or FFT, with scalable memory demand. However, note that the two +processes will share access to the network interface. The 8 threads and +socket binding should ensure maximum memory access bandwidth and +minimize communication, migration and numa effect overheads. + +Important! Bind every OpenMP thread to a core! + +In the previous two cases with one or two MPI processes per node, the +operating system might still migrate OpenMP threads between cores. You +want to avoid this by setting the KMP_AFFINITY or GOMP_CPU_AFFINITY +environment variables. + +16 MPI** processes per node, using 1 thread each bound to processor +core is most suitable for highly scalable applications with low +communication demand. + +### Running OpenMPI + +The **bullxmpi-1.2.4.1** and [**OpenMPI +1.6.5**](http://www.open-mpi.org/) are both based on +OpenMPI. Read more on [how to run +OpenMPI](Running_OpenMPI.html) based MPI. + +### Running MPICH2 + +The **Intel MPI** and **mpich2 1.9** are MPICH2 based implementations. +Read more on [how to run MPICH2](running-mpich2.html) +based MPI. + +The Intel MPI may run on the Intel Xeon Phi accelerators as well. Read +more on [how to run Intel MPI on +accelerators](../intel-xeon-phi.html). + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.md new file mode 100644 index 0000000000000000000000000000000000000000..e79ef4b1f0d649557a23691f3cc9e03070193127 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.md @@ -0,0 +1,105 @@ +MPI4Py (MPI for Python) +======================= + +OpenMPI interface to Python + + + +Introduction +------------ + +MPI for Python provides bindings of the Message Passing Interface (MPI) +standard for the Python programming language, allowing any Python +program to exploit multiple processors. + +This package is constructed on top of the MPI-1/2 specifications and +provides an object oriented interface which closely follows MPI-2 C++ +bindings. It supports point-to-point (sends, receives) and collective +(broadcasts, scatters, gathers) communications of any picklable Python +object, as well as optimized communications of Python object exposing +the single-segment buffer interface (NumPy arrays, builtin +bytes/string/array objects). + +On Anselm MPI4Py is available in standard Python modules. + +Modules +------- + +MPI4Py is build for OpenMPI. Before you start with MPI4Py you need to +load Python and OpenMPI modules. + + $ module load python + $ module load openmpi + +Execution +--------- + +You need to import MPI to your python program. Include the following +line to the python script: + + from mpi4py import MPI + +The MPI4Py enabled python programs [execute as any other +OpenMPI](Running_OpenMPI.html) code.The simpliest way is +to run + + $ mpiexec python <script>.py + +For example + + $ mpiexec python hello_world.py + +Examples +-------- + +### Hello world! + + from mpi4py import MPI + + comm = MPI.COMM_WORLD + + print "Hello! I'm rank %d from %d running in total..." 
% (comm.rank, comm.size) + + comm.Barrier()  # wait for everybody to synchronize + +###Collective Communication with NumPy arrays + + from mpi4py import MPI + from __future__ import division + import numpy as np + + comm = MPI.COMM_WORLD + + print("-"*78) + print(" Running on %d cores" % comm.size) + print("-"*78) + + comm.Barrier() + + # Prepare a vector of N=5 elements to be broadcasted... + N = 5 + if comm.rank == 0: +   A = np.arange(N, dtype=np.float64)   # rank 0 has proper data + else: +   A = np.empty(N, dtype=np.float64)   # all other just an empty array + + # Broadcast A from rank 0 to everybody + comm.Bcast( [A, MPI.DOUBLE] ) + + # Everybody should now have the same... + print "[%02d] %s" % (comm.rank, A) + +Execute the above code as: + + $ qsub -q qexp -l select=4:ncpus=16:mpiprocs=16:ompthreads=1 -I + + $ module load python openmpi + + $ mpiexec -bycore -bind-to-core python hello_world.py + +In this example, we run MPI4Py enabled code on 4 nodes, 16 cores per +node (total of 64 processes), each python process is bound to a +different core. +More examples and documentation can be found on [MPI for Python +webpage](https://pythonhosted.org/mpi4py/usrman/index.html). + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.md new file mode 100644 index 0000000000000000000000000000000000000000..cf4b32cc7d7574df4928fcb0e9c1b64afab0dce2 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.md @@ -0,0 +1,192 @@ +Running MPICH2 +============== + + + +MPICH2 program execution +------------------------ + +The MPICH2 programs use mpd daemon or ssh connection to spawn processes, +no PBS support is needed. However the PBS allocation is required to +access compute nodes. On Anselm, the **Intel MPI** and **mpich2 1.9** +are MPICH2 based MPI implementations. + +### Basic usage + +Use the mpirun to execute the MPICH2 code. + +Example: + + $ qsub -q qexp -l select=4:ncpus=16 -I + qsub: waiting for job 15210.srv11 to start + qsub: job 15210.srv11 ready + + $ module load impi + + $ mpirun -ppn 1 -hostfile $PBS_NODEFILE ./helloworld_mpi.x + Hello world! from rank 0 of 4 on host cn17 + Hello world! from rank 1 of 4 on host cn108 + Hello world! from rank 2 of 4 on host cn109 + Hello world! from rank 3 of 4 on host cn110 + +In this example, we allocate 4 nodes via the express queue +interactively. We set up the intel MPI environment and interactively run +the helloworld_mpi.x program. We request MPI to spawn 1 process per +node. +Note that the executable helloworld_mpi.x must be available within the +same path on all nodes. This is automatically fulfilled on the /home and +/scratch filesystem. + +You need to preload the executable, if running on the local scratch +/lscratch filesystem + + $ pwd + /lscratch/15210.srv11 + $ mpirun -ppn 1 -hostfile $PBS_NODEFILE cp /home/username/helloworld_mpi.x . + $ mpirun -ppn 1 -hostfile $PBS_NODEFILE ./helloworld_mpi.x + Hello world! from rank 0 of 4 on host cn17 + Hello world! from rank 1 of 4 on host cn108 + Hello world! from rank 2 of 4 on host cn109 + Hello world! from rank 3 of 4 on host cn110 + +In this example, we assume the executable helloworld_mpi.x is present +on shared home directory. We run the cp command via mpirun, copying the +executable from shared home to local scratch . 
Second mpirun will +execute the binary in the /lscratch/15210.srv11 directory on nodes cn17, +cn108, cn109 and cn110, one process per node. + +MPI process mapping may be controlled by PBS parameters. + +The mpiprocs and ompthreads parameters allow for selection of number of +running MPI processes per node as well as number of OpenMP threads per +MPI process. + +### One MPI process per node + +Follow this example to run one MPI process per node, 16 threads per +process. Note that no options to mpirun are needed + + $ qsub -q qexp -l select=4:ncpus=16:mpiprocs=1:ompthreads=16 -I + + $ module load mvapich2 + + $ mpirun ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 1 MPI processes per node and 16 threads per socket, +on 4 nodes. + +### Two MPI processes per node + +Follow this example to run two MPI processes per node, 8 threads per +process. Note the options to mpirun for mvapich2. No options are needed +for impi. + + $ qsub -q qexp -l select=4:ncpus=16:mpiprocs=2:ompthreads=8 -I + + $ module load mvapich2 + + $ mpirun -bind-to numa ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 2 MPI processes per node and 8 threads per socket, +each process and its threads bound to a separate processor socket of the +node, on 4 nodes + +### 16 MPI processes per node + +Follow this example to run 16 MPI processes per node, 1 thread per +process. Note the options to mpirun for mvapich2. No options are needed +for impi. + + $ qsub -q qexp -l select=4:ncpus=16:mpiprocs=16:ompthreads=1 -I + + $ module load mvapich2 + + $ mpirun -bind-to core ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 16 MPI processes per node, single threaded. Each +process is bound to separate processor core, on 4 nodes. + +### OpenMP thread affinity + +Important! Bind every OpenMP thread to a core! + +In the previous two examples with one or two MPI processes per node, the +operating system might still migrate OpenMP threads between cores. You +might want to avoid this by setting these environment variable for GCC +OpenMP: + + $ export GOMP_CPU_AFFINITY="0-15" + +or this one for Intel OpenMP: + + $ export KMP_AFFINITY=granularity=fine,compact,1,0 + +As of OpenMP 4.0 (supported by GCC 4.9 and later and Intel 14.0 and +later) the following variables may be used for Intel or GCC: + + $ export OMP_PROC_BIND=true + $ export OMP_PLACES=cores + + + +MPICH2 Process Mapping and Binding +---------------------------------- + +The mpirun allows for precise selection of how the MPI processes will be +mapped to the computational nodes and how these processes will bind to +particular processor sockets and cores. + +### Machinefile + +Process mapping may be controlled by specifying a machinefile input to +the mpirun program. Altough all implementations of MPI provide means for +process mapping and binding, following examples are valid for the impi +and mvapich2 only. + +Example machinefile + + cn110.bullx + cn109.bullx + cn108.bullx + cn17.bullx + cn108.bullx + +Use the machinefile to control process placement + + $ mpirun -machinefile machinefile helloworld_mpi.x + Hello world! from rank 0 of 5 on host cn110 + Hello world! from rank 1 of 5 on host cn109 + Hello world! from rank 2 of 5 on host cn108 + Hello world! from rank 3 of 5 on host cn17 + Hello world! 
from rank 4 of 5 on host cn108 + +In this example, we see that ranks have been mapped on nodes according +to the order in which nodes show in the machinefile + +### Process Binding + +The Intel MPI automatically binds each process and its threads to the +corresponding portion of cores on the processor socket of the node, no +options needed. The binding is primarily controlled by environment +variables. Read more about mpi process binding on [Intel +website](https://software.intel.com/sites/products/documentation/hpc/ics/impi/41/lin/Reference_Manual/Environment_Variables_Process_Pinning.htm). +The MPICH2 uses the -bind-to option Use -bind-to numa or -bind-to core +to bind the process on single core or entire socket. + +### Bindings verification + +In all cases, binding and threading may be verified by executing + + $ mpirun -bindto numa numactl --show + $ mpirun -bindto numa echo $OMP_NUM_THREADS + +Intel MPI on Xeon Phi +--------------------- + +The[MPI section of Intel Xeon Phi +chapter](../intel-xeon-phi.html) provides details on how +to run Intel MPI code on Xeon Phi architecture. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/Matlab.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/Matlab.png new file mode 100644 index 0000000000000000000000000000000000000000..71bdf7e101998fbba84d5e58f341f605df930325 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/Matlab.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.md new file mode 100644 index 0000000000000000000000000000000000000000..eec66a690a4eb80bcb53fe398bfe0fe6d514147d --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.md @@ -0,0 +1,346 @@ +Matlab +====== + + + +Introduction +------------ + +Matlab is available in versions R2015a and R2015b. There are always two +variants of the release: + +- Non commercial or so called EDU variant, which can be used for + common research and educational purposes. +- Commercial or so called COM variant, which can used also for + commercial activities. The licenses for commercial variant are much + more expensive, so usually the commercial variant has only subset of + features compared to the EDU available. + + + +To load the latest version of Matlab load the module + + $ module load MATLAB + +By default the EDU variant is marked as default. If you need other +version or variant, load the particular version. To obtain the list of +available versions use + + $ module avail MATLAB + +If you need to use the Matlab GUI to prepare your Matlab programs, you +can use Matlab directly on the login nodes. But for all computations use +Matlab on the compute nodes via PBS Pro scheduler. + +If you require the Matlab GUI, please follow the general informations +about [running graphical +applications](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html). 
+ +Matlab GUI is quite slow using the X forwarding built in the PBS (qsub +-X), so using X11 display redirection either via SSH or directly by +xauth (please see the "GUI Applications on Compute Nodes over VNC" part +[here](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html)) +is recommended. + +To run Matlab with GUI, use + + $ matlab + +To run Matlab in text mode, without the Matlab Desktop GUI environment, +use + + $ matlab -nodesktop -nosplash + +plots, images, etc... will be still available. + +Running parallel Matlab using Distributed Computing Toolbox / Engine +------------------------------------------------------------------------ + +Distributed toolbox is available only for the EDU variant + +The MPIEXEC mode available in previous versions is no longer available +in MATLAB 2015. Also, the programming interface has changed. Refer +to [Release +Notes](http://www.mathworks.com/help/distcomp/release-notes.html#buanp9e-1). + +Delete previously used file mpiLibConf.m, we have observed crashes when +using Intel MPI. + +To use Distributed Computing, you first need to setup a parallel +profile. We have provided the profile for you, you can either import it +in MATLAB command line: + + > parallel.importProfile('/apps/all/MATLAB/2015a-EDU/SalomonPBSPro.settings') + + ans = + + SalomonPBSPro + +Or in the GUI, go to tab HOME -> Parallel -> Manage Cluster +Profiles..., click Import and navigate to : + +/apps/all/MATLAB/2015a-EDU/SalomonPBSPro.settings + +With the new mode, MATLAB itself launches the workers via PBS, so you +can either use interactive mode or a batch mode on one node, but the +actual parallel processing will be done in a separate job started by +MATLAB itself. Alternatively, you can use "local" mode to run parallel +code on just a single node. + +The profile is confusingly named Salomon, but you can use it also on +Anselm. + +### Parallel Matlab interactive session + +Following example shows how to start interactive session with support +for Matlab GUI. For more information about GUI based applications on +Anselm see [this +page](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html). + + $ xhost + + $ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A NONE-0-0 -q qexp -l select=1 -l walltime=00:30:00 + -l feature__matlab__MATLAB=1 + +This qsub command example shows how to run Matlab on a single node. + +The second part of the command shows how to request all necessary +licenses. In this case 1 Matlab-EDU license and 48 Distributed Computing +Engines licenses. + +Once the access to compute nodes is granted by PBS, user can load +following modules and start Matlab: + + r1i0n17$ module load MATLAB/2015b-EDU + r1i0n17$ matlab & + +### Parallel Matlab batch job in Local mode + +To run matlab in batch mode, write an matlab script, then write a bash +jobscript and execute via the qsub command. By default, matlab will +execute one matlab worker instance per allocated core. + + #!/bin/bash + #PBS -A PROJECT ID + #PBS -q qprod + #PBS -l select=1:ncpus=16:mpiprocs=16:ompthreads=1 + + # change to shared scratch directory + SCR=/scratch/work/user/$USER/$PBS_JOBID + mkdir -p $SCR ; cd $SCR || exit + + # copy input file to scratch + cp $PBS_O_WORKDIR/matlabcode.m . 
+ + # load modules + module load MATLAB/2015a-EDU + + # execute the calculation + matlab -nodisplay -r matlabcode > output.out + + # copy output file to home + cp output.out $PBS_O_WORKDIR/. + +This script may be submitted directly to the PBS workload manager via +the qsub command. The inputs and matlab script are in matlabcode.m +file, outputs in output.out file. Note the missing .m extension in the +matlab -r matlabcodefile call, **the .m must not be included**. Note +that the **shared /scratch must be used**. Further, it is **important to +include quit** statement at the end of the matlabcode.m script. + +Submit the jobscript using qsub + + $ qsub ./jobscript + +### Parallel Matlab Local mode program example + +The last part of the configuration is done directly in the user Matlab +script before Distributed Computing Toolbox is started. + + cluster = parcluster('local') + +This script creates scheduler object "cluster" of type "local" that +starts workers locally. + +Please note: Every Matlab script that needs to initialize/use matlabpool +has to contain these three lines prior to calling parpool(sched, ...) +function. + +The last step is to start matlabpool with "cluster" object and correct +number of workers. We have 24 cores per node, so we start 24 workers. + + parpool(cluster,16); + + + ... parallel code ... + + + parpool close + +The complete example showing how to use Distributed Computing Toolbox in +local mode is shown here. + + cluster = parcluster('local'); + cluster + + parpool(cluster,24); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') +    +    T = W*x; % Calculation performed on labs, in parallel. +             % T and W are both codistributed arrays here. + end + T; + whos        % T and W are both distributed arrays here. + + parpool close + quit + +You can copy and paste the example in a .m file and execute. Note that +the parpool size should correspond to **total number of cores** +available on allocated nodes. + +### Parallel Matlab Batch job using PBS mode (workers spawned in a separate job) + +This mode uses PBS scheduler to launch the parallel pool. It uses the +SalomonPBSPro profile that needs to be imported to Cluster Manager, as +mentioned before. This methodod uses MATLAB's PBS Scheduler interface - +it spawns the workers in a separate job submitted by MATLAB using qsub. + +This is an example of m-script using PBS mode: + + cluster = parcluster('SalomonPBSPro'); + set(cluster, 'SubmitArguments', '-A OPEN-0-0'); + set(cluster, 'ResourceTemplate', '-q qprod -l select=10:ncpus=16'); + set(cluster, 'NumWorkers', 160); + + pool = parpool(cluster, 160); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') + + T = W*x; % Calculation performed on labs, in parallel. + % T and W are both codistributed arrays here. + end + whos % T and W are both distributed arrays here. + + % shut down parallel pool + delete(pool) + +Note that we first construct a cluster object using the imported +profile, then set some important options, namely : SubmitArguments, +where you need to specify accounting id, and ResourceTemplate, where you +need to specify number of nodes to run the job. + +You can start this script using batch mode the same way as in Local mode +example. 
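+
+A jobscript for the PBS mode might look like the following sketch (an
+assumption-based illustration modelled on the Local mode jobscript
+above; only one node is requested here, because the workers are spawned
+by MATLAB in a separate job, and the m-script is assumed to be named
+pbsmodecode.m):
+
+    #!/bin/bash
+    #PBS -A PROJECT_ID
+    #PBS -q qprod
+    #PBS -l select=1:ncpus=16
+
+    # change to shared scratch directory
+    SCR=/scratch/work/user/$USER/$PBS_JOBID
+    mkdir -p $SCR ; cd $SCR || exit
+
+    # copy the m-script to scratch
+    cp $PBS_O_WORKDIR/pbsmodecode.m .
+
+    # load modules and execute the calculation
+    module load MATLAB/2015a-EDU
+    matlab -nodisplay -r pbsmodecode > output.out
+
+    # copy output file back to home
+    cp output.out $PBS_O_WORKDIR/.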
+ +### Parallel Matlab Batch with direct launch (workers spawned within the existing job) + +This method is a "hack" invented by us to emulate the mpiexec +functionality found in previous MATLAB versions. We leverage the MATLAB +Generic Scheduler interface, but instead of submitting the workers to +PBS, we launch the workers directly within the running job, thus we +avoid the issues with master script and workers running in separate jobs +(issues with license not available, waiting for the worker's job to +spawn etc.) + +Please note that this method is experimental. + +For this method, you need to use SalomonDirect profile, import it +using [the same way as +SalomonPBSPro](copy_of_matlab.html#running-parallel-matlab-using-distributed-computing-toolbox---engine) + +This is an example of m-script using direct mode: + + parallel.importProfile('/apps/all/MATLAB/2015a-EDU/SalomonDirect.settings') + cluster = parcluster('SalomonDirect'); + set(cluster, 'NumWorkers', 48); + + pool = parpool(cluster, 48); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') + + T = W*x; % Calculation performed on labs, in parallel. + % T and W are both codistributed arrays here. + end + whos % T and W are both distributed arrays here. + + % shut down parallel pool + delete(pool) + +### Non-interactive Session and Licenses + +If you want to run batch jobs with Matlab, be sure to request +appropriate license features with the PBS Pro scheduler, at least the " +-l __feature__matlab__MATLAB=1" for EDU variant of Matlab. More +information about how to check the license features states and how to +request them with PBS Pro, please [look +here](../isv_licenses.html). + +In case of non-interactive session please read the [following +information](../isv_licenses.html) on how to modify the +qsub command to test for available licenses prior getting the resource +allocation. + +### Matlab Distributed Computing Engines start up time + +Starting Matlab workers is an expensive process that requires certain +amount of time. For your information please see the following table: + + |compute nodes|number of workers|start-up time[s]| + |---|---|---| + |16|384|831| + |8|192|807| + |4|96|483| + |2|48|16| + +MATLAB on UV2000 +----------------- + +UV2000 machine available in queue "qfat" can be used for MATLAB +computations. This is a SMP NUMA machine with large amount of RAM, which +can be beneficial for certain types of MATLAB jobs. CPU cores are +allocated in chunks of 8 for this machine. + +You can use MATLAB on UV2000 in two parallel modes : + +### Threaded mode + +Since this is a SMP machine, you can completely avoid using Parallel +Toolbox and use only MATLAB's threading. MATLAB will automatically +detect the number of cores you have allocated and will set +maxNumCompThreads accordingly and certain +operations, such as fft, , eig, svd, +etc. will be automatically run in threads. The advantage of this mode is +that you don't need to modify your existing sequential codes. + +### Local cluster mode + +You can also use Parallel Toolbox on UV2000. Use l[ocal cluster +mode](copy_of_matlab.html#parallel-matlab-batch-job-in-local-mode), +"SalomonPBSPro" profile will not work. 
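+
+For instance, an interactive session on the UV2000 might be started as
+follows (an illustrative sketch only; adjust the project id, core count
+and walltime to your needs):
+
+    $ qsub -I -q qfat -A PROJECT_ID -l select=1:ncpus=8 -l walltime=01:00:00
+    $ module load MATLAB/2015b-EDU
+    $ matlab -nodesktop -nosplash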
+ + + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..6c425451b294e75ad697aaca3de1dfe83491ab1a --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.md @@ -0,0 +1,48 @@ +Numerical languages +=================== + +Interpreted languages for numerical computations and analysis + + + +Introduction +------------ + +This section contains a collection of high-level interpreted languages, +primarily intended for numerical computations. + +Matlab +------ + +MATLAB®^ is a high-level language and interactive environment for +numerical computation, visualization, and programming. + + $ module load MATLAB/2015b-EDU + $ matlab + +Read more at the [Matlab +page](matlab.html). + +Octave +------ + +GNU Octave is a high-level interpreted language, primarily intended for +numerical computations. The Octave language is quite similar to Matlab +so that most programs are easily portable. + + $ module load Octave + $ octave + +Read more at the [Octave page](octave.html). + +R +- + +The R is an interpreted language and environment for statistical +computing and graphics. + + $ module load R + $ R + +Read more at the [R page](r.html). + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.md new file mode 100644 index 0000000000000000000000000000000000000000..9b6b8a062e6c52bfc5a860b3078ece7ea9e14be2 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.md @@ -0,0 +1,263 @@ +Matlab 2013-2014 +================ + + + +Introduction +------------ + +This document relates to the old versions R2013 and R2014. For MATLAB +2015, please use [this documentation +instead](copy_of_matlab.html). + +Matlab is available in the latest stable version. There are always two +variants of the release: + +- Non commercial or so called EDU variant, which can be used for + common research and educational purposes. +- Commercial or so called COM variant, which can used also for + commercial activities. The licenses for commercial variant are much + more expensive, so usually the commercial variant has only subset of + features compared to the EDU available. + + + +To load the latest version of Matlab load the module + + $ module load matlab + +By default the EDU variant is marked as default. If you need other +version or variant, load the particular version. To obtain the list of +available versions use + + $ module avail matlab + +If you need to use the Matlab GUI to prepare your Matlab programs, you +can use Matlab directly on the login nodes. But for all computations use +Matlab on the compute nodes via PBS Pro scheduler. + +If you require the Matlab GUI, please follow the general informations +about [running graphical +applications](https://docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/resolveuid/11e53ad0d2fd4c5187537f4baeedff33). 
+ +Matlab GUI is quite slow using the X forwarding built in the PBS (qsub +-X), so using X11 display redirection either via SSH or directly by +xauth (please see the "GUI Applications on Compute Nodes over VNC" part +[here](https://docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/resolveuid/11e53ad0d2fd4c5187537f4baeedff33)) +is recommended. + +To run Matlab with GUI, use + + $ matlab + +To run Matlab in text mode, without the Matlab Desktop GUI environment, +use + + $ matlab -nodesktop -nosplash + +plots, images, etc... will be still available. + +Running parallel Matlab using Distributed Computing Toolbox / Engine +-------------------------------------------------------------------- + +Recommended parallel mode for running parallel Matlab on Anselm is +MPIEXEC mode. In this mode user allocates resources through PBS prior to +starting Matlab. Once resources are granted the main Matlab instance is +started on the first compute node assigned to job by PBS and workers are +started on all remaining nodes. User can use both interactive and +non-interactive PBS sessions. This mode guarantees that the data +processing is not performed on login nodes, but all processing is on +compute nodes. + +  + +For the performance reasons Matlab should use system MPI. On Anselm the +supported MPI implementation for Matlab is Intel MPI. To switch to +system MPI user has to override default Matlab setting by creating new +configuration file in its home directory. The path and file name has to +be exactly the same as in the following listing: + + $ vim ~/matlab/mpiLibConf.m + + function [lib, extras] = mpiLibConf + %MATLAB MPI Library overloading for Infiniband Networks + + mpich = '/opt/intel/impi/4.1.1.036/lib64/'; + + disp('Using Intel MPI 4.1.1.036 over Infiniband') + + lib = strcat(mpich, 'libmpich.so'); + mpl = strcat(mpich, 'libmpl.so'); + opa = strcat(mpich, 'libopa.so'); + + extras = {}; + +System MPI library allows Matlab to communicate through 40Gbps +Infiniband QDR interconnect instead of slower 1Gb ethernet network. + +Please note: The path to MPI library in "mpiLibConf.m" has to match with +version of loaded Intel MPI module. In this example the version +4.1.1.036 of Iintel MPI is used by Matlab and therefore module +impi/4.1.1.036 has to be loaded prior to starting Matlab. + +### Parallel Matlab interactive session + +Once this file is in place, user can request resources from PBS. +Following example shows how to start interactive session with support +for Matlab GUI. For more information about GUI based applications on +Anselm see [this +page](https://docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/resolveuid/11e53ad0d2fd4c5187537f4baeedff33). + + $ xhost + + $ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A NONE-0-0 -q qexp -l select=4:ncpus=16:mpiprocs=16 -l walltime=00:30:00 + -l feature__matlab__MATLAB=1 + +This qsub command example shows how to run Matlab with 32 workers in +following configuration: 2 nodes (use all 16 cores per node) and 16 +workers = mpirocs per node (-l select=2:ncpus=16:mpiprocs=16). If user +requires to run smaller number of workers per node then the "mpiprocs" +parameter has to be changed. + +The second part of the command shows how to request all necessary +licenses. In this case 1 Matlab-EDU license and 32 Distributed Computing +Engines licenses. 
+ +Once the access to compute nodes is granted by PBS, user can load +following modules and start Matlab: + + cn79$ module load matlab/R2013a-EDU + cn79$ module load impi/4.1.1.036 + cn79$ matlab & + +### Parallel Matlab batch job + +To run matlab in batch mode, write an matlab script, then write a bash +jobscript and execute via the qsub command. By default, matlab will +execute one matlab worker instance per allocated core. + + #!/bin/bash + #PBS -A PROJECT ID + #PBS -q qprod + #PBS -l select=2:ncpus=16:mpiprocs=16:ompthreads=1 + + # change to shared scratch directory + SCR=/scratch/$USER/$PBS_JOBID + mkdir -p $SCR ; cd $SCR || exit + + # copy input file to scratch + cp $PBS_O_WORKDIR/matlabcode.m . + + # load modules + module load matlab/R2013a-EDU + module load impi/4.1.1.036 + + # execute the calculation + matlab -nodisplay -r matlabcode > output.out + + # copy output file to home + cp output.out $PBS_O_WORKDIR/. + +This script may be submitted directly to the PBS workload manager via +the qsub command. The inputs and matlab script are in matlabcode.m +file, outputs in output.out file. Note the missing .m extension in the +matlab -r matlabcodefile call, **the .m must not be included**. Note +that the **shared /scratch must be used**. Further, it is **important to +include quit** statement at the end of the matlabcode.m script. + +Submit the jobscript using qsub + + $ qsub ./jobscript + +### Parallel Matlab program example + +The last part of the configuration is done directly in the user Matlab +script before Distributed Computing Toolbox is started. + + sched = findResource('scheduler', 'type', 'mpiexec'); + set(sched, 'MpiexecFileName', '/apps/intel/impi/4.1.1/bin/mpirun'); + set(sched, 'EnvironmentSetMethod', 'setenv'); + +This script creates scheduler object "sched" of type "mpiexec" that +starts workers using mpirun tool. To use correct version of mpirun, the +second line specifies the path to correct version of system Intel MPI +library. + +Please note: Every Matlab script that needs to initialize/use matlabpool +has to contain these three lines prior to calling matlabpool(sched, ...) +function. + +The last step is to start matlabpool with "sched" object and correct +number of workers. In this case qsub asked for total number of 32 cores, +therefore the number of workers is also set to 32. + + matlabpool(sched,32); + + + ... parallel code ... + + + matlabpool close + +The complete example showing how to use Distributed Computing Toolbox is +show here. + + sched = findResource('scheduler', 'type', 'mpiexec'); + set(sched, 'MpiexecFileName', '/apps/intel/impi/4.1.1/bin/mpirun') + set(sched, 'EnvironmentSetMethod', 'setenv') + set(sched, 'SubmitArguments', '') + sched + + matlabpool(sched,32); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') +    +    T = W*x; % Calculation performed on labs, in parallel. +             % T and W are both codistributed arrays here. + end + T; + whos        % T and W are both distributed arrays here. + + matlabpool close + quit + +You can copy and paste the example in a .m file and execute. Note that +the matlabpool size should correspond to **total number of cores** +available on allocated nodes. + +### Non-interactive Session and Licenses + +If you want to run batch jobs with Matlab, be sure to request +appropriate license features with the PBS Pro scheduler, at least the " +-l __feature__matlab__MATLAB=1" for EDU variant of Matlab. 
More information about how to check the license feature states and how
+to request them with PBS Pro is available [here](../isv_licenses.html).
+
+In case of a non-interactive session, please read the [following
+information](../isv_licenses.html) on how to modify the
+qsub command to test for available licenses prior to getting the
+resource allocation.
+
+### Matlab Distributed Computing Engines start up time
+
+Starting Matlab workers is an expensive process that requires a certain
+amount of time. For your information, please see the following table:
+
+  |compute nodes|number of workers|start-up time [s]|
+  |---|---|---|
+  |16|256|1008|
+  |8|128|534|
+  |4|64|333|
+  |2|32|210|
+
+
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.md
new file mode 100644
index 0000000000000000000000000000000000000000..6db86f6251cf7ff58eda6530eddab0dae8ab4de9
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.md
@@ -0,0 +1,148 @@
+Octave
+======
+
+
+
+Introduction
+------------
+
+GNU Octave is a high-level interpreted language, primarily intended for
+numerical computations. It provides capabilities for the numerical
+solution of linear and nonlinear problems, and for performing other
+numerical experiments. It also provides extensive graphics capabilities
+for data visualization and manipulation. Octave is normally used through
+its interactive command line interface, but it can also be used to write
+non-interactive programs. The Octave language is quite similar to Matlab,
+so that most programs are easily portable. Read more at
+<http://www.gnu.org/software/octave/>.
+
+Several versions of Octave are available on Anselm via modules:
+
+  |Version|module|
+  |---|---|
+  |Octave 3.8.2, compiled with GCC and Multithreaded MKL|Octave/3.8.2-gimkl-2.11.5|
+  |Octave 4.0.1, compiled with GCC and Multithreaded MKL|Octave/4.0.1-gimkl-2.11.5|
+  |Octave 4.0.0, compiled with GCC and OpenBLAS|Octave/4.0.0-foss-2015g|
+
+Modules and execution
+---------------------
+
+    $ module load Octave
+
+Octave on Anselm is linked to the highly optimized MKL mathematical
+library. This provides threaded parallelization to many Octave kernels,
+notably the linear algebra subroutines. Octave runs these heavy
+calculation kernels without any penalty. By default, Octave
+parallelizes to 16 threads. You may control the number of threads by
+setting the OMP_NUM_THREADS environment variable.
+
+To run Octave interactively, log in with the ssh -X parameter for X11
+forwarding. Run Octave:
+
+    $ octave
+
+To run Octave in batch mode, write an Octave script, then write a bash
+jobscript and execute it via the qsub command. By default, Octave will
+use 16 threads when running MKL kernels.
+
+    #!/bin/bash
+
+    # change to local scratch directory
+    cd /lscratch/$PBS_JOBID || exit
+
+    # copy input file to scratch
+    cp $PBS_O_WORKDIR/octcode.m .
+
+    # load octave module
+    module load octave
+
+    # execute the calculation
+    octave -q --eval octcode > output.out
+
+    # copy output file to home
+    cp output.out $PBS_O_WORKDIR/.
+
+    #exit
+    exit
+
+This script may be submitted directly to the PBS workload manager via
+the qsub command. The inputs are in the octcode.m file, the outputs in
+the output.out file. 
See the single node jobscript example in the [Job +execution +section](http://support.it4i.cz/docs/anselm-cluster-documentation/resource-allocation-and-job-execution). + +The octave c compiler mkoctfile calls the GNU gcc 4.8.1 for compiling +native c code. This is very useful for running native c subroutines in +octave environment. + + $ mkoctfile -v + +Octave may use MPI for interprocess communication +This functionality is currently not supported on Anselm cluster. In case +you require the octave interface to MPI, please contact [Anselm +support](https://support.it4i.cz/rt/). + +Xeon Phi Support +---------------- + +Octave may take advantage of the Xeon Phi accelerators. This will only +work on the [Intel Xeon Phi](../intel-xeon-phi.html) +[accelerated nodes](../../compute-nodes.html). + +### Automatic offload support + +Octave can accelerate BLAS type operations (in particular the Matrix +Matrix multiplications] on the Xeon Phi accelerator, via [Automatic +Offload using the MKL +library](../intel-xeon-phi.html#section-3) + +Example + + $ export OFFLOAD_REPORT=2 + $ export MKL_MIC_ENABLE=1 + $ module load octave + $ octave -q + octave:1> A=rand(10000); B=rand(10000); + octave:2> tic; C=A*B; toc + [MKL] [MIC --] [AO Function]   DGEMM + [MKL] [MIC --] [AO DGEMM Workdivision]   0.32 0.68 + [MKL] [MIC 00] [AO DGEMM CPU Time]   2.896003 seconds + [MKL] [MIC 00] [AO DGEMM MIC Time]   1.967384 seconds + [MKL] [MIC 00] [AO DGEMM CPU->MIC Data]   1347200000 bytes + [MKL] [MIC 00] [AO DGEMM MIC->CPU Data]   2188800000 bytes + Elapsed time is 2.93701 seconds. + +In this example, the calculation was automatically divided among the CPU +cores and the Xeon Phi MIC accelerator, reducing the total runtime from +6.3 secs down to 2.9 secs. + +### Native support + +A version of [native](../intel-xeon-phi.html#section-4) +Octave is compiled for Xeon Phi accelerators. Some limitations apply for +this version: + +- Only command line support. GUI, graph plotting etc. is + not supported. +- Command history in interactive mode is not supported. + +Octave is linked with parallel Intel MKL, so it best suited for batch +processing of tasks that utilize BLAS, LAPACK and FFT operations. By +default, number of threads is set to 120, you can control this +with > OMP_NUM_THREADS environment +variable. + +Calculations that do not employ parallelism (either by using parallel +MKL eg. via matrix operations, fork() +function, [parallel +package](http://octave.sourceforge.net/parallel/) or +other mechanism) will actually run slower than on host CPU. + +To use Octave on a node with Xeon Phi: + + $ ssh mic0 # login to the MIC card + $ source /apps/tools/octave/3.8.2-mic/bin/octave-env.sh # set up environment variables + $ octave -q /apps/tools/octave/3.8.2-mic/example/test0.m # run an example + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.md new file mode 100644 index 0000000000000000000000000000000000000000..694a9d570eb57a6c8a23934110518d13bca2ae08 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.md @@ -0,0 +1,441 @@ +R += + + + +Introduction +------------ + +The R is a language and environment for statistical computing and +graphics. R provides a wide variety of statistical (linear and +nonlinear modelling, classical statistical tests, time-series analysis, +classification, clustering, ...) 
and graphical techniques, and is highly extensible.
+
+One of R's strengths is the ease with which well-designed
+publication-quality plots can be produced, including mathematical
+symbols and formulae where needed. Great care has been taken over the
+defaults for the minor design choices in graphics, but the user retains
+full control.
+
+Another convenience is the ease with which C code or third party
+libraries may be integrated within R.
+
+Extensive support for parallel computing is available within R.
+
+Read more at <http://www.r-project.org/> and
+<http://cran.r-project.org/doc/manuals/r-release/R-lang.html>.
+
+Modules
+-------
+
+The R version 3.0.1 is available on Anselm, along with the GUI
+interface Rstudio:
+
+  |Application|Version|module|
+  |---|---|---|
+  |**R**|R 3.0.1|R|
+  |**Rstudio**|Rstudio 0.97|Rstudio|
+
+    $ module load R
+
+Execution
+---------
+
+R on Anselm is linked to the highly optimized MKL mathematical library.
+This provides threaded parallelization to many R kernels, notably the
+linear algebra subroutines. R runs these heavy calculation kernels
+without any penalty. By default, R parallelizes to 16 threads. You may
+control the number of threads by setting the OMP_NUM_THREADS
+environment variable.
+
+### Interactive execution
+
+To run R interactively using the Rstudio GUI, log in with the ssh -X
+parameter for X11 forwarding. Run rstudio:
+
+    $ module load Rstudio
+    $ rstudio
+
+### Batch execution
+
+To run R in batch mode, write an R script, then write a bash jobscript
+and execute it via the qsub command. By default, R will use 16 threads
+when running MKL kernels.
+
+Example jobscript:
+
+    #!/bin/bash
+
+    # change to local scratch directory
+    cd /lscratch/$PBS_JOBID || exit
+
+    # copy input file to scratch
+    cp $PBS_O_WORKDIR/rscript.R .
+
+    # load R module
+    module load R
+
+    # execute the calculation
+    R CMD BATCH rscript.R routput.out
+
+    # copy output file to home
+    cp routput.out $PBS_O_WORKDIR/.
+
+    #exit
+    exit
+
+This script may be submitted directly to the PBS workload manager via
+the qsub command. The inputs are in the rscript.R file, the outputs in
+the routput.out file. See the single node jobscript example in the [Job
+execution
+section](../../resource-allocation-and-job-execution/job-submission-and-execution.html).
+
+Parallel R
+----------
+
+Parallel execution of R may be achieved in many ways. One approach is
+the implied parallelization due to linked libraries or specially enabled
+functions, as [described
+above](r.html#interactive-execution). In the following
+sections, we focus on explicit parallelization, where parallel
+constructs are directly stated within the R script.
+
+Package parallel
+----------------
+
+The package parallel provides support for parallel computation,
+including by forking (taken from package multicore), by sockets (taken
+from package snow) and random-number generation.
+
+The package is activated this way:
+
+    $ R
+    > library(parallel)
+
+More information and examples may be obtained directly by reading the
+documentation available in R:
+
+    > ?parallel
+    > library(help = "parallel")
+    > vignette("parallel")
+
+Download the package
+[parallel](package-parallel-vignette) vignette.
+
+Forking is the simplest to use. The forking family of functions
+provides a parallelized, drop-in replacement for the serial apply()
+family of functions.
+
+Forking via package parallel provides functionality similar to the
+OpenMP construct
+#pragma omp parallel for
+
+Only the cores of a single node can be utilized this way!
+ +Forking example: + + library(parallel) + + #integrand function + f <- function(i,h) { + x <- h*(i-0.5) + return (4/(1 + x*x)) + } + + #initialize + size <- detectCores() + + while (TRUE) + { + #read number of intervals + cat("Enter the number of intervals: (0 quits) ") + fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) + + if(n<=0) break + + #run the calculation + n <- max(n,size) + h <- 1.0/n + + i <- seq(1,n); + pi3 <- h*sum(simplify2array(mclapply(i,f,h,mc.cores=size))); + + #print results + cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) + } + +The above example is the classic parallel example for calculating the +number Ď€. Note the **detectCores()** and **mclapply()** functions. +Execute the example as: + + $ R --slave --no-save --no-restore -f pi3p.R + +Every evaluation of the integrad function runs in parallel on different +process. + +Package Rmpi +------------ + +package Rmpi provides an interface (wrapper) to MPI APIs. + +It also provides interactive R slave environment. On Anselm, Rmpi +provides interface to the +[OpenMPI](../mpi-1/Running_OpenMPI.html). + +Read more on Rmpi at <http://cran.r-project.org/web/packages/Rmpi/>, +reference manual is available at +<http://cran.r-project.org/web/packages/Rmpi/Rmpi.pdf> + +When using package Rmpi, both openmpi and R modules must be loaded + + $ module load openmpi + $ module load R + +Rmpi may be used in three basic ways. The static approach is identical +to executing any other MPI programm. In addition, there is Rslaves +dynamic MPI approach and the mpi.apply approach. In the following +section, we will use the number Ď€ integration example, to illustrate all +these concepts. + +### static Rmpi + +Static Rmpi programs are executed via mpiexec, as any other MPI +programs. Number of processes is static - given at the launch time. + +Static Rmpi example: + + library(Rmpi) + + #integrand function + f <- function(i,h) { + x <- h*(i-0.5) + return (4/(1 + x*x)) + } + + #initialize + invisible(mpi.comm.dup(0,1)) + rank <- mpi.comm.rank() + size <- mpi.comm.size() + n<-0 + + while (TRUE) + { + #read number of intervals + if (rank==0) { + cat("Enter the number of intervals: (0 quits) ") + fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) + } + + #broadcat the intervals + n <- mpi.bcast(as.integer(n),type=1) + + if(n<=0) break + + #run the calculation + n <- max(n,size) + h <- 1.0/n + + i <- seq(rank+1,n,size); + mypi <- h*sum(sapply(i,f,h)); + + pi3 <- mpi.reduce(mypi) + + #print results + if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) + } + + mpi.quit() + +The above is the static MPI example for calculating the number Ď€. Note +the **library(Rmpi)** and **mpi.comm.dup()** function calls. +Execute the example as: + + $ mpiexec R --slave --no-save --no-restore -f pi3.R + +### dynamic Rmpi + +Dynamic Rmpi programs are executed by calling the R directly. openmpi +module must be still loaded. The R slave processes will be spawned by a +function call within the Rmpi program. 
+ +Dynamic Rmpi example: + + #integrand function + f <- function(i,h) { + x <- h*(i-0.5) + return (4/(1 + x*x)) + } + + #the worker function + workerpi <- function() + { + #initialize + rank <- mpi.comm.rank() + size <- mpi.comm.size() + n<-0 + + while (TRUE) + { + #read number of intervals + if (rank==0) { + cat("Enter the number of intervals: (0 quits) ") + fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) + } + + #broadcat the intervals + n <- mpi.bcast(as.integer(n),type=1) + + if(n<=0) break + + #run the calculation + n <- max(n,size) + h <- 1.0/n + + i <- seq(rank+1,n,size); + mypi <- h*sum(sapply(i,f,h)); + + pi3 <- mpi.reduce(mypi) + + #print results + if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) + } + } + + #main + library(Rmpi) + + cat("Enter the number of slaves: ") + fp<-file("stdin"); ns<-scan(fp,nmax=1); close(fp) + + mpi.spawn.Rslaves(nslaves=ns) + mpi.bcast.Robj2slave(f) + mpi.bcast.Robj2slave(workerpi) + + mpi.bcast.cmd(workerpi()) + workerpi() + + mpi.quit() + +The above example is the dynamic MPI example for calculating the number +Ď€. Both master and slave processes carry out the calculation. Note the +mpi.spawn.Rslaves(), mpi.bcast.Robj2slave()** and the +mpi.bcast.cmd()** function calls. +Execute the example as: + + $ R --slave --no-save --no-restore -f pi3Rslaves.R + +### mpi.apply Rmpi + +mpi.apply is a specific way of executing Dynamic Rmpi programs. + +mpi.apply() family of functions provide MPI parallelized, drop in +replacement for the serial apply() family of functions. + +Execution is identical to other dynamic Rmpi programs. + +mpi.apply Rmpi example: + + #integrand function + f <- function(i,h) { + x <- h*(i-0.5) + return (4/(1 + x*x)) + } + + #the worker function + workerpi <- function(rank,size,n) + { + #run the calculation + n <- max(n,size) + h <- 1.0/n + + i <- seq(rank,n,size); + mypi <- h*sum(sapply(i,f,h)); + + return(mypi) + } + + #main + library(Rmpi) + + cat("Enter the number of slaves: ") + fp<-file("stdin"); ns<-scan(fp,nmax=1); close(fp) + + mpi.spawn.Rslaves(nslaves=ns) + mpi.bcast.Robj2slave(f) + mpi.bcast.Robj2slave(workerpi) + + while (TRUE) + { + #read number of intervals + cat("Enter the number of intervals: (0 quits) ") + fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) + if(n<=0) break + + #run workerpi + i=seq(1,2*ns) + pi3=sum(mpi.parSapply(i,workerpi,2*ns,n)) + + #print results + cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) + } + + mpi.quit() + +The above is the mpi.apply MPI example for calculating the number Ď€. +Only the slave processes carry out the calculation. Note the +mpi.parSapply(), ** function call. The package +parallel +[example](r.html#package-parallel)[above](r.html#package-parallel){.anchor +may be trivially adapted (for much better performance) to this structure +using the mclapply() in place of mpi.parSapply(). + +Execute the example as: + + $ R --slave --no-save --no-restore -f pi3parSapply.R + +Combining parallel and Rmpi +--------------------------- + +Currently, the two packages can not be combined for hybrid calculations. + +Parallel execution +------------------ + +The R parallel jobs are executed via the PBS queue system exactly as any +other parallel jobs. 
User must create an appropriate jobscript and +submit via the **qsub** + +Example jobscript for [static Rmpi](r.html#static-rmpi) +parallel R execution, running 1 process per core: + + #!/bin/bash + #PBS -q qprod + #PBS -N Rjob + #PBS -l select=100:ncpus=16:mpiprocs=16:ompthreads=1 + + # change to scratch directory + SCRDIR=/scratch/$USER/myjob + cd $SCRDIR || exit + + # copy input file to scratch + cp $PBS_O_WORKDIR/rscript.R . + + # load R and openmpi module + module load R + module load openmpi + + # execute the calculation + mpiexec -bycore -bind-to-core R --slave --no-save --no-restore -f rscript.R + + # copy output file to home + cp routput.out $PBS_O_WORKDIR/. + + #exit + exit + +For more information about jobscripts and MPI execution refer to the +[Job +submission](../../resource-allocation-and-job-execution/job-submission-and-execution.html) +and general [MPI](../mpi-1.html) sections. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.md new file mode 100644 index 0000000000000000000000000000000000000000..dc843fe8be9b69939bcbdbb202714a220d4cfdfd --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.md @@ -0,0 +1,91 @@ +FFTW +==== + +The discrete Fourier transform in one or more dimensions, MPI parallel + + + + + +FFTW is a C subroutine library for computing the discrete Fourier +transform in one or more dimensions, of arbitrary input size, and of +both real and complex data (as well as of even/odd data, i.e. the +discrete cosine/sine transforms or DCT/DST). The FFTW library allows for +MPI parallel, in-place discrete Fourier transform, with data distributed +over number of nodes. + +Two versions, **3.3.3** and **2.1.5** of FFTW are available on Anselm, +each compiled for **Intel MPI** and **OpenMPI** using **intel** and +gnu** compilers. These are available via modules: + +<col width="25%" /> +<col width="25%" /> +<col width="25%" /> +<col width="25%" /> + |Version |Parallelization |module |linker options | + | --- | --- | + |FFTW3 gcc3.3.3 |pthread, OpenMP |fftw3/3.3.3-gcc |-lfftw3, -lfftw3_threads-lfftw3_omp | + |FFTW3 icc3.3.3\ |pthread, OpenMP |fftw3 |-lfftw3, -lfftw3_threads-lfftw3_omp | + |FFTW2 gcc2.1.5\ |pthread |fftw2/2.1.5-gcc |-lfftw, -lfftw_threads | + |FFTW2 icc2.1.5 |pthread |fftw2 |-lfftw, -lfftw_threads | + |FFTW3 gcc3.3.3 |OpenMPI |fftw-mpi3/3.3.3-gcc |-lfftw3_mpi | + |FFTW3 icc3.3.3 |Intel MPI |fftw3-mpi |-lfftw3_mpi | + |FFTW2 gcc2.1.5 |OpenMPI |fftw2-mpi/2.1.5-gcc |-lfftw_mpi | + |FFTW2 gcc2.1.5 |IntelMPI |fftw2-mpi/2.1.5-gcc |-lfftw_mpi | + + $ module load fftw3 + +The module sets up environment variables, required for linking and +running fftw enabled applications. Make sure that the choice of fftw +module is consistent with your choice of MPI library. Mixing MPI of +different implementations may have unpredictable results. 
+ +Example +------- + + #include <fftw3-mpi.h> + int main(int argc, char **argv) + { +    const ptrdiff_t N0 = 100, N1 = 1000; +    fftw_plan plan; +    fftw_complex *data; +    ptrdiff_t alloc_local, local_n0, local_0_start, i, j; + +    MPI_Init(&argc, &argv); +    fftw_mpi_init(); + +    /* get local data size and allocate */ +    alloc_local = fftw_mpi_local_size_2d(N0, N1, MPI_COMM_WORLD, +                                         &local_n0, &local_0_start); +    data = fftw_alloc_complex(alloc_local); + +    /* create plan for in-place forward DFT */ +    plan = fftw_mpi_plan_dft_2d(N0, N1, data, data, MPI_COMM_WORLD, +                                FFTW_FORWARD, FFTW_ESTIMATE); + +    /* initialize data */ +    for (i = 0; i < local_n0; ++i) for (j = 0; j < N1; ++j) +    {  data[i*N1 + j][0] = i; +        data[i*N1 + j][1] = j; } + +    /* compute transforms, in-place, as many times as desired */ +    fftw_execute(plan); + +    fftw_destroy_plan(plan); + +    MPI_Finalize(); + } + +Load modules and compile: + + $ module load impi intel + $ module load fftw3-mpi + + $ mpicc testfftw3mpi.c -o testfftw3mpi.x -Wl,-rpath=$LIBRARY_PATH -lfftw3_mpi + + Run the example as [Intel MPI +program](../mpi-1/running-mpich2.html). + +Read more on FFTW usage on the [FFTW +website.](http://www.fftw.org/fftw3_doc/) + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.md new file mode 100644 index 0000000000000000000000000000000000000000..35894dad44cd6d9d3990a456db65894d75c7961e --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.md @@ -0,0 +1,164 @@ +GSL +=== + +The GNU Scientific Library. Provides a wide range of mathematical +routines. + + + +Introduction +------------ + +The GNU Scientific Library (GSL) provides a wide range of mathematical +routines such as random number generators, special functions and +least-squares fitting. There are over 1000 functions in total. The +routines have been written from scratch in C, and present a modern +Applications Programming Interface (API) for C programmers, allowing +wrappers to be written for very high level languages. + +The library covers a wide range of topics in numerical computing. +Routines are available for the following areas: + + ------------------ |---|---|-------------- ------------------------ + Complex Numbers Roots of Polynomials + + Special Functions Vectors and Matrices + + Permutations Combinations + + Sorting BLAS Support + + Linear Algebra CBLAS Library + + Fast Fourier Transforms Eigensystems + + Random Numbers Quadrature + + Random Distributions Quasi-Random Sequences + + Histograms Statistics + + Monte Carlo Integration N-Tuples + + Differential Equations Simulated Annealing + + Numerical Interpolation + Differentiation + + Series Acceleration Chebyshev Approximations + + Root-Finding Discrete Hankel + Transforms + + Least-Squares Fitting Minimization + + IEEE Floating-Point Physical Constants + + Basis Splines Wavelets + ------------------ |---|---|-------------- ------------------------ + +Modules +------- + +The GSL 1.16 is available on Anselm, compiled for GNU and Intel +compiler. 
These variants are available via modules: + + Module Compiler + ----------------- |---|---|- + gsl/1.16-gcc gcc 4.8.6 + gsl/1.16-icc(default) icc + +  $ module load gsl + +The module sets up environment variables, required for linking and +running GSL enabled applications. This particular command loads the +default module, which is gsl/1.16-icc + +Linking +------- + +Load an appropriate gsl module. Link using **-lgsl** switch to link your +code against GSL. The GSL depends on cblas API to BLAS library, which +must be supplied for linking. The BLAS may be provided, for example from +the MKL library, as well as from the BLAS GSL library (-lgslcblas). +Using the MKL is recommended. + +### Compiling and linking with Intel compilers + + $ module load intel + $ module load gsl + $ icc myprog.c -o myprog.x -Wl,-rpath=$LIBRARY_PATH -mkl -lgsl + +### Compiling and linking with GNU compilers + + $ module load gcc + $ module load mkl + $ module load gsl/1.16-gcc + $ gcc myprog.c -o myprog.x -Wl,-rpath=$LIBRARY_PATH -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lgsl + +Example +------- + +Following is an example of discrete wavelet transform implemented by +GSL: + + #include <stdio.h> + #include <math.h> + #include <gsl/gsl_sort.h> + #include <gsl/gsl_wavelet.h> + + int + main (int argc, char **argv) + { +  int i, n = 256, nc = 20; +  double *data = malloc (n * sizeof (double)); +  double *abscoeff = malloc (n * sizeof (double)); +  size_t *p = malloc (n * sizeof (size_t)); + +  gsl_wavelet *w; +  gsl_wavelet_workspace *work; + +  w = gsl_wavelet_alloc (gsl_wavelet_daubechies, 4); +  work = gsl_wavelet_workspace_alloc (n); + +  for (i=0; i<n; i++) +  data[i] = sin (3.141592654*(double)i/256.0); + +  gsl_wavelet_transform_forward (w, data, 1, n, work); + +  for (i = 0; i < n; i++) +    { +      abscoeff[i] = fabs (data[i]); +    } +  +  gsl_sort_index (p, abscoeff, 1, n); +  +  for (i = 0; (i + nc) < n; i++) +    data[p[i]] = 0; +  +  gsl_wavelet_transform_inverse (w, data, 1, n, work); +  +  for (i = 0; i < n; i++) +    { +      printf ("%gn", data[i]); +    } +  +  gsl_wavelet_free (w); +  gsl_wavelet_workspace_free (work); + +  free (data); +  free (abscoeff); +  free (p); +  return 0; + } + +Load modules and compile: + + $ module load intel gsl + icc dwt.c -o dwt.x -Wl,-rpath=$LIBRARY_PATH -mkl -lgsl + +In this example, we compile the dwt.c code using the Intel compiler and +link it to the MKL and GSL library, note the -mkl and -lgsl options. The +library search path is compiled in, so that no modules are necessary to +run the code. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.md new file mode 100644 index 0000000000000000000000000000000000000000..7c6d9def6a429dcfb1695f4872af6cab1c8bc091 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.md @@ -0,0 +1,120 @@ +HDF5 +==== + +Hierarchical Data Format library. Serial and MPI parallel version. + + + +[HDF5 (Hierarchical Data Format)](http://www.hdfgroup.org/HDF5/) is a +general purpose library and file format for storing scientific data. +HDF5 can store two primary objects: datasets and groups. A dataset is +essentially a multidimensional array of data elements, and a group is a +structure for organizing objects in an HDF5 file. 
Using these two basic +objects, one can create and store almost any kind of scientific data +structure, such as images, arrays of vectors, and structured and +unstructured grids. You can also mix and match them in HDF5 files +according to your needs. + +Versions **1.8.11** and **1.8.13** of HDF5 library are available on +Anselm, compiled for **Intel MPI** and **OpenMPI** using **intel** and +gnu** compilers. These are available via modules: + + |Version |Parallelization |module |C linker options<th align="left">C++ linker options<th align="left">Fortran linker options | + | --- | --- | + |HDF5 icc serial |pthread |hdf5/1.8.11 |$HDF5_INC $HDF5_SHLIB |$HDF5_INC $HDF5_CPP_LIB |$HDF5_INC $HDF5_F90_LIB | + |HDF5 icc parallel MPI\ |pthread, IntelMPI |hdf5-parallel/1.8.11 |$HDF5_INC $HDF5_SHLIB |Not supported |$HDF5_INC $HDF5_F90_LIB | + |HDF5 icc serial |pthread |hdf5/1.8.13 |$HDF5_INC $HDF5_SHLIB |$HDF5_INC $HDF5_CPP_LIB |$HDF5_INC $HDF5_F90_LIB | + |HDF5 icc parallel MPI\ |pthread, IntelMPI |hdf5-parallel/1.8.13 |$HDF5_INC $HDF5_SHLIB |Not supported |$HDF5_INC $HDF5_F90_LIB | + |HDF5 gcc parallel MPI\ |pthread, OpenMPI 1.6.5, gcc 4.8.1 |hdf5-parallel/1.8.11-gcc |$HDF5_INC $HDF5_SHLIB |Not supported |$HDF5_INC $HDF5_F90_LIB | + |HDF5 gcc parallel MPI\ |pthread, OpenMPI 1.6.5, gcc 4.8.1 |hdf5-parallel/1.8.13-gcc |$HDF5_INC $HDF5_SHLIB |Not supported |$HDF5_INC $HDF5_F90_LIB | + |HDF5 gcc parallel MPI\ |pthread, OpenMPI 1.8.1, gcc 4.9.0 |hdf5-parallel/1.8.13-gcc49 |$HDF5_INC $HDF5_SHLIB |Not supported |$HDF5_INC $HDF5_F90_LIB | + + + + $ module load hdf5-parallel + +The module sets up environment variables, required for linking and +running HDF5 enabled applications. Make sure that the choice of HDF5 +module is consistent with your choice of MPI library. Mixing MPI of +different implementations may have unpredictable results. + +Be aware, that GCC version of **HDF5 1.8.11** has serious performance +issues, since it's compiled with -O0 optimization flag. This version is +provided only for testing of code compiled only by GCC and IS NOT +recommended for production computations. For more informations, please +see: +<http://www.hdfgroup.org/ftp/HDF5/prev-releases/ReleaseFiles/release5-1811> +All GCC versions of **HDF5 1.8.13** are not affected by the bug, are +compiled with -O3 optimizations and are recommended for production +computations. + +Example +------- + + #include "hdf5.h" + #define FILE "dset.h5" + + int main() { + + hid_t file_id, dataset_id, dataspace_id; /* identifiers */ + hsize_t dims[2]; + herr_t status; + int i, j, dset_data[4][6]; + + /* Create a new file using default properties. */ + file_id = H5Fcreate(FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + /* Create the data space for the dataset. */ + dims[0] = 4; + dims[1] = 6; + dataspace_id = H5Screate_simple(2, dims, NULL); + + /* Initialize the dataset. */ + for (i = 0; i < 4; i++) + for (j = 0; j < 6; j++) + dset_data[i][j] = i * 6 + j + 1; + + /* Create the dataset. */ + dataset_id = H5Dcreate2(file_id, "/dset", H5T_STD_I32BE, dataspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + + /* Write the dataset. */ + status = H5Dwrite(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + dset_data); + + status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + dset_data); + + /* End access to the dataset and release resources used by it. */ + status = H5Dclose(dataset_id); + + /* Terminate access to the data space. */ + status = H5Sclose(dataspace_id); + + /* Close the file. 
*/
+    status = H5Fclose(file_id);
+ }
+
+Load modules and compile:
+
+    $ module load intel impi
+    $ module load hdf5-parallel
+
+    $ mpicc hdf5test.c -o hdf5test.x -Wl,-rpath=$LIBRARY_PATH $HDF5_INC $HDF5_SHLIB
+
+Run the example as an [Intel MPI
+program](../anselm-cluster-documentation/software/mpi-1/running-mpich2.html).
+
+For further information, please see the website:
+<http://www.hdfgroup.org/HDF5/>
+
+
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.md
new file mode 100644
index 0000000000000000000000000000000000000000..eb98c60fa8913a2ed75576197daf7dfbbe68d988
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.md
@@ -0,0 +1,54 @@
+Intel numerical libraries
+=========================
+
+Intel libraries for high performance in numerical computing
+
+
+
+Intel Math Kernel Library
+-------------------------
+
+Intel Math Kernel Library (Intel MKL) is a library of math kernel
+subroutines, extensively threaded and optimized for maximum performance.
+Intel MKL unites and provides these basic components: BLAS, LAPACK,
+ScaLapack, PARDISO, FFT, VML, VSL, Data fitting, Feast Eigensolver and
+many more.
+
+    $ module load mkl
+
+Read more at the [Intel
+MKL](../intel-suite/intel-mkl.html) page.
+
+Intel Integrated Performance Primitives
+---------------------------------------
+
+Intel Integrated Performance Primitives (Intel IPP), version 7.1.1,
+compiled for AVX, is available via the module ipp. The IPP is a library
+of highly optimized algorithmic building blocks for media and data
+applications. This includes signal, image and frame processing
+algorithms, such as FFT, FIR, Convolution, Optical Flow, Hough
+transform, Sum, MinMax and many more.
+
+    $ module load ipp
+
+Read more at the [Intel
+IPP](../intel-suite/intel-integrated-performance-primitives.html)
+page.
+
+Intel Threading Building Blocks
+-------------------------------
+
+Intel Threading Building Blocks (Intel TBB) is a library that supports
+scalable parallel programming using standard ISO C++ code. It does not
+require special languages or compilers. It is designed to promote
+scalable data parallel programming. Additionally, it fully supports
+nested parallelism, so you can build larger parallel components from
+smaller parallel components. To use the library, you specify tasks, not
+threads, and let the library map tasks onto threads in an efficient
+manner.
+
+    $ module load tbb
+
+Read more at the [Intel
+TBB](../intel-suite/intel-tbb.html) page.
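+
+As a minimal sketch of this task-based model (a generic illustration,
+assuming a C++11 compiler with the tbb module loaded), tbb::parallel_for
+can be used to process a vector in parallel:
+
+    // minimal TBB sketch: parallel_for splits the iteration range into
+    // tasks and the library maps the tasks onto worker threads
+    #include <tbb/parallel_for.h>
+    #include <tbb/blocked_range.h>
+    #include <vector>
+    #include <cstdio>
+
+    int main() {
+        std::vector<double> v(1000000, 1.0);
+        tbb::parallel_for(tbb::blocked_range<size_t>(0, v.size()),
+            [&](const tbb::blocked_range<size_t> &r) {
+                for (size_t i = r.begin(); i != r.end(); ++i)
+                    v[i] *= 2.0;   // each task processes its own sub-range
+            });
+        std::printf("v[0] = %g\n", v[0]);
+        return 0;
+    }
+
+A possible compile line (an assumption, adjust to the loaded compiler
+module and a file name of your choice) is: icc -std=c++11 -tbb tbbtest.cpp -o tbbtest.x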
+ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.md new file mode 100644 index 0000000000000000000000000000000000000000..94aae9e9cdec1250d4392676ea5f40b0f6c767bc --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.md @@ -0,0 +1,93 @@ +MAGMA for Intel Xeon Phi +======================== + +Next generation dense algebra library for heterogeneous systems with +accelerators + +### Compiling and linking with MAGMA + +To be able to compile and link code with MAGMA library user has to load +following module: + + $ module load magma/1.3.0-mic + +To make compilation more user friendly module also sets these two +environment variables: + +MAGMA_INC - contains paths to the MAGMA header files (to be used for +compilation step) + +MAGMA_LIBS - contains paths to MAGMA libraries (to be used for linking +step).  + +Compilation example: + + $ icc -mkl -O3 -DHAVE_MIC -DADD_ -Wall $MAGMA_INC -c testing_dgetrf_mic.cpp -o testing_dgetrf_mic.o + + $ icc -mkl -O3 -DHAVE_MIC -DADD_ -Wall -fPIC -Xlinker -zmuldefs -Wall -DNOCHANGE -DHOST testing_dgetrf_mic.o -o testing_dgetrf_mic $MAGMA_LIBS + + + +### Running MAGMA code + +MAGMA implementation for Intel MIC requires a MAGMA server running on +accelerator prior to executing the user application. The server can be +started and stopped using following scripts: + +To start MAGMA server use: +$MAGMAROOT/start_magma_server** + +To stop the server use: +$MAGMAROOT/stop_magma_server** + +For deeper understanding how the MAGMA server is started, see the +following script: +$MAGMAROOT/launch_anselm_from_mic.sh** + +To test if the MAGMA server runs properly we can run one of examples +that are part of the MAGMA installation: + + [user@cn204 ~]$ $MAGMAROOT/testing/testing_dgetrf_mic + + [user@cn204 ~]$ export OMP_NUM_THREADS=16 + + [lriha@cn204 ~]$ $MAGMAROOT/testing/testing_dgetrf_mic + Usage: /apps/libs/magma-mic/magmamic-1.3.0/testing/testing_dgetrf_mic [options] [-h|--help] + +  M    N    CPU GFlop/s (sec)  MAGMA GFlop/s (sec)  ||PA-LU||/(||A||*N) + ========================================================================= +  1088 1088    ---  ( --- )    13.93 (  0.06)    --- +  2112 2112    ---  ( --- )    77.85 (  0.08)    --- +  3136 3136    ---  ( --- )   183.21 (  0.11)    --- +  4160 4160    ---  ( --- )   227.52 (  0.21)    --- +  5184 5184    ---  ( --- )   258.61 (  0.36)    --- +  6208 6208    ---  ( --- )   333.12 (  0.48)    --- +  7232 7232    ---  ( --- )   416.52 (  0.61)    --- +  8256 8256    ---  ( --- )   446.97 (  0.84)    --- +  9280 9280    ---  ( --- )   461.15 (  1.16)    --- + 10304 10304    ---  ( --- )   500.70 (  1.46)    --- + + + +Please note: MAGMA contains several benchmarks and examples that can be +found in: +$MAGMAROOT/testing/** + +MAGMA relies on the performance of all CPU cores as well as on the +performance of the accelerator. Therefore on Anselm number of CPU OpenMP +threads has to be set to 16:  ** +export OMP_NUM_THREADS=16** + + + +See more details at [MAGMA home +page](http://icl.cs.utk.edu/magma/). + +References +---------- + +[1] MAGMA MIC: Linear Algebra Library for Intel Xeon Phi Coprocessors, +Jack Dongarra et. 
al, +[http://icl.utk.edu/projectsfiles/magma/pubs/24-MAGMA_MIC_03.pdf +](http://icl.utk.edu/projectsfiles/magma/pubs/24-MAGMA_MIC_03.pdf) + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.md new file mode 100644 index 0000000000000000000000000000000000000000..5bf88ae2c57af507465e8de23e1a3b25c1253966 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.md @@ -0,0 +1,106 @@ +PETSc +===== + +PETSc is a suite of building blocks for the scalable solution of +scientific and engineering applications modelled by partial differential +equations. It supports MPI, shared memory, and GPUs through CUDA or +OpenCL, as well as hybrid MPI-shared memory or MPI-GPU parallelism. + + + +Introduction +------------ + +PETSc (Portable, Extensible Toolkit for Scientific Computation) is a +suite of building blocks (data structures and routines) for the scalable +solution of scientific and engineering applications modelled by partial +differential equations. It allows thinking in terms of high-level +objects (matrices) instead of low-level objects (raw arrays). Written in +C language but can also be called from FORTRAN, C++, Python and Java +codes. It supports MPI, shared memory, and GPUs through CUDA or OpenCL, +as well as hybrid MPI-shared memory or MPI-GPU parallelism. + +Resources +--------- + +- [project webpage](http://www.mcs.anl.gov/petsc/) +- [documentation](http://www.mcs.anl.gov/petsc/documentation/) + - [PETSc Users + Manual (PDF)](http://www.mcs.anl.gov/petsc/petsc-current/docs/manual.pdf) + - [index of all manual + pages](http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/singleindex.html) +- PRACE Video Tutorial [part + 1](http://www.youtube.com/watch?v=asVaFg1NDqY), [part + 2](http://www.youtube.com/watch?v=ubp_cSibb9I), [part + 3](http://www.youtube.com/watch?v=vJAAAQv-aaw), [part + 4](http://www.youtube.com/watch?v=BKVlqWNh8jY), [part + 5](http://www.youtube.com/watch?v=iXkbLEBFjlM) + +Modules +------- + +You can start using PETSc on Anselm by loading the PETSc module. Module +names obey this pattern: + + # module load petsc/version-compiler-mpi-blas-variant, e.g. + module load petsc/3.4.4-icc-impi-mkl-opt + +where `variant` is replaced by one of +`{dbg, opt, threads-dbg, threads-opt}`. The `opt` variant is compiled +without debugging information (no `-g` option) and with aggressive +compiler optimizations (`-O3 -xAVX`). This variant is suitable for +performance measurements and production runs. In all other cases use the +debug (`dbg`) variant, because it contains debugging information, +performs validations and self-checks, and provides a clear stack trace +and message in case of an error. The other two variants `threads-dbg` +and `threads-opt` are `dbg` and `opt`, respectively, built with [OpenMP +and pthreads threading +support](http://www.mcs.anl.gov/petsc/features/threads.html). + +External libraries +------------------ + +PETSc needs at least MPI, BLAS and LAPACK. These dependencies are +currently satisfied with Intel MPI and Intel MKL in Anselm `petsc` +modules. + +PETSc can be linked with a plethora of [external numerical +libraries](http://www.mcs.anl.gov/petsc/miscellaneous/external.html), +extending PETSc functionality, e.g. direct linear system solvers, +preconditioners or partitioners. See below a list of libraries currently +included in Anselm `petsc` modules. 
+ +All these libraries can be used also alone, without PETSc. Their static +or shared program libraries are available in +`$PETSC_DIR/$PETSC_ARCH/lib` and header files in +`$PETSC_DIR/$PETSC_ARCH/include`. `PETSC_DIR` and `PETSC_ARCH` are +environment variables pointing to a specific PETSc instance based on the +petsc module loaded. + +### Libraries linked to PETSc on Anselm (as of 11 April 2015) + +- dense linear algebra + - [Elemental](http://libelemental.org/) +- sparse linear system solvers + - [Intel MKL + Pardiso](https://software.intel.com/en-us/node/470282) + - [MUMPS](http://mumps.enseeiht.fr/) + - [PaStiX](http://pastix.gforge.inria.fr/) + - [SuiteSparse](http://faculty.cse.tamu.edu/davis/suitesparse.html) + - [SuperLU](http://crd.lbl.gov/~xiaoye/SuperLU/#superlu) + - [SuperLU_Dist](http://crd.lbl.gov/~xiaoye/SuperLU/#superlu_dist) +- input/output + - [ExodusII](http://sourceforge.net/projects/exodusii/) + - [HDF5](http://www.hdfgroup.org/HDF5/) + - [NetCDF](http://www.unidata.ucar.edu/software/netcdf/) +- partitioning + - [Chaco](http://www.cs.sandia.gov/CRF/chac.html) + - [METIS](http://glaros.dtc.umn.edu/gkhome/metis/metis/overview) + - [ParMETIS](http://glaros.dtc.umn.edu/gkhome/metis/parmetis/overview) + - [PT-Scotch](http://www.labri.fr/perso/pelegrin/scotch/) +- preconditioners & multigrid + - [Hypre](http://acts.nersc.gov/hypre/) + - [Trilinos ML](http://trilinos.sandia.gov/packages/ml/) + - [SPAI - Sparse Approximate + Inverse](https://bitbucket.org/petsc/pkg-spai) + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.md new file mode 100644 index 0000000000000000000000000000000000000000..ddd041eeb6ecad4e54a7d009a55fb64a29d7dc78 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.md @@ -0,0 +1,74 @@ +Trilinos +======== + +Packages for large scale scientific and engineering problems. Provides +MPI and hybrid parallelization. + +### Introduction + +Trilinos is a collection of software packages for the numerical solution +of large scale scientific and engineering problems. It is based on C++ +and feautures modern object-oriented design. Both serial as well as +parallel computations based on MPI and hybrid parallelization are +supported within Trilinos packages. + +### Installed packages + +Current Trilinos installation on ANSELM contains (among others) the +following main packages + +- **Epetra** - core linear algebra package containing classes for + manipulation with serial and distributed vectors, matrices, + and graphs. Dense linear solvers are supported via interface to BLAS + and LAPACK (Intel MKL on ANSELM). Its extension **EpetraExt** + contains e.g. methods for matrix-matrix multiplication. +- **Tpetra** - next-generation linear algebra package. Supports 64bit + indexing and arbitrary data type using C++ templates. +- **Belos** - library of various iterative solvers (CG, block CG, + GMRES, block GMRES etc.). +- **Amesos** - interface to direct sparse solvers. +- **Anasazi** - framework for large-scale eigenvalue algorithms. +- **IFPACK** - distributed algebraic preconditioner (includes e.g. + incomplete LU factorization) +- **Teuchos** - common tools packages. This package contains classes + for memory management, output, performance monitoring, BLAS and + LAPACK wrappers etc. 
+ +For the full list of Trilinos packages, descriptions of their +capabilities, and user manuals see +[http://trilinos.sandia.gov.](http://trilinos.sandia.gov) + +### Installed version + +Currently, Trilinos in version 11.2.3 compiled with Intel Compiler is +installed on ANSELM. + +### Compilling against Trilinos + +First, load the appropriate module: + + $ module load trilinos + +For the compilation of CMake-aware project, Trilinos provides the +FIND_PACKAGE( Trilinos ) capability, which makes it easy to build +against Trilinos, including linking against the correct list of +libraries. For details, see +<http://trilinos.sandia.gov/Finding_Trilinos.txt> + +For compiling using simple makefiles, Trilinos provides Makefile.export +system, which allows users to include important Trilinos variables +directly into their makefiles. This can be done simply by inserting the +following line into the makefile: + + include Makefile.export.Trilinos + +or + + include Makefile.export.<package> + +if you are interested only in a specific Trilinos package. This will +give you access to the variables such as Trilinos_CXX_COMPILER, +Trilinos_INCLUDE_DIRS, Trilinos_LIBRARY_DIRS etc. For the detailed +description and example makefile see +<http://trilinos.sandia.gov/Export_Makefile.txt>. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.md new file mode 100644 index 0000000000000000000000000000000000000000..01168124d53b6b2006bb10669c056b33eb97b2c5 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.md @@ -0,0 +1,312 @@ +nVidia CUDA +=========== + +A guide to nVidia CUDA programming and GPU usage + + + +CUDA Programming on Anselm +-------------------------- + +The default programming model for GPU accelerators on Anselm is Nvidia +CUDA. To set up the environment for CUDA use + + $ module load cuda + +If the user code is hybrid and uses both CUDA and MPI, the MPI +environment has to be set up as well. One way to do this is to use +the PrgEnv-gnu module, which sets up correct combination of GNU compiler +and MPI library. + + $ module load PrgEnv-gnu + +CUDA code can be compiled directly on login1 or login2 nodes. User does +not have to use compute nodes with GPU accelerator for compilation. To +compile a CUDA source code, use nvcc compiler. + + $ nvcc --version + +CUDA Toolkit comes with large number of examples, that can be +helpful to start with. 
To compile and test these examples user should +copy them to its home directory + + $ cd ~ + $ mkdir cuda-samples + $ cp -R /apps/nvidia/cuda/6.5.14/samples/* ~/cuda-samples/ + +To compile an examples, change directory to the particular example (here +the example used is deviceQuery) and run "make" to start the compilation + + $ cd ~/cuda-samples/1_Utilities/deviceQuery + $ make + +To run the code user can use PBS interactive session to get access to a +node from qnvidia queue (note: use your project name with parameter -A +in the qsub command) and execute the binary file + + $ qsub -I -q qnvidia -A OPEN-0-0 + $ module load cuda + $ ~/cuda-samples/1_Utilities/deviceQuery/deviceQuery + +Expected output of the deviceQuery example executed on a node with Tesla +K20m is + + CUDA Device Query (Runtime API) version (CUDART static linking) + + Detected 1 CUDA Capable device(s) + + Device 0: "Tesla K20m" + CUDA Driver Version / Runtime Version 5.0 / 5.0 + CUDA Capability Major/Minor version number: 3.5 + Total amount of global memory: 4800 MBytes (5032706048 bytes) + (13) Multiprocessors x (192) CUDA Cores/MP: 2496 CUDA Cores + GPU Clock rate: 706 MHz (0.71 GHz) + Memory Clock rate: 2600 Mhz + Memory Bus Width: 320-bit + L2 Cache Size: 1310720 bytes + Max Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536,65536), 3D=(4096,4096,4096) + Max Layered Texture Size (dim) x layers 1D=(16384) x 2048, 2D=(16384,16384) x 2048 + Total amount of constant memory: 65536 bytes + Total amount of shared memory per block: 49152 bytes + Total number of registers available per block: 65536 + Warp size: 32 + Maximum number of threads per multiprocessor: 2048 + Maximum number of threads per block: 1024 + Maximum sizes of each dimension of a block: 1024 x 1024 x 64 + Maximum sizes of each dimension of a grid: 2147483647 x 65535 x 65535 + Maximum memory pitch: 2147483647 bytes + Texture alignment: 512 bytes + Concurrent copy and kernel execution: Yes with 2 copy engine(s) + Run time limit on kernels: No + Integrated GPU sharing Host Memory: No + Support host page-locked memory mapping: Yes + Alignment requirement for Surfaces: Yes + Device has ECC support: Enabled + Device supports Unified Addressing (UVA): Yes + Device PCI Bus ID / PCI location ID: 2 / 0 + Compute Mode: + < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) > + deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 5.0, CUDA Runtime Version = 5.0, NumDevs = 1, Device0 = Tesla K20m + +### Code example + +In this section we provide a basic CUDA based vector addition code +example. You can directly copy and paste the code to test it. 
+
+    $ vim test.cu
+
+    #define N (2048*2048)
+    #define THREADS_PER_BLOCK 512
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    // GPU kernel function to add two vectors
+    __global__ void add_gpu( int *a, int *b, int *c, int n){
+        int index = threadIdx.x + blockIdx.x * blockDim.x;
+        if (index < n)
+            c[index] = a[index] + b[index];
+    }
+
+    // CPU function to add two vectors
+    void add_cpu (int *a, int *b, int *c, int n) {
+        for (int i=0; i < n; i++)
+            c[i] = a[i] + b[i];
+    }
+
+    // CPU function to generate a vector of random integers
+    void random_ints (int *a, int n) {
+        for (int i = 0; i < n; i++)
+            a[i] = rand() % 10000; // random number between 0 and 9999
+    }
+
+    // CPU function to compare two vectors
+    int compare_ints( int *a, int *b, int n ){
+        int pass = 0;
+        for (int i = 0; i < n; i++){
+            if (a[i] != b[i]) {
+                printf("Value mismatch at location %d, values %d and %d\n", i, a[i], b[i]);
+                pass = 1;
+            }
+        }
+        if (pass == 0) printf ("Test passed\n"); else printf ("Test Failed\n");
+        return pass;
+    }
+
+    int main( void ) {
+
+        int *a, *b, *c;               // host copies of a, b, c
+        int *dev_a, *dev_b, *dev_c;   // device copies of a, b, c
+        int size = N * sizeof( int ); // we need space for N integers
+
+        // Allocate GPU/device copies of dev_a, dev_b, dev_c
+        cudaMalloc( (void**)&dev_a, size );
+        cudaMalloc( (void**)&dev_b, size );
+        cudaMalloc( (void**)&dev_c, size );
+
+        // Allocate CPU/host copies of a, b, c
+        a = (int*)malloc( size );
+        b = (int*)malloc( size );
+        c = (int*)malloc( size );
+
+        // Fill input vectors with random integer numbers
+        random_ints( a, N );
+        random_ints( b, N );
+
+        // copy inputs to device
+        cudaMemcpy( dev_a, a, size, cudaMemcpyHostToDevice );
+        cudaMemcpy( dev_b, b, size, cudaMemcpyHostToDevice );
+
+        // launch add_gpu() kernel with blocks and threads
+        add_gpu<<< N/THREADS_PER_BLOCK, THREADS_PER_BLOCK >>>( dev_a, dev_b, dev_c, N );
+
+        // copy device result back to host copy of c
+        cudaMemcpy( c, dev_c, size, cudaMemcpyDeviceToHost );
+
+        // Check the results with CPU implementation
+        int *c_h; c_h = (int*)malloc( size );
+        add_cpu (a, b, c_h, N);
+        compare_ints(c, c_h, N);
+
+        // Clean CPU memory allocations
+        free( a ); free( b ); free( c ); free (c_h);
+
+        // Clean GPU memory allocations
+        cudaFree( dev_a );
+        cudaFree( dev_b );
+        cudaFree( dev_c );
+
+        return 0;
+    }
+
+This code can be compiled using the following command
+
+    $ nvcc test.cu -o test_cuda
+
+To run the code, use an interactive PBS session to get access to one of
+the GPU accelerated nodes
+
+    $ qsub -I -q qnvidia -A OPEN-0-0
+    $ module load cuda
+    $ ./test_cuda
+
+CUDA Libraries
+--------------
+
+### cuBLAS
+
+The NVIDIA CUDA Basic Linear Algebra Subroutines (cuBLAS) library is a
+GPU-accelerated version of the complete standard BLAS library with 152
+standard BLAS routines. A basic description of the library together
+with a basic performance comparison with MKL can be found
+[here](https://developer.nvidia.com/cublas "Nvidia cuBLAS").
+
+**cuBLAS example: SAXPY**
+
+The SAXPY function multiplies the vector x by the scalar alpha and adds
+it to the vector y, overwriting the latter with the result. The
+description of the cuBLAS function can be found in the [NVIDIA CUDA
+documentation](http://docs.nvidia.com/cuda/cublas/index.html#cublas-lt-t-gt-axpy "Nvidia CUDA documentation ").
+The code below can be pasted into a file and compiled without any modification.
+
+    /* Includes, system */
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    /* Includes, cuda */
+    #include <cuda_runtime.h>
+    #include <cublas_v2.h>
+
+    /* Vector size */
+    #define N (32)
+
+    /* Host implementation of a simple version of saxpy */
+    void saxpy(int n, float alpha, const float *x, float *y)
+    {
+        for (int i = 0; i < n; ++i)
+            y[i] = alpha*x[i] + y[i];
+    }
+
+    /* Main */
+    int main(int argc, char **argv)
+    {
+        float *h_X, *h_Y, *h_Y_ref;
+        float *d_X = 0;
+        float *d_Y = 0;
+
+        const float alpha = 1.0f;
+        int i;
+
+        cublasHandle_t handle;
+
+        /* Initialize CUBLAS */
+        printf("simpleCUBLAS test running...\n");
+        cublasCreate(&handle);
+
+        /* Allocate host memory for the vectors */
+        h_X = (float *)malloc(N * sizeof(h_X[0]));
+        h_Y = (float *)malloc(N * sizeof(h_Y[0]));
+        h_Y_ref = (float *)malloc(N * sizeof(h_Y_ref[0]));
+
+        /* Fill the vectors with test data */
+        for (i = 0; i < N; i++)
+        {
+            h_X[i] = rand() / (float)RAND_MAX;
+            h_Y[i] = rand() / (float)RAND_MAX;
+            h_Y_ref[i] = h_Y[i];
+        }
+
+        /* Allocate device memory for the vectors */
+        cudaMalloc((void **)&d_X, N * sizeof(d_X[0]));
+        cudaMalloc((void **)&d_Y, N * sizeof(d_Y[0]));
+
+        /* Initialize the device vectors with the host vectors */
+        cublasSetVector(N, sizeof(h_X[0]), h_X, 1, d_X, 1);
+        cublasSetVector(N, sizeof(h_Y[0]), h_Y, 1, d_Y, 1);
+
+        /* Perform the operation using plain C code */
+        saxpy(N, alpha, h_X, h_Y_ref);
+
+        /* Perform the operation using cuBLAS */
+        cublasSaxpy(handle, N, &alpha, d_X, 1, d_Y, 1);
+
+        /* Read the result back */
+        cublasGetVector(N, sizeof(h_Y[0]), d_Y, 1, h_Y, 1);
+
+        /* Check the result against the reference */
+        for (i = 0; i < N; ++i)
+            printf("CPU res = %f \t GPU res = %f \t diff = %f\n", h_Y_ref[i], h_Y[i], h_Y_ref[i] - h_Y[i]);
+
+        /* Memory clean up */
+        free(h_X); free(h_Y); free(h_Y_ref);
+        cudaFree(d_X); cudaFree(d_Y);
+
+        /* Shutdown */
+        cublasDestroy(handle);
+
+        return 0;
+    }
+
+Please note that cuBLAS has its own functions for data transfers between
+CPU and GPU memory:
+
+-   [cublasSetVector](http://docs.nvidia.com/cuda/cublas/index.html#cublassetvector)
+    - transfers data from CPU to GPU memory
+-   [cublasGetVector](http://docs.nvidia.com/cuda/cublas/index.html#cublasgetvector)
+    - transfers data from GPU to CPU memory
+
+To compile the code using the NVCC compiler, the "-lcublas" compiler flag
+has to be specified:
+
+    $ module load cuda
+    $ nvcc -lcublas test_cublas.cu -o test_cublas_nvcc
+
+To compile the same code with GCC:
+
+    $ module load cuda
+    $ gcc -std=c99 test_cublas.c -o test_cublas_gcc -lcublas -lcudart
+
+To compile the same code with the Intel compiler:
+
+    $ module load cuda intel
+    $ icc -std=c99 test_cublas.c -o test_cublas_icc -lcublas -lcudart
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.md
new file mode 100644
index 0000000000000000000000000000000000000000..3a6927548f74855361b39ee291a0ccf95aac8046
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.md
@@ -0,0 +1,49 @@
+Diagnostic component (TEAM)
+===========================
+
+
+
+### Access
+
+TEAM is available at the following address: <http://omics.it4i.cz/team/>
+
+The address is accessible only via
+[VPN.
](../../accessing-the-cluster/vpn-access.html) + +### Diagnostic component (TEAM) {#diagnostic-component-team} + +VCF files are scanned by this diagnostic tool for known diagnostic +disease-associated variants. When no diagnostic mutation is found, the +file can be sent to the disease-causing gene discovery tool to see +wheter new disease associated variants can be found. + +TEAM >(27) is an intuitive and easy-to-use web tool that +fills the gap between the predicted mutations and the final diagnostic +in targeted enrichment sequencing analysis. The tool searches for known +diagnostic mutations, corresponding to a disease panel, among the +predicted patient’s variants. Diagnostic variants for the disease are +taken from four databases of disease-related variants (HGMD-public, +HUMSAVAR , ClinVar and COSMIC) If no primary diagnostic variant is +found, then a list of secondary findings that can help to establish a +diagnostic is produced. TEAM also provides with an interface for the +definition of and customization of panels, by means of which, genes and +mutations can be added or discarded to adjust panel definitions. + + + + + +*Figure 5. ***Interface of the application. Panels for defining +targeted regions of interest can be set up by just drag and drop known +disease genes or disease definitions from the lists. Thus, virtual +panels can be interactively improved as the knowledge of the disease +increases.* + +* +* + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig1.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig1.png new file mode 100644 index 0000000000000000000000000000000000000000..0b5670a4e570c385eccc5d83e8cefc8c93e38e03 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig1.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig2.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig2.png new file mode 100644 index 0000000000000000000000000000000000000000..f5bc24d65e435dbd869f873a3c88c6926fe5b466 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig2.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig3.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig3.png new file mode 100644 index 0000000000000000000000000000000000000000..911f443a5a175ca36073dd17944589a37c7dec6a Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig3.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig4.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig4.png new file mode 100644 index 0000000000000000000000000000000000000000..8aa39d6aa924e3a567a135334e7305ccd14ce05d Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig4.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig5.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig5.png new file mode 100644 index 0000000000000000000000000000000000000000..4e87c6f45b1e69d053663a539ab67176d166b094 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig5.png differ diff --git 
a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig6.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig6.png new file mode 100644 index 0000000000000000000000000000000000000000..43987a78a007e9489ad5e103db8c80a6749ec259 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig6.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig7.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig7.png new file mode 100644 index 0000000000000000000000000000000000000000..dc4952d10d945e633ffcf2aed1317fba55f55cef Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig7.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig7x.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig7x.png new file mode 100644 index 0000000000000000000000000000000000000000..c02375966b44f3cdfb336e457c9be17dd2b0a3c2 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig7x.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig8.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig8.png new file mode 100644 index 0000000000000000000000000000000000000000..93934042e94fecf27fe2351dfc96da50d64c8921 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig8.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig9.png b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig9.png new file mode 100644 index 0000000000000000000000000000000000000000..406e0dbb156d0f8e75f6230f66dfccc58fed457b Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/fig9.png differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..0382b6936197e3ccd09774224ddd043c277b8135 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.md @@ -0,0 +1,827 @@ +Overview +======== + +The human NGS data processing solution + + + +Introduction +------------ + +The scope of this OMICS MASTER solution is restricted to human genomics +research (disease causing gene discovery in whole human genome or exome) +or diagnosis (panel sequencing), although it could be extended in the +future to other usages. + +The pipeline inputs the raw data produced by the sequencing machines and +undergoes a processing procedure that consists on a quality control, the +mapping and variant calling steps that result in a file containing the +set of variants in the sample. From this point, the prioritization +component or the diagnostic component can be launched. + + + +*Figure 1.** *OMICS MASTER solution overview. Data is produced in the +external labs and comes to IT4I (represented by the blue dashed line). +The data pre-processor converts raw data into a list of variants and +annotations for each sequenced patient. 
These lists files together with +primary and secondary (alignment) data files are stored in IT4I sequence +DB and uploaded to the discovery (candidate prioritization) or +diagnostic component where they can be analyzed directly by the user +that produced them, depending of the experimental design carried +out*. style="text-align: left; "> + +Typical genomics pipelines are composed by several components that need +to be launched manually. The advantage of OMICS MASTER pipeline is that +all these components are invoked sequentially in an automated way. + +OMICS MASTER pipeline inputs a FASTQ file and outputs an enriched VCF +file. This pipeline is able to queue all the jobs to PBS by only +launching a process taking all the necessary input files and creates the +intermediate and final folders + +Let’s see each of the OMICS MASTER solution components: + +Components +---------- + +### Processing + +This component is composed by a set of programs that carry out quality +controls, alignment, realignment, variant calling and variant +annotation. It turns raw data from the sequencing machine into files +containing lists of variants (VCF) that once annotated, can be used by +the following components (discovery and diagnosis). + +We distinguish three types of sequencing instruments: bench sequencers +(MySeq, IonTorrent, and Roche Junior, although this last one is about +being discontinued), which produce relatively Genomes in the clinic + +low throughput (tens of million reads), and high end sequencers, which +produce high throughput (hundreds of million reads) among which we have +Illumina HiSeq 2000 (and new models) and SOLiD. All of them but SOLiD +produce data in sequence format. SOLiD produces data in a special format +called colour space that require of specific software for the mapping +process. Once the mapping has been done, the rest of the pipeline is +identical. Anyway, SOLiD is a technology which is also about being +discontinued by the manufacturer so, this type of data will be scarce in +the future. + +#### Quality control, preprocessing and statistics for FASTQ + + FastQC& FastQC. + +These steps are carried out over the original FASTQ file with optimized +scripts and includes the following steps: sequence cleansing, estimation +of base quality scores, elimination of duplicates and statistics. + +Input: FASTQ file. + +Output: FASTQ file plus an HTML file containing statistics on the +data. + +FASTQ format +It represents the nucleotide sequence and its corresponding +quality scores. + + +*Figure 2.**FASTQ file.** + +#### Mapping + +Component:** Hpg-aligner.** + +Sequence reads are mapped over the human reference genome. SOLiD reads +are not covered by this solution; they should be mapped with specific +software (among the few available options, SHRiMP seems to be the best +one). For the rest of NGS machine outputs we use HPG Aligner. +HPG-Aligner is an innovative solution, based on a combination of mapping +with BWT and local alignment with Smith-Waterman (SW), that drastically +increases mapping accuracy (97% versus 62-70% by current mappers, in the +most common scenarios). This proposal provides a simple and fast +solution that maps almost all the reads, even those containing a high +number of mismatches or indels. + +Input: FASTQ file. + +Output:** Aligned file in BAM format.*** + +Sequence Alignment/Map (SAM)** + +It is a human readable tab-delimited format in which each read and +its alignment is represented on a single line. 
The format can represent +unmapped reads, reads that are mapped to unique locations, and reads +that are mapped to multiple locations. + +The SAM format (1)^> consists of one header +section and one alignment section. The lines in the header section start +with character â€@’, and lines in the alignment section do not. All lines +are TAB delimited. + +In SAM, each alignment line has 11 mandatory fields and a variable +number of optional fields. The mandatory fields are briefly described in +Table 1. They must be present but their value can be a +â€*’> or a zero (depending on the field) if the +corresponding information is unavailable.  + +<col width="33%" /> +<col width="33%" /> +<col width="33%" /> + |<strong>No.</strong>\ |<p><strong>Name</strong>\ |<p><strong>Description</strong></p> | + |1\ |<p>QNAME\ |<p>Query NAME of the read or the read pair</p> | + |2\ |<p>FLAG\ |<p>Bitwise FLAG (pairing,strand,mate strand,etc.)</p> | + |3\ |<p>RNAME \ |<p>Reference sequence NAME</p> | + |4\ |<p>POS \ |<p>1-Based  leftmost POSition of clipped alignment</p> | + |5\ |<p>MAPQ \ |<p>MAPping Quality (Phred-scaled)</p> | + |6\ |<p>CIGAR \ |<p>Extended CIGAR string (operations:MIDNSHP)</p> | + |7\ |<p>MRNM \ |<p>Mate REference NaMe ('=' if same RNAME)</p> | + |8\ |<p>MPOS \ |<p>1-Based leftmost Mate POSition</p> | + |9\ |<p>ISIZE \ |<p>Inferred Insert SIZE </p> | + |10\ |<p>SEQ \ |<p>Query SEQuence on the same strand as the reference</p> | + |11\ |<p>QUAL \ |<p>Query QUALity (ASCII-33=Phred base quality)</p> | + +*Table 1.** *Mandatory fields in the SAM format. + +The standard CIGAR description of pairwise alignment defines three +operations: â€M’ for match/mismatch, â€I’ for insertion compared with the +reference and â€D’ for deletion. The extended CIGAR proposed in SAM added +four more operations: â€N’ for skipped bases on the reference, â€S’ for +soft clipping, â€H’ for hard clipping and â€P’ for padding. These support +splicing, clipping, multi-part and padded alignments. Figure 3 shows +examples of CIGAR strings for different types of alignments. + + +* +Figure 3.** *SAM format file. The â€@SQ’ line in the header section +gives the order of reference sequences. Notably, r001 is the name of a +read pair. According to FLAG 163 (=1+2+32+128), the read mapped to +position 7 is the second read in the pair (128) and regarded as properly +paired (1 + 2); its mate is mapped to 37 on the reverse strand (32). +Read r002 has three soft-clipped (unaligned) bases. The coordinate shown +in SAM is the position of the first aligned base. The CIGAR string for +this alignment contains a P (padding) operation which correctly aligns +the inserted sequences. Padding operations can be absent when an aligner +does not support multiple sequence alignment. The last six bases of read +r003 map to position 9, and the first five to position 29 on the reverse +strand. The hard clipping operation H indicates that the clipped +sequence is not present in the sequence field. The NM tag gives the +number of mismatches. Read r004 is aligned across an intron, indicated +by the N operation.** + +Binary Alignment/Map (BAM)** + +BAM is the binary representation of SAM and keeps exactly the same +information as SAM. BAM uses lossless compression to reduce the size of +the data by about 75% and provides an indexing system that allows reads +that overlap a region of the genome to be retrieved and rapidly +traversed. + +#### Quality control, preprocessing and statistics for BAM + +Component:** Hpg-Fastq & FastQC. 
Some features: + +- Quality control: % reads with N errors, % reads with multiple + mappings, strand bias, paired-end insert, ... +- Filtering: by number of errors, number of hits, … + - Comparator: stats, intersection, ... + +Input:** BAM** file.** + +Output:** BAM file plus an HTML file containing statistics.** + +#### Variant Calling + +Component:** GATK.** + +Identification of single nucleotide variants and indels on the +alignments is performed using the Genome Analysis Toolkit (GATK). GATK +(2)^ is a software package developed at the Broad Institute to analyze +high-throughput sequencing data. The toolkit offers a wide variety of +tools, with a primary focus on variant discovery and genotyping as well +as strong emphasis on data quality assurance. + +Input:** BAM** + +Output:** VCF** + +**Variant Call Format (VCF)** + +VCF (3)^> is a standardized format for storing the +most prevalent types of sequence variation, including SNPs, indels and +larger structural variants, together with rich annotations. The format +was developed with the primary intention to represent human genetic +variation, but its use is not restricted >to diploid genomes +and can be used in different contexts as well. Its flexibility and user +extensibility allows representation of a wide variety of genomic +variation with respect to a single reference sequence. + +A VCF file consists of a header section and a data section. The +header contains an arbitrary number of metainformation lines, each +starting with characters â€##’, and a TAB delimited field definition +line, starting with a single â€#’ character. The meta-information header +lines provide a standardized description of tags and annotations used in +the data section. The use of meta-information allows the information +stored within a VCF file to be tailored to the dataset in question. It +can be also used to provide information about the means of file +creation, date of creation, version of the reference sequence, software +used and any other information relevant to the history of the file. The +field definition line names eight mandatory columns, corresponding to +data columns representing the chromosome (CHROM), a 1-based position of +the start of the variant (POS), unique identifiers of the variant (ID), +the reference allele (REF), a comma separated list of alternate +non-reference alleles (ALT), a phred-scaled quality score (QUAL), site +filtering information (FILTER) and a semicolon separated list of +additional, user extensible annotation (INFO). In addition, if samples +are present in the file, the mandatory header columns are followed by a +FORMAT column and an arbitrary number of sample IDs that define the +samples included in the VCF file. The FORMAT column is used to define +the information contained within each subsequent genotype column, which +consists of a colon separated list of fields. For example, the FORMAT +field GT:GQ:DP in the fourth data entry of Figure 1a indicates that the +subsequent entries contain information regarding the genotype, genotype +quality and read depth for each sample. All data lines are TAB +delimited and the number of fields in each data line must match the +number of fields in the header line. It is strongly recommended that all +annotation tags used are declared in the VCF header section. + + + +Figure 4.**> (a) Example of valid VCF. The header lines +##fileformat and #CHROM are mandatory, the rest is optional but +strongly recommended. 
Each line of the body describes variants present +in the sampled population at one genomic position or region. All +alternate alleles are listed in the ALT column and referenced from the +genotype fields as 1-based indexes to this list; the reference haplotype +is designated as 0. For multiploid data, the separator indicates whether +the data are phased (|) or unphased (/). Thus, the two alleles C and G +at the positions 2 and 5 in this figure occur on the same chromosome in +SAMPLE1. The first data line shows an example of a deletion (present in +SAMPLE1) and a replacement of two bases by another base (SAMPLE2); the +second line shows a SNP and an insertion; the third a SNP; the fourth a +large structural variant described by the annotation in the INFO column, +the coordinate is that of the base before the variant. (b–f ) Alignments +and VCF representations of different sequence variants: SNP, insertion, +deletion, replacement, and a large deletion. The REF columns shows the +reference bases replaced by the haplotype in the ALT column. The +coordinate refers to the first reference base. (g) Users are advised to +use simplest representation possible and lowest coordinate in cases +where the position is ambiguous. + +###Annotating + +Component:** HPG-Variant + +The functional consequences of every variant found are then annotated +using the HPG-Variant software, which extracts from CellBase**,** the +Knowledge database, all the information relevant on the predicted +pathologic effect of the variants. + +VARIANT (VARIant Analysis Tool) (4)^ reports information on the +variants found that include consequence type and annotations taken from +different databases and repositories (SNPs and variants from dbSNP and +1000 genomes, and disease-related variants from the Genome-Wide +Association Study (GWAS) catalog, Online Mendelian Inheritance in Man +(OMIM), Catalog of Somatic Mutations in Cancer (COSMIC) mutations, etc. +VARIANT also produces a rich variety of annotations that include +information on the regulatory (transcription factor or miRNAbinding +sites, etc.) or structural roles, or on the selective pressures on the +sites affected by the variation. This information allows extending the +conventional reports beyond the coding regions and expands the knowledge +on the contribution of non-coding or synonymous variants to the +phenotype studied. + +Input:** VCF** + +Output:** The output of this step is the Variant Calling Format (VCF) +file, which contains changes with respect to the reference genome with +the corresponding QC and functional annotations.** + +#### CellBase + +CellBase(5)^ is a relational database integrates biological information +from different sources and includes: + +**Core features:** + +We took genome sequences, genes, transcripts, exons, cytobands or cross +references (xrefs) identifiers (IDs) >from Ensembl +(6)^>. Protein information including sequences, xrefs or +protein features (natural variants, mutagenesis sites, +post-translational modifications, etc.) were imported from UniProt +(7)^>. + +**Regulatory:** + +CellBase imports miRNA from miRBase (8)^; curated and non-curated miRNA +targets from miRecords (9)^, >miRTarBase ^(10)^>, +TargetScan(11)^> and microRNA.org ^(12)^> and +CpG islands and conserved regions from the UCSC database +(13)^>.> + +**Functional annotation** + +OBO Foundry (14)^ develops many biomedical ontologies that are +implemented in OBO format. We designed a SQL schema to store these OBO +ontologies and >30 ontologies were imported. 
OBO ontology term +annotations were taken from Ensembl (6)^. InterPro ^(15)^ annotations +were also imported. + +**Variation** + +CellBase includes SNPs from dbSNP (16)^; SNP population frequencies +from HapMap (17)^, 1000 genomes project ^(18)^ and Ensembl ^(6)^; +phenotypically annotated SNPs were imported from NHRI GWAS Catalog +(19)^,^ ^>HGMD ^(20)^>, Open Access GWAS Database +(21)^>, UniProt ^(7)^> and OMIM +(22)^>; mutations from COSMIC ^(23)^> and +structural variations from Ensembl +(6)^>.> + +**Systems biology** + +We also import systems biology information like interactome information +from IntAct (24)^. Reactome ^(25)^> stores pathway and interaction +information in BioPAX (26)^> format. BioPAX data exchange +format >enables the integration of diverse pathway +resources. We successfully solved the problem of storing data released +in BioPAX format into a SQL relational schema, which allowed us +importing Reactome in CellBase. + +### [Diagnostic component (TEAM)](diagnostic-component-team.html) + +### [Priorization component (BiERApp)](priorization-component-bierapp.html) + +Usage +----- + +First of all, we should load ngsPipeline +module: + + $ module load ngsPipeline + +This command will load python/2.7.5 +module and all the required modules ( +hpg-aligner, +gatk, etc) + + If we launch ngsPipeline with â€-h’, we will get the usage +help: + + $ ngsPipeline -h + Usage: ngsPipeline.py [-h] -i INPUT -o OUTPUT -p PED --project PROJECT --queue +            QUEUE [--stages-path STAGES_PATH] [--email EMAIL] + [--prefix PREFIX] [-s START] [-e END] --log + + Python pipeline + + optional arguments: +  -h, --help       show this help message and exit +  -i INPUT, --input INPUT +  -o OUTPUT, --output OUTPUT +             Output Data directory +  -p PED, --ped PED   Ped file with all individuals +  --project PROJECT   Project Id +  --queue QUEUE     Queue Id +  --stages-path STAGES_PATH +             Custom Stages path +  --email EMAIL     Email +  --prefix PREFIX    Prefix name for Queue Jobs name +  -s START, --start START +             Initial stage +  -e END, --end END   Final stage +  --log         Log to file + + + +Let us see a brief description of the arguments: + +     *-h --help*. Show the help. + +     *-i, --input.* The input data directory. This directory must to +have a special structure. We have to create one folder per sample (with +the same name). These folders will host the fastq files. These fastq +files must have the following pattern “sampleName” + “_” + “1 or 2” + +“.fq”. 1 for the first pair (in paired-end sequences), and 2 for the +second one. + +     *-o , --output.* The output folder. This folder will contain all +the intermediate and final folders. When the pipeline will be executed +completely, we could remove the intermediate folders and keep only the +final one (with the VCF file containing all the variants) + +     *-p , --ped*. The ped file with the pedigree. This file contains +all the sample names. These names must coincide with the names of the +input folders. If our input folder contains more samples than the .ped +file, the pipeline will use only the samples from the .ped file. + +     *--email.* Email for PBS notifications. + +     *--prefix.* Prefix for PBS Job names. + +    *-s, --start & -e, --end.*  Initial and final stage. If we want to +launch the pipeline in a specific stage we must use -s. If we want to +end the pipeline in a specific stage we must use -e. + +     *--log*. Using log argument NGSpipeline will prompt all the logs +to this file. 
+ +    *--project*>. Project ID of your supercomputer +allocation. + +    *--queue*. +[Queue](../../resource-allocation-and-job-execution/introduction.html) +to run the jobs in. + + >Input, output and ped arguments are mandatory. If the output +folder does not exist, the pipeline will create it. + +Examples +--------------------- + +This is an example usage of NGSpipeline: + +We have a folder with the following structure in > +/apps/bio/omics/1.0/sample_data/ >: + + /apps/bio/omics/1.0/sample_data + └── data + ├── file.ped + ├── sample1 + │  ├── sample1_1.fq + │  └── sample1_2.fq + └── sample2 + ├── sample2_1.fq + └── sample2_2.fq + +The ped file ( file.ped) contains the +following info:> + + #family_ID sample_ID parental_ID maternal_ID sex phenotype + FAM sample_A 0 0 1 1 + FAM sample_B 0 0 2 2 + +Now, lets load the NGSPipeline module and copy the sample data to a +[scratch directory](../../storage.html) : + + $ module load ngsPipeline + $ mkdir -p /scratch/$USER/omics/results + $ cp -r /apps/bio/omics/1.0/sample_data /scratch/$USER/omics/ + +Now, we can launch the pipeline (replace OPEN-0-0 with your Project ID) +: + + $ ngsPipeline -i /scratch/$USER/omics/sample_data/data -o /scratch/$USER/omics/results -p /scratch/$USER/omics/sample_data/data/file.ped --project OPEN-0-0 --queue qprod + +This command submits the processing [jobs to the +queue](../../resource-allocation-and-job-execution/job-submission-and-execution.html). + +If we want to re-launch the pipeline from stage 4 until stage 20 we +should use the next command: + + $ ngsPipeline -i /scratch/$USER/omics/sample_data/data -o /scratch/$USER/omics/results -p /scratch/$USER/omics/sample_data/data/file.ped -s 4 -e 20 --project OPEN-0-0 --queue qprod + +Details on the pipeline +------------------------------------ + +The pipeline calls the following tools: + +- >[fastqc](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/), + a> quality control tool for high throughput + sequence data. +- >[gatk](https://www.broadinstitute.org/gatk/), >The + Genome Analysis Toolkit or GATK is a software package developed at + the Broad Institute to analyze high-throughput sequencing data. The + toolkit offers a wide variety of tools, with a primary focus on + variant discovery and genotyping as well as strong emphasis on data + quality assurance. Its robust architecture, powerful processing + engine and high-performance computing features make it capable of + taking on projects of any size. +- >[hpg-aligner](http://wiki.opencb.org/projects/hpg/doku.php?id=aligner:downloads), >HPG + Aligner has been designed to align short and long reads with high + sensitivity, therefore any number of mismatches or indels + are allowed. HPG Aligner implements and combines two well known + algorithms: *Burrows-Wheeler Transform*> (BWT) to + speed-up mapping high-quality reads, + and *Smith-Waterman*> (SW) to increase sensitivity when + reads cannot be mapped using BWT. +- >[hpg-fastq](http://docs.bioinfo.cipf.es/projects/fastqhpc/wiki), > a + quality control tool for high throughput + sequence data. +- >[hpg-variant](http://wiki.opencb.org/projects/hpg/doku.php?id=variant:downloads), >The + HPG Variant suite is an ambitious project aimed to provide a + complete suite of tools to work with genomic variation data, from + VCF tools to variant profiling or genomic statistics. It is being + implemented using High Performance Computing technologies to provide + the best performance possible. 
+- >[picard](http://picard.sourceforge.net/), >Picard + comprises Java-based command-line utilities that manipulate SAM + files, and a Java API (HTSJDK) for creating new programs that read + and write SAM files. Both SAM text format and SAM binary (BAM) + format are supported. +- >[samtools](http://samtools.sourceforge.net/samtools-c.shtml), >SAM + Tools provide various utilities for manipulating alignments in the + SAM format, including sorting, merging, indexing and generating + alignments in a + per-position format. +- >>[snpEff](http://snpeff.sourceforge.net/), <span>Genetic + variant annotation and effect + prediction toolbox. + +This listing show which tools are used in each step of the pipeline : + +- >stage-00: fastqc +- >stage-01: hpg_fastq +- >stage-02: fastqc +- >stage-03: hpg_aligner and samtools +- >stage-04: samtools +- >stage-05: samtools +- >stage-06: fastqc +- >stage-07: picard +- >stage-08: fastqc +- >stage-09: picard +- >stage-10: gatk +- >stage-11: gatk +- >stage-12: gatk +- >stage-13: gatk +- >stage-14: gatk +- >stage-15: gatk +- >stage-16: samtools +- >stage-17: samtools +- >stage-18: fastqc +- >stage-19: gatk +- >stage-20: gatk +- >stage-21: gatk +- >stage-22: gatk +- >stage-23: gatk +- >stage-24: hpg-variant +- >stage-25: hpg-variant +- >stage-26: snpEff +- >stage-27: snpEff +- >stage-28: hpg-variant + +Interpretation +--------------------------- + +The output folder contains all the subfolders with the intermediate +data. This folder contains the final VCF with all the variants. This +file can be uploaded into +[TEAM](diagnostic-component-team.html) by using the VCF +file button. It is important to note here that the entire management of +the VCF file is local: no patient’s sequence data is sent over the +Internet thus avoiding any problem of data privacy or confidentiality. + + + +*Figure 7**. *TEAM upload panel.* *Once the file has been uploaded, a +panel must be chosen from the Panel *** list. Then, pressing the Run +button the diagnostic process starts.* + +Once the file has been uploaded, a panel must be chosen from the Panel +list. Then, pressing the Run button the diagnostic process starts. TEAM +searches first for known diagnostic mutation(s) taken from four +databases: HGMD-public (20)^, +[HUMSAVAR](http://www.uniprot.org/docs/humsavar), +ClinVar (29)^ and COSMIC ^(23)^. + + + +*Figure 7.** *The panel manager. The elements used to define a panel +are (**A**) disease terms, (**B**) diagnostic mutations and (**C**) +genes. Arrows represent actions that can be taken in the panel manager. +Panels can be defined by using the known mutations and genes of a +particular disease. This can be done by dragging them to the **Primary +Diagnostic** box (action **D**). This action, in addition to defining +the diseases in the **Primary Diagnostic** box, automatically adds the +corresponding genes to the **Genes** box. The panels can be customized +by adding new genes (action **F**) or removing undesired genes (action +G**). New disease mutations can be added independently or associated +to an already existing disease term (action **E**). Disease terms can be +removed by simply dragging them back (action **H**).* + +For variant discovering/filtering we should upload the VCF file into +BierApp by using the following form: + +** + +**Figure 8.** *BierApp VCF upload panel. It is recommended to choose +a name for the job as well as a description.** + +Each prioritization (â€job’) has three associated screens that facilitate +the filtering steps. 
The first one, the â€Summary’ tab, displays a +statistic of the data set analyzed, containing the samples analyzed, the +number and types of variants found and its distribution according to +consequence types. The second screen, in the â€Variants and effect’ tab, +is the actual filtering tool, and the third one, the â€Genome view’ tab, +offers a representation of the selected variants within the genomic +context provided by an embedded version of >the Genome Maps Tool +(30)^>. + + + +**Figure 9.*** *This picture shows all the information associated to +the variants. If a variant has an associated phenotype we could see it +in the last column. In this case, the variant 7:132481242 C>T is +associated to the phenotype: large intestine tumor.** + +* +* + +References +----------------------- + +1. Heng Li, Bob Handsaker, Alec Wysoker, Tim + Fennell, Jue Ruan, Nils Homer, Gabor Marth5, Goncalo Abecasis6, + Richard Durbin and 1000 Genome Project Data Processing Subgroup: The + Sequence Alignment/Map format and SAMtools. Bioinformatics 2009, + 25: 2078-2079. +2. >McKenna A, Hanna M, Banks E, Sivachenko + A, Cibulskis K, Kernytsky A, Garimella K, Altshuler D, Gabriel S, + Daly M, DePristo MA: The Genome Analysis Toolkit: a MapReduce + framework for analyzing next-generation DNA sequencing data. + *Genome Res* >2010, 20:1297-1303. +3. Petr Danecek, Adam Auton, Goncalo Abecasis, + Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert E. + Handsaker, Gerton Lunter, Gabor T. Marth, Stephen T. Sherry, Gilean + McVean, Richard Durbin, and 1000 Genomes Project Analysis Group. The + variant call format and VCFtools. Bioinformatics 2011, + 27: 2156-2158. +4. Medina I, De Maria A, Bleda M, Salavert F, + Alonso R, Gonzalez CY, Dopazo J: VARIANT: Command Line, Web service + and Web interface for fast and accurate functional characterization + of variants found by Next-Generation Sequencing. Nucleic Acids Res + 2012, 40:W54-58. +5. Bleda M, Tarraga J, de Maria A, Salavert F, + Garcia-Alonso L, Celma M, Martin A, Dopazo J, Medina I: CellBase, a + comprehensive collection of RESTful web services for retrieving + relevant biological information from heterogeneous sources. Nucleic + Acids Res 2012, 40:W609-614. +6. Flicek,P., Amode,M.R., Barrell,D., Beal,K., + Brent,S., Carvalho-Silva,D., Clapham,P., Coates,G., + Fairley,S., Fitzgerald,S. et al. (2012) Ensembl 2012. Nucleic Acids + Res., 40, D84–D90. +7. UniProt Consortium. (2012) Reorganizing the + protein space at the Universal Protein Resource (UniProt). Nucleic + Acids Res., 40, D71–D75. +8. Kozomara,A. and Griffiths-Jones,S. (2011) + miRBase: integrating microRNA annotation and deep-sequencing data. + Nucleic Acids Res., 39, D152–D157. +9. Xiao,F., Zuo,Z., Cai,G., Kang,S., Gao,X. + and Li,T. (2009) miRecords: an integrated resource for + microRNA-target interactions. Nucleic Acids Res., + 37, D105–D110. +10. Hsu,S.D., Lin,F.M., Wu,W.Y., Liang,C., + Huang,W.C., Chan,W.L., Tsai,W.T., Chen,G.Z., Lee,C.J., Chiu,C.M. + et al. (2011) miRTarBase: a database curates experimentally + validated microRNA-target interactions. Nucleic Acids Res., + 39, D163–D169. +11. Friedman,R.C., Farh,K.K., Burge,C.B. + and Bartel,D.P. (2009) Most mammalian mRNAs are conserved targets + of microRNAs. Genome Res., 19, 92–105. +12. Betel,D., Wilson,M., Gabow,A., Marks,D.S. + and Sander,C. (2008) The microRNA.org resource: targets + and expression. Nucleic Acids Res., 36, D149–D153. +13. 
Dreszer,T.R., Karolchik,D., Zweig,A.S., + Hinrichs,A.S., Raney,B.J., Kuhn,R.M., Meyer,L.R., Wong,M., + Sloan,C.A., Rosenbloom,K.R. et al. (2012) The UCSC genome browser + database: extensions and updates 2011. Nucleic Acids Res., + 40, D918–D923. +14. Smith,B., Ashburner,M., Rosse,C., Bard,J., + Bug,W., Ceusters,W., Goldberg,L.J., Eilbeck,K., + Ireland,A., Mungall,C.J. et al. (2007) The OBO Foundry: coordinated + evolution of ontologies to support biomedical data integration. Nat. + Biotechnol., 25, 1251–1255. +15. Hunter,S., Jones,P., Mitchell,A., + Apweiler,R., Attwood,T.K.,Bateman,A., Bernard,T., Binns,D., + Bork,P., Burge,S. et al. (2012) InterPro in 2011: new developments + in the family and domain prediction database. Nucleic Acids Res., + 40, D306–D312. +16. Sherry,S.T., Ward,M.H., Kholodov,M., + Baker,J., Phan,L., Smigielski,E.M. and Sirotkin,K. (2001) dbSNP: the + NCBI database of genetic variation. Nucleic Acids Res., + 29, 308–311. +17. Altshuler,D.M., Gibbs,R.A., Peltonen,L., + Dermitzakis,E., Schaffner,S.F., Yu,F., Bonnen,P.E., de Bakker,P.I., + Deloukas,P., Gabriel,S.B. et al. (2010) Integrating common and rare + genetic variation in diverse human populations. Nature, + 467, 52–58. +18. 1000 Genomes Project Consortium. (2010) A map + of human genome variation from population-scale sequencing. Nature, + 467, 1061–1073. +19. Hindorff,L.A., Sethupathy,P., Junkins,H.A., + Ramos,E.M., Mehta,J.P., Collins,F.S. and Manolio,T.A. (2009) + Potential etiologic and functional implications of genome-wide + association loci for human diseases and traits. Proc. Natl Acad. + Sci. USA, 106, 9362–9367. +20. Stenson,P.D., Ball,E.V., Mort,M., + Phillips,A.D., Shiel,J.A., Thomas,N.S., Abeysinghe,S., Krawczak,M. + and Cooper,D.N. (2003) Human gene mutation database (HGMD): + 2003 update. Hum. Mutat., 21, 577–581. +21. Johnson,A.D. and O’Donnell,C.J. (2009) An + open access database of genome-wide association results. BMC Med. + Genet, 10, 6. +22. McKusick,V. (1998) A Catalog of Human Genes + and Genetic Disorders, 12th edn. John Hopkins University + Press,Baltimore, MD. +23. Forbes,S.A., Bindal,N., Bamford,S., Cole,C., + Kok,C.Y., Beare,D., Jia,M., Shepherd,R., Leung,K., Menzies,A. et al. + (2011) COSMIC: mining complete cancer genomes in the catalogue of + somatic mutations in cancer. Nucleic Acids Res., + 39, D945–D950. +24. Kerrien,S., Aranda,B., Breuza,L., Bridge,A., + Broackes-Carter,F., Chen,C., Duesbury,M., Dumousseau,M., + Feuermann,M., Hinz,U. et al. (2012) The Intact molecular interaction + database in 2012. Nucleic Acids Res., 40, D841–D846. +25. Croft,D., O’Kelly,G., Wu,G., Haw,R., + Gillespie,M., Matthews,L., Caudy,M., Garapati,P., + Gopinath,G., Jassal,B. et al. (2011) Reactome: a database of + reactions, pathways and biological processes. Nucleic Acids Res., + 39, D691–D697. +26. Demir,E., Cary,M.P., Paley,S., Fukuda,K., + Lemer,C., Vastrik,I.,Wu,G., D’Eustachio,P., Schaefer,C., Luciano,J. + et al. (2010) The BioPAX community standard for pathway + data sharing. Nature Biotechnol., 28, 935–942. +27. Alemán Z, GarcĂa-GarcĂa F, Medina I, Dopazo J + (2014): A web tool for the design and management of panels of genes + for targeted enrichment and massive sequencing for + clinical applications. Nucleic Acids Res 42: W83-7. +28. 
[Alemán + A](http://www.ncbi.nlm.nih.gov/pubmed?term=Alem%C3%A1n%20A%5BAuthor%5D&cauthor=true&cauthor_uid=24803668)>, [Garcia-Garcia + F](http://www.ncbi.nlm.nih.gov/pubmed?term=Garcia-Garcia%20F%5BAuthor%5D&cauthor=true&cauthor_uid=24803668)>, [Salavert + F](http://www.ncbi.nlm.nih.gov/pubmed?term=Salavert%20F%5BAuthor%5D&cauthor=true&cauthor_uid=24803668)>, [Medina + I](http://www.ncbi.nlm.nih.gov/pubmed?term=Medina%20I%5BAuthor%5D&cauthor=true&cauthor_uid=24803668)>, [Dopazo + J](http://www.ncbi.nlm.nih.gov/pubmed?term=Dopazo%20J%5BAuthor%5D&cauthor=true&cauthor_uid=24803668)> (2014). + A web-based interactive framework to assist in the prioritization of + disease candidate genes in whole-exome sequencing studies. + [Nucleic + Acids Res.](http://www.ncbi.nlm.nih.gov/pubmed/?term=BiERapp "Nucleic acids research.")>42 :W88-93. +29. Landrum,M.J., Lee,J.M., Riley,G.R., Jang,W., + Rubinstein,W.S., Church,D.M. and Maglott,D.R. (2014) ClinVar: public + archive of relationships among sequence variation and + human phenotype. Nucleic Acids Res., 42, D980–D985. +30. Medina I, Salavert F, Sanchez R, de Maria A, + Alonso R, Escobar P, Bleda M, Dopazo J: Genome Maps, a new + generation genome browser. Nucleic Acids Res 2013, 41:W41-46. + + + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.md new file mode 100644 index 0000000000000000000000000000000000000000..a6cd22b5866bbbb95035359d3f1ffddf1c4772cf --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.md @@ -0,0 +1,42 @@ +Priorization component (BiERApp) +================================ + +### Access + +BiERApp is available at the following address +: <http://omics.it4i.cz/bierapp/> + +The address is accessible only +via [VPN. ](../../accessing-the-cluster/vpn-access.html) + +###BiERApp + +###This tool is aimed to discover new disease genes or variants by studying affected families or cases and controls. It carries out a filtering process to sequentially remove: (i) variants which are not no compatible with the disease because are not expected to have impact on the protein function; (ii) variants that exist at frequencies incompatible with the disease; (iii) variants that do not segregate with the disease. The result is a reduced set of disease gene candidates that should be further validated experimentally. + +BiERapp >(28) efficiently helps in the identification of +causative variants in family and sporadic genetic diseases. The program +reads lists of predicted variants (nucleotide substitutions and indels) +in affected individuals or tumor samples and controls. In family +studies, different modes of inheritance can easily be defined to filter +out variants that do not segregate with the disease along the family. +Moreover, BiERapp integrates additional information such as allelic +frequencies in the general population and the most popular damaging +scores to further narrow down the number of putative variants in +successive filtering steps. BiERapp provides an interactive and +user-friendly interface that implements the filtering strategy used in +the context of a large-scale genomic project carried out by the Spanish +Network for Research, in Rare Diseases (CIBERER) and the Medical Genome +Project. in which more than 800 exomes have been analyzed. + + + +*Figure 6**. 
*Web interface to the prioritization tool.* *This +figure* *shows the interface of the web tool for candidate gene +prioritization with the filters available. The tool includes a genomic +viewer (Genome Maps >30) that enables the representation of +the variants in the corresponding genomic coordinates.* + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.md new file mode 100644 index 0000000000000000000000000000000000000000..f5579ca1e525599353caf270ff971a04d4c23d58 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.md @@ -0,0 +1,261 @@ +OpenFOAM +======== + +A free, open source CFD software package + + + +Introduction** +---------------- + +OpenFOAM is a free, open source CFD software package developed +by [**OpenCFD Ltd**](http://www.openfoam.com/about) at [**ESI +Group**](http://www.esi-group.com/) and distributed by the [**OpenFOAM +Foundation **](http://www.openfoam.org/). It has a large user base +across most areas of engineering and science, from both commercial and +academic organisations. + +Homepage: <http://www.openfoam.com/> + +###Installed version** + +Currently, several version compiled by GCC/ICC compilers in +single/double precision with several version of openmpi are available on +Anselm. + +For example syntax of available OpenFOAM module is: + +< openfoam/2.2.1-icc-openmpi1.6.5-DP > + +this means openfoam version >2.2.1 compiled by +ICC compiler with >openmpi1.6.5 in> double +precision. + +Naming convection of the installed versions is following: + +  +openfoam/<>VERSION>>-<>COMPILER<span>>-<</span><span>openmpiVERSION</span><span>>-<</span><span>PRECISION</span><span>></span> + +- ><>VERSION>> - version of + openfoam +- ><>COMPILER> - version of used + compiler +- ><>openmpiVERSION> - version of used + openmpi/impi +- ><>PRECISION> - DP/>SP – + double/single precision + +###Available OpenFOAM modules** + +To check available modules use + + $ module avail + +In /opt/modules/modulefiles/engineering you can see installed +engineering softwares: + + ------------------------------------ /opt/modules/modulefiles/engineering ------------------------------------------------------------- + ansys/14.5.x              matlab/R2013a-COM                               openfoam/2.2.1-icc-impi4.1.1.036-DP + comsol/43b-COM            matlab/R2013a-EDU                               openfoam/2.2.1-icc-openmpi1.6.5-DP + comsol/43b-EDU            openfoam/2.2.1-gcc481-openmpi1.6.5-DP           paraview/4.0.1-gcc481-bullxmpi1.2.4.1-osmesa10.0 + lsdyna/7.x.x              openfoam/2.2.1-gcc481-openmpi1.6.5-SP + +For information how to use modules please [look +here](../environment-and-modules.html "Environment and Modules "). + +Getting Started** +------------------- + +To create OpenFOAM environment on ANSELM give the commands: + + $ module load openfoam/2.2.1-icc-openmpi1.6.5-DP + + $ source $FOAM_BASHRC + +Pleas load correct module with your requirements “compiler - GCC/ICC, +precision - DP/SP”. + +Create a project directory within the $HOME/OpenFOAM directory +named ><USER>-<OFversion> and create a directory +named run within it, e.g. 
by typing:
+
+    $ mkdir -p $FOAM_RUN
+
+The project directory is now available by typing:
+
+    $ cd /home/<USER>/OpenFOAM/<USER>-<OFversion>/run
+
+<OFversion> - for example <2.2.1>
+
+or
+
+    $ cd $FOAM_RUN
+
+Copy the tutorial examples directory of the OpenFOAM distribution to
+the run directory:
+
+    $ cp -r $FOAM_TUTORIALS $FOAM_RUN
+
+Now you can run the first case, for example incompressible laminar flow
+in a cavity.
+
+Running Serial Applications
+---------------------------
+
+Create a Bash script test.sh:
+
+    #!/bin/bash
+    module load openfoam/2.2.1-icc-openmpi1.6.5-DP
+    source $FOAM_BASHRC
+
+    # source to run functions
+    . $WM_PROJECT_DIR/bin/tools/RunFunctions
+
+    cd $FOAM_RUN/tutorials/incompressible/icoFoam/cavity
+
+    runApplication blockMesh
+    runApplication icoFoam
+
+Job submission:
+
+    $ qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=16,walltime=03:00:00 test.sh
+
+For information about job submission please [look
+here](../resource-allocation-and-job-execution/job-submission-and-execution.html "Job submission").
+
+Running applications in parallel
+--------------------------------
+
+Run the second case, for example the external incompressible turbulent
+flow case motorBike.
+
+First we must run the serial applications blockMesh and decomposePar to
+prepare the parallel computation.
+
+Create a Bash script test.sh:
+
+    #!/bin/bash
+    module load openfoam/2.2.1-icc-openmpi1.6.5-DP
+    source $FOAM_BASHRC
+
+    # source to run functions
+    . $WM_PROJECT_DIR/bin/tools/RunFunctions
+
+    cd $FOAM_RUN/tutorials/incompressible/simpleFoam/motorBike
+
+    runApplication blockMesh
+    runApplication decomposePar
+
+Job submission:
+
+    $ qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=16,walltime=03:00:00 test.sh
+
+This job creates a simple block mesh and performs the domain decomposition.
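+
+Before submitting the parallel run, you can verify that decomposePar
+produced the expected subdomains by listing the processor directories it
+creates. This is a minimal check only; the path assumes the motorBike
+tutorial case used above:
+
+    # each subdomain gets its own processorN directory
+    $ ls -d $FOAM_RUN/tutorials/incompressible/simpleFoam/motorBike/processor*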
+Once the decomposition is correct, submit the parallel computation.
+
+Create a PBS script testParallel.pbs:
+
+    #!/bin/bash
+    #PBS -N motorBike
+    #PBS -l select=2:ncpus=16
+    #PBS -l walltime=01:00:00
+    #PBS -q qprod
+    #PBS -A OPEN-0-0
+
+    module load openfoam/2.2.1-icc-openmpi1.6.5-DP
+    source $FOAM_BASHRC
+
+    cd $FOAM_RUN/tutorials/incompressible/simpleFoam/motorBike
+
+    nproc=32
+
+    mpirun -hostfile ${PBS_NODEFILE} -np $nproc snappyHexMesh -overwrite -parallel | tee snappyHexMesh.log
+
+    mpirun -hostfile ${PBS_NODEFILE} -np $nproc potentialFoam -noFunctionObjects -writep -parallel | tee potentialFoam.log
+
+    mpirun -hostfile ${PBS_NODEFILE} -np $nproc simpleFoam -parallel | tee simpleFoam.log
+
+nproc – number of subdomains
+
+Job submission:
+
+    $ qsub testParallel.pbs
+
+Compile your own solver
+-----------------------
+
+Initialize the OpenFOAM environment before compiling your solver:
+
+    $ module load openfoam/2.2.1-icc-openmpi1.6.5-DP
+    $ source $FOAM_BASHRC
+    $ cd $FOAM_RUN/
+
+Create the directory applications/solvers in your user directory:
+
+    $ mkdir -p applications/solvers
+    $ cd applications/solvers
+
+Copy the icoFoam solver's source files:
+
+    $ cp -r $FOAM_SOLVERS/incompressible/icoFoam/ My_icoFoam
+    $ cd My_icoFoam
+
+Rename icoFoam.C to My_icoFoam.C:
+
+    $ mv icoFoam.C My_icoFoam.C
+
+Edit the *files* file in the *Make* directory:
+
+    icoFoam.C
+    EXE = $(FOAM_APPBIN)/icoFoam
+
+and change it to:
+
+    My_icoFoam.C
+    EXE = $(FOAM_USER_APPBIN)/My_icoFoam
+
+In the My_icoFoam directory, run the compilation command:
+
+    $ wmake
+
+------------------------------------------------------------------------
+
+Have fun with OpenFOAM :)
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.md
new file mode 100644
index 0000000000000000000000000000000000000000..af15c05074ea33347892db42012d41d0b6b7a7cf
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.md
@@ -0,0 +1,13 @@
+Operating System
+================
+
+The operating system deployed on ANSELM
+
+
+
+The operating system on Anselm is Linux - bullx Linux Server release
+6.3.
+
+bullx Linux is based on Red Hat Enterprise Linux. bullx Linux is a Linux
+distribution provided by Bull and dedicated to HPC applications.
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/paraview.md b/converted/docs.it4i.cz/anselm-cluster-documentation/software/paraview.md
new file mode 100644
index 0000000000000000000000000000000000000000..7aafe20e77601fc324681cc958b651ee1b37edcd
--- /dev/null
+++ b/converted/docs.it4i.cz/anselm-cluster-documentation/software/paraview.md
@@ -0,0 +1,121 @@
+ParaView
+========
+
+An open-source, multi-platform data analysis and visualization
+application
+
+
+
+Introduction
+------------
+
+ParaView is an open-source, multi-platform data analysis and
+visualization application. ParaView users can quickly build
+visualizations to analyze their data using qualitative and quantitative
+techniques. The data exploration can be done interactively in 3D or
+programmatically using ParaView's batch processing capabilities.
+
+ParaView was developed to analyze extremely large datasets using
+distributed memory computing resources. It can be run on supercomputers
+to analyze datasets of exascale size as well as on laptops for smaller
+data.
+ +Homepage : <http://www.paraview.org/> + +Installed version +----------------- + +Currently, version 4.0.1 compiled with GCC 4.8.1 against Bull MPI +library and OSMesa 10.0 is installed on Anselm. + +Usage +----- + +On Anselm, ParaView is to be used in client-server mode. A parallel +ParaView server is launched on compute nodes by the user, and client is +launched on your desktop PC to control and view the visualization. +Download ParaView client application for your OS here +: <http://paraview.org/paraview/resources/software.php>. Important : +your version must match the version number installed on Anselm** ! +(currently v4.0.1) + +### Launching server + +To launch the server, you must first allocate compute nodes, for example +:> + + $ qsub -I -q qprod -A OPEN-0-0 -l select=2 + +to launch an interactive session on 2 nodes. Refer to [Resource +Allocation and Job +Execution](../resource-allocation-and-job-execution/introduction.html) +for details. + +After the interactive session is opened, load the ParaView module : + + $ module add paraview + +Now launch the parallel server, with number of nodes times 16 processes +: + + $ mpirun -np 32 pvserver --use-offscreen-rendering + Waiting for client... + Connection URL: cs://cn77:11111 + Accepting connection(s): cn77:11111 + + Note the that the server is listening on compute node cn77 in this +case, we shall use this information later. + +### Client connection + +Because a direct connection is not allowed to compute nodes on Anselm, +you must establish a SSH tunnel to connect to the server. Choose a port +number on your PC to be forwarded to ParaView server, for example 12345. +If your PC is running Linux, use this command to estabilish a SSH tunnel +: + + ssh -TN -L 12345:cn77:11111 username@anselm.it4i.cz + +replace username with your login and cn77 +with the name of compute node your ParaView server is running on (see +previous step). If you use PuTTY on Windows, load Anselm connection +configuration, t>hen go to Connection-> +SSH>->Tunnels to set up the +port forwarding. Click Remote radio button. Insert 12345 to Source port +textbox. Insert cn77:11111. Click Add button, then Open. [Read +more about port +forwarding.](https://docs.it4i.cz/anselm-cluster-documentation/software/resolveuid/11e53ad0d2fd4c5187537f4baeedff33) + +Now launch ParaView client installed on your desktop PC. Select +File->Connect..., click Add Server. Fill in the following : + +Name : Anselm tunnel + +Server Type : Client/Server + +Host : localhost + +Port : 12345 + +Click Configure, Save, the configuration is now saved for later use. Now +click Connect to connect to the ParaView server. In your terminal where +you have interactive session with ParaView server launched, you should +see : + + Client connected. + +You can now use Parallel ParaView. + +### Close server + +Remember to close the interactive session after you finish working with +ParaView server, as it will remain launched even after your client is +disconnected and will continue to consume resources. + +GPU support +----------- + +Currently, GPU acceleration is not supported in the server and ParaView +will not take advantage of accelerated nodes on Anselm. Support for GPU +acceleration might be added in the future. 
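+
+For convenience, the server-side commands and the SSH tunnel described
+above can be recapped as a single sequence. This is a sketch only: the
+node name cn77, the port 12345 and the placeholder username are the
+illustrative values used in the examples above and will differ in your
+own session.
+
+    # on Anselm: allocate nodes, load the module and start the server
+    $ qsub -I -q qprod -A OPEN-0-0 -l select=2
+    $ module add paraview
+    $ mpirun -np 32 pvserver --use-offscreen-rendering
+
+    # on your desktop PC (Linux): forward a local port to the reported compute node
+    $ ssh -TN -L 12345:cn77:11111 username@anselm.it4i.cz
+
+Then connect the ParaView client to localhost:12345 as described above.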
+ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/software/virtualization-job-workflow b/converted/docs.it4i.cz/anselm-cluster-documentation/software/virtualization-job-workflow new file mode 100644 index 0000000000000000000000000000000000000000..f5602dd43de3879f6599a84170a7156100c27302 Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/software/virtualization-job-workflow differ diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.md b/converted/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.md new file mode 100644 index 0000000000000000000000000000000000000000..22b880420d3c4fcba0b5574ebe1865f4fdb4fa24 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.md @@ -0,0 +1,128 @@ +CESNET Data Storage +=================== + + + +Introduction +------------ + +Do not use shared filesystems at IT4Innovations as a backup for large +amount of data or long-term archiving purposes. + +The IT4Innovations does not provide storage capacity for data archiving. +Academic staff and students of research institutions in the Czech +Republic can use [CESNET Storage +service](https://du.cesnet.cz/). + +The CESNET Storage service can be used for research purposes, mainly by +academic staff and students of research institutions in the Czech +Republic. + +User of data storage CESNET (DU) association can become organizations or +an individual person who is either in the current employment +relationship (employees) or the current study relationship (students) to +a legal entity (organization) that meets the “Principles for access to +CESNET Large infrastructure (Access Policy)”. + +User may only use data storage CESNET for data transfer and storage +which are associated with activities in science, research, development, +the spread of education, culture and prosperity. In detail see +“Acceptable Use Policy CESNET Large Infrastructure (Acceptable Use +Policy, AUP)”. + +The service is documented at +<https://du.cesnet.cz/wiki/doku.php/en/start>. For special requirements +please contact directly CESNET Storage Department via e-mail +[du-support(at)cesnet.cz](mailto:du-support@cesnet.cz). + +The procedure to obtain the CESNET access is quick and trouble-free. + +(source +[https://du.cesnet.cz/](https://du.cesnet.cz/wiki/doku.php/en/start "CESNET Data Storage")) + +CESNET storage access +--------------------- + +### Understanding Cesnet storage + +It is very important to understand the Cesnet storage before uploading +data. Please read +<https://du.cesnet.cz/en/navody/home-migrace-plzen/start> first. + +Once registered for CESNET Storage, you may [access the +storage](https://du.cesnet.cz/en/navody/faq/start) in +number of ways. We recommend the SSHFS and RSYNC methods. + +### SSHFS Access + +SSHFS: The storage will be mounted like a local hard drive + +The SSHFS provides a very convenient way to access the CESNET Storage. +The storage will be mounted onto a local directory, exposing the vast +CESNET Storage as if it was a local removable harddrive. Files can be +than copied in and out in a usual fashion. + +First, create the mountpoint + + $ mkdir cesnet + +Mount the storage. Note that you can choose among the ssh.du1.cesnet.cz +(Plzen), ssh.du2.cesnet.cz (Jihlava), ssh.du3.cesnet.cz (Brno) +Mount tier1_home **(only 5120M !)**: + + $ sshfs username@ssh.du1.cesnet.cz:. 
cesnet/ + +For easy future access from Anselm, install your public key + + $ cp .ssh/id_rsa.pub cesnet/.ssh/authorized_keys + +Mount tier1_cache_tape for the Storage VO: + + $ sshfs username@ssh.du1.cesnet.cz:/cache_tape/VO_storage/home/username cesnet/ + +View the archive, copy the files and directories in and out + + $ ls cesnet/ + $ cp -a mydir cesnet/. + $ cp cesnet/myfile . + +Once done, please remember to unmount the storage + + $ fusermount -u cesnet + +### Rsync access + +Rsync provides delta transfer for best performance, can resume +interrupted transfers + +Rsync is a fast and extraordinarily versatile file copying tool. It is +famous for its delta-transfer algorithm, which reduces the amount of +data sent over the network by sending only the differences between the +source files and the existing files in the destination. Rsync is widely +used for backups and mirroring and as an improved copy command for +everyday use. + +Rsync finds files that need to be transferred using a "quick check" +algorithm (by default) that looks for files that have changed in size or +in last-modified time. Any changes in the other preserved attributes +(as requested by options) are made on the destination file directly when +the quick check indicates that the file's data does not need to be +updated. + +More about Rsync at +<https://du.cesnet.cz/en/navody/rsync/start#pro_bezne_uzivatele> + +Transfer large files to/from Cesnet storage, assuming membership in the +Storage VO + + $ rsync --progress datafile username@ssh.du1.cesnet.cz:VO_storage-cache_tape/. + $ rsync --progress username@ssh.du1.cesnet.cz:VO_storage-cache_tape/datafile . + +Transfer large directories to/from Cesnet storage, assuming membership +in the Storage VO + + $ rsync --progress -av datafolder username@ssh.du1.cesnet.cz:VO_storage-cache_tape/. + $ rsync --progress -av username@ssh.du1.cesnet.cz:VO_storage-cache_tape/datafolder . + +Transfer rates of about 28MB/s can be expected. + diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.md b/converted/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.md new file mode 100644 index 0000000000000000000000000000000000000000..2a58b726d505798696080198c159d55dbc9138f9 --- /dev/null +++ b/converted/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.md @@ -0,0 +1,502 @@ +Storage +======= + + + +There are two main shared file systems on Anselm cluster, the +[HOME](../storage.html#home) and +[SCRATCH](../storage.html#scratch). All login and compute +nodes may access same data on shared filesystems. Compute nodes are also +equipped with local (non-shared) scratch, ramdisk and tmp filesystems. + +Archiving +--------- + +Please don't use shared filesystems as a backup for large amount of data +or long-term archiving mean. The academic staff and students of research +institutions in the Czech Republic can use [CESNET storage +service](cesnet-data-storage.html), which is available +via SSHFS. + +Shared Filesystems +------------------ + +Anselm computer provides two main shared filesystems, the [HOME +filesystem](../storage.html#home) and the [SCRATCH +filesystem](../storage.html#scratch). Both HOME and +SCRATCH filesystems are realized as a parallel Lustre filesystem. Both +shared file systems are accessible via the Infiniband network. Extended +ACLs are provided on both Lustre filesystems for the purpose of sharing +data with other users using fine-grained control. 
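+As a quick orientation before the details below, both shared filesystems
+can be inspected from any login node with plain df; a minimal sketch
+(output shortened, usage numbers illustrative):
+
+`
+$ df -h /home /scratch
+Filesystem      Size  Used Avail Use% Mounted on
+...             320T   ...   ...   ..% /home
+...             146T   ...   ...   ..% /scratch
+`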
+ +### Understanding the Lustre Filesystems + +(source <http://www.nas.nasa.gov>) + +A user file on the Lustre filesystem can be divided into multiple chunks +(stripes) and stored across a subset of the object storage targets +(OSTs) (disks). The stripes are distributed among the OSTs in a +round-robin fashion to ensure load balancing. + +When a client (a compute +node from your job) needs to create +or access a file, the client queries the metadata server ( +MDS) and the metadata target ( +MDT) for the layout and location of the +[file's +stripes](http://www.nas.nasa.gov/hecc/support/kb/Lustre_Basics_224.html#striping). +Once the file is opened and the client obtains the striping information, +the MDS is no longer involved in the +file I/O process. The client interacts directly with the object storage +servers (OSSes) and OSTs to perform I/O operations such as locking, disk +allocation, storage, and retrieval. + +If multiple clients try to read and write the same part of a file at the +same time, the Lustre distributed lock manager enforces coherency so +that all clients see consistent results. + +There is default stripe configuration for Anselm Lustre filesystems. +However, users can set the following stripe parameters for their own +directories or files to get optimum I/O performance: + +1. stripe_size: the size of the chunk in bytes; specify with k, m, or + g to use units of KB, MB, or GB, respectively; the size must be an + even multiple of 65,536 bytes; default is 1MB for all Anselm Lustre + filesystems +2. stripe_count the number of OSTs to stripe across; default is 1 for + Anselm Lustre filesystems one can specify -1 to use all OSTs in + the filesystem. +3. stripe_offset The index of the + OST where the first stripe is to be + placed; default is -1 which results in random selection; using a + non-default value is NOT recommended. + + + +Setting stripe size and stripe count correctly for your needs may +significantly impact the I/O performance you experience. + +Use the lfs getstripe for getting the stripe parameters. Use the lfs +setstripe command for setting the stripe parameters to get optimal I/O +performance The correct stripe setting depends on your needs and file +access patterns. + +` +$ lfs getstripe dir|filename +$ lfs setstripe -s stripe_size -c stripe_count -o stripe_offset dir|filename +` + +Example: + +` +$ lfs getstripe /scratch/username/ +/scratch/username/ +stripe_count: 1 stripe_size: 1048576 stripe_offset: -1 + +$ lfs setstripe -c -1 /scratch/username/ +$ lfs getstripe /scratch/username/ +/scratch/username/ +stripe_count: 10 stripe_size: 1048576 stripe_offset: -1 +` + +In this example, we view current stripe setting of the +/scratch/username/ directory. The stripe count is changed to all OSTs, +and verified. All files written to this directory will be striped over +10 OSTs + +Use lfs check OSTs to see the number and status of active OSTs for each +filesystem on Anselm. Learn more by reading the man page + +` +$ lfs check osts +$ man lfs +` + +### Hints on Lustre Stripping + +Increase the stripe_count for parallel I/O to the same file. + +When multiple processes are writing blocks of data to the same file in +parallel, the I/O performance for large files will improve when the +stripe_count is set to a larger value. The stripe count sets the number +of OSTs the file will be written to. By default, the stripe count is set +to 1. 
While this default setting provides for efficient access of +metadata (for example to support the ls -l command), large files should +use stripe counts of greater than 1. This will increase the aggregate +I/O bandwidth by using multiple OSTs in parallel instead of just one. A +rule of thumb is to use a stripe count approximately equal to the number +of gigabytes in the file. + +Another good practice is to make the stripe count be an integral factor +of the number of processes performing the write in parallel, so that you +achieve load balance among the OSTs. For example, set the stripe count +to 16 instead of 15 when you have 64 processes performing the writes. + +Using a large stripe size can improve performance when accessing very +large files + +Large stripe size allows each client to have exclusive access to its own +part of a file. However, it can be counterproductive in some cases if it +does not match your I/O pattern. The choice of stripe size has no effect +on a single-stripe file. + +Read more on +<http://wiki.lustre.org/manual/LustreManual20_HTML/ManagingStripingFreeSpace.html> + +### Lustre on Anselm + +The architecture of Lustre on Anselm is composed of two metadata +servers (MDS) and four data/object storage servers (OSS). Two object +storage servers are used for file system HOME and another two object +storage servers are used for file system SCRATCH. + + Configuration of the storages + +- HOME Lustre object storage + + + - One disk array NetApp E5400 + - 22 OSTs + - 227 2TB NL-SAS 7.2krpm disks + - 22 groups of 10 disks in RAID6 (8+2) + - 7 hot-spare disks + + + +- SCRATCH Lustre object storage + + + - Two disk arrays NetApp E5400 + - 10 OSTs + - 106 2TB NL-SAS 7.2krpm disks + - 10 groups of 10 disks in RAID6 (8+2) + - 6 hot-spare disks + + + +- Lustre metadata storage + + + - One disk array NetApp E2600 + - 12 300GB SAS 15krpm disks + - 2 groups of 5 disks in RAID5 + - 2 hot-spare disks + + + +###HOME + +The HOME filesystem is mounted in directory /home. Users home +directories /home/username reside on this filesystem. Accessible +capacity is 320TB, shared among all users. Individual users are +restricted by filesystem usage quotas, set to 250GB per user. >If +250GB should prove as insufficient for particular user, please +contact [support](https://support.it4i.cz/rt), +the quota may be lifted upon request. + +The HOME filesystem is intended for preparation, evaluation, processing +and storage of data generated by active Projects. + +The HOME filesystem should not be used to archive data of past Projects +or other unrelated data. + +The files on HOME filesystem will not be deleted until end of the [users +lifecycle](../../get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.html). + +The filesystem is backed up, such that it can be restored in case of +catasthropic failure resulting in significant data loss. This backup +however is not intended to restore old versions of user data or to +restore (accidentaly) deleted files. + +The HOME filesystem is realized as Lustre parallel filesystem and is +available on all login and computational nodes. +Default stripe size is 1MB, stripe count is 1. There are 22 OSTs +dedicated for the HOME filesystem. + +Setting stripe size and stripe count correctly for your needs may +significantly impact the I/O performance you experience. 
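+Putting the striping hints above into practice: before a parallel run,
+one may create a job directory on the SCRATCH filesystem and set its
+stripe count to match the planned number of writer processes (the
+directory name and the count of 8 below are only an example):
+
+`
+$ mkdir -p /scratch/username/myjob
+$ lfs setstripe -c 8 /scratch/username/myjob
+$ lfs getstripe /scratch/username/myjob
+`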
+
+|HOME filesystem||
+|---|---|
+|Mountpoint|/home|
+|Capacity|320TB|
+|Throughput|2GB/s|
+|User quota|250GB|
+|Default stripe size|1MB|
+|Default stripe count|1|
+|Number of OSTs|22|
+
+### SCRATCH
+
+The SCRATCH filesystem is mounted in directory /scratch. Users may
+freely create subdirectories and files on the filesystem. Accessible
+capacity is 146TB, shared among all users. Individual users are
+restricted by filesystem usage quotas, set to 100TB per user. The
+purpose of this quota is to prevent runaway programs from filling the
+entire filesystem and denying service to other users. If 100TB proves
+insufficient for a particular user, please contact
+[support](https://support.it4i.cz/rt); the quota may be
+lifted upon request.
+
+The SCRATCH filesystem is intended for temporary scratch data generated
+during the calculation as well as for high performance access to input
+and output files. All I/O intensive jobs must use the SCRATCH filesystem
+as their working directory.
+
+Users are advised to save the necessary data from the SCRATCH filesystem
+to the HOME filesystem after the calculations and clean up the scratch
+files.
+
+Files on the SCRATCH filesystem that are **not accessed for more than 90
+days** will be automatically **deleted**.
+
+The SCRATCH filesystem is realized as a Lustre parallel filesystem and
+is available from all login and computational nodes.
+Default stripe size is 1MB, stripe count is 1. There are 10 OSTs
+dedicated for the SCRATCH filesystem.
+
+Setting stripe size and stripe count correctly for your needs may
+significantly impact the I/O performance you experience.
+
+|SCRATCH filesystem||
+|---|---|
+|Mountpoint|/scratch|
+|Capacity|146TB|
+|Throughput|6GB/s|
+|User quota|100TB|
+|Default stripe size|1MB|
+|Default stripe count|1|
+|Number of OSTs|10|
+
+### Disk usage and quota commands
+
+User quotas on the file systems can be checked and reviewed using the
+following command:
+
+`
+$ lfs quota dir
+`
+
+Example for the Lustre HOME directory:
+
+`
+$ lfs quota /home
+Disk quotas for user user001 (uid 1234):
+ Filesystem kbytes quota limit grace files quota limit grace
+ /home 300096 0 250000000 - 2102 0 500000 -
+Disk quotas for group user001 (gid 1234):
+ Filesystem kbytes quota limit grace files quota limit grace
+ /home 300096 0 0 - 2102 0 0 -
+`
+
+In this example, we see a quota limit of 250GB, with about 300MB
+currently used by user001.
+
+Example for the Lustre SCRATCH directory:
+
+`
+$ lfs quota /scratch
+Disk quotas for user user001 (uid 1234):
+ Filesystem kbytes quota limit grace files quota limit grace
+ /scratch 8 0 100000000000 - 3 0 0 -
+Disk quotas for group user001 (gid 1234):
+ Filesystem kbytes quota limit grace files quota limit grace
+ /scratch 8 0 0 - 3 0 0 -
+`
+
+In this example, we see a quota limit of 100TB, with 8KB currently used
+by user001.
+
+To better understand where the space is actually used, you can use the
+following command:
+
+`
+$ du -hs dir
+`
+
+Example for your HOME directory:
+
+`
+$ cd /home
+$ du -hs * .[a-zA-Z0-9]* | grep -E "[0-9]*G|[0-9]*M" | sort -hr
+258M cuda-samples
+15M .cache
+13M .mozilla
+5,5M .eclipse
+2,7M .idb_13.0_linux_intel64_app
+`
+
+This lists all files and directories consuming megabytes or gigabytes of
+space in your current (in this example HOME) directory, sorted in
+descending order from largest to smallest.
+
+For more details on the previous commands, read the man pages:
+
+`
+$ man lfs
+`
+
+`
+$ man du
+`
+
+### Extended ACLs
+
+Extended ACLs provide another security mechanism besides the standard
+POSIX permissions, which are defined by three entries (for
+owner/group/others). Extended ACLs have more than the three basic
+entries. In addition, they also contain a mask entry and may contain any
+number of named user and named group entries.
+
+ACLs on a Lustre file system work exactly like ACLs on any Linux file
+system. They are manipulated with the standard tools in the standard
+manner. Below, we create a directory and allow a specific user access.
+
+`
+[vop999@login1.anselm ~]$ umask 027
+[vop999@login1.anselm ~]$ mkdir test
+[vop999@login1.anselm ~]$ ls -ld test
+drwxr-x--- 2 vop999 vop999 4096 Nov 5 14:17 test
+[vop999@login1.anselm ~]$ getfacl test
+# file: test
+# owner: vop999
+# group: vop999
+user::rwx
+group::r-x
+other::---
+
+[vop999@login1.anselm ~]$ setfacl -m user:johnsm:rwx test
+[vop999@login1.anselm ~]$ ls -ld test
+drwxrwx---+ 2 vop999 vop999 4096 Nov 5 14:17 test
+[vop999@login1.anselm ~]$ getfacl test
+# file: test
+# owner: vop999
+# group: vop999
+user::rwx
+user:johnsm:rwx
+group::r-x
+mask::rwx
+other::---
+`
+
+The default ACL mechanism can be used to replace setuid/setgid
+permissions on directories. Setting a default ACL on a directory (-d
+flag to setfacl) will cause the ACL permissions to be inherited by any
+newly created file or subdirectory within the directory. Refer to this
+page for more information on Linux ACLs:
+
+[http://www.vanemery.com/Linux/ACL/POSIX_ACL_on_Linux.html](http://www.vanemery.com/Linux/ACL/POSIX_ACL_on_Linux.html)
+
+Local Filesystems
+-----------------
+
+### Local Scratch
+
+Every computational node is equipped with a 330GB local scratch disk.
+
+Use the local scratch in case you need to access a large number of small
+files during your calculation.
+
+The local scratch disk is mounted as /lscratch and is accessible to the
+user at the /lscratch/$PBS_JOBID directory.
+
+The local scratch filesystem is intended for temporary scratch data
+generated during the calculation as well as for high performance access
+to input and output files. All I/O intensive jobs that access a large
+number of small files within the calculation must use the local scratch
+filesystem as their working directory. This is required for performance
+reasons, as frequent access to a large number of small files may
+overload the metadata servers (MDS) of the Lustre filesystem.
+
+The local scratch directory /lscratch/$PBS_JOBID will be deleted
+immediately after the calculation ends. Users should take care to save
+the output data from within the jobscript.
+
+|local SCRATCH filesystem||
+|---|---|
+|Mountpoint|/lscratch|
+|Accesspoint|/lscratch/$PBS_JOBID|
+|Capacity|330GB|
+|Throughput|100MB/s|
+|User quota|none|
+
+### RAM disk
+
+Every computational node is equipped with a filesystem realized in
+memory, the so-called RAM disk.
+
+Use the RAM disk in case you need really fast access to a limited amount
+of data during your calculation.
+Be very careful: use of the RAM disk filesystem is at the expense of
+operational memory.
+
+The local RAM disk is mounted as /ramdisk and is accessible to the user
+at the /ramdisk/$PBS_JOBID directory.
+
+The local RAM disk filesystem is intended for temporary scratch data
+generated during the calculation as well as for high performance access
+to input and output files. The size of the RAM disk filesystem is
+limited. Be very careful: use of the RAM disk filesystem is at the
+expense of operational memory.
+It is not recommended to allocate a large amount of memory and use a
+large amount of data in the RAM disk filesystem at the same time.
+
+The local RAM disk directory /ramdisk/$PBS_JOBID will be deleted
+immediately after the calculation ends. Users should take care to save
+the output data from within the jobscript.
+
+|RAM disk||
+|---|---|
+|Mountpoint|/ramdisk|
+|Accesspoint|/ramdisk/$PBS_JOBID|
+|Capacity|60GB at compute nodes without accelerator, 90GB at compute nodes with accelerator, 500GB at fat nodes|
+|Throughput|over 1.5 GB/s write, over 5 GB/s read, single thread; over 10 GB/s write, over 50 GB/s read, 16 threads|
+|User quota|none|
+
+### tmp
+
+Each node is equipped with a local /tmp directory of a few GB capacity.
+The /tmp directory should be used to work with small temporary files.
+Old files in the /tmp directory are automatically purged.
+
+Summary
+-------
+
+|Mountpoint|Usage|Protocol|Net Capacity|Throughput|Limitations|Access|Services|
+|---|---|---|---|---|---|---|---|
+|/home|home directory|Lustre|320 TiB|2 GB/s|Quota 250GB|Compute and login nodes|backed up|
+|/scratch|cluster shared jobs' data|Lustre|146 TiB|6 GB/s|Quota 100TB|Compute and login nodes|files older 90 days removed|
+|/lscratch|node local jobs' data|local|330 GB|100 MB/s|none|Compute nodes|purged after job ends|
+|/ramdisk|node local jobs' data|local|60, 90, 500 GB|5-50 GB/s|none|Compute nodes|purged after job ends|
+|/tmp|local temporary files|local|9.5 GB|100 MB/s|none|Compute and login nodes|auto purged|
+
diff --git a/converted/docs.it4i.cz/anselm-cluster-documentation/turbovncclientsetting.png b/converted/docs.it4i.cz/anselm-cluster-documentation/turbovncclientsetting.png
new file mode 100644
index 0000000000000000000000000000000000000000..71a69099299012be590972e22b8120e11127426c
Binary files /dev/null and b/converted/docs.it4i.cz/anselm-cluster-documentation/turbovncclientsetting.png differ
diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/TightVNC_login.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/TightVNC_login.png
new file mode 100644
index 0000000000000000000000000000000000000000..078dfc73a90b2b3ffc1648fa82ba4b0a109fbc29
Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/TightVNC_login.png differ
diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/XWinlistentcp.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/XWinlistentcp.png
new file mode 100644
index 0000000000000000000000000000000000000000..fdb297d57cebc7f76f7fff949f8cb7fcfed7f9e3
Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/XWinlistentcp.png differ
diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.md
new file mode 100644
index 0000000000000000000000000000000000000000..3631336e742b5e38f1a6853c98a420f578604d57
--- /dev/null
+++
b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.md @@ -0,0 +1,44 @@ +Cygwin and X11 forwarding +========================= + +### If no able to forward X11 using PuTTY to CygwinX + +` +[usename@login1.anselm ~]$ gnome-session & +[1] 23691 +[usename@login1.anselm ~]$ PuTTY X11 proxy: unable to connect to forwarded X server: Network error: Connection refused +PuTTY X11 proxy: unable to connect to forwarded X server: Network error: Connection refused + + (gnome-session:23691): WARNING **: Cannot open display:** +` + +  + +1. Locate and modify + Cygwin shortcut that + uses +  [startxwin](http://x.cygwin.com/docs/man1/startxwin.1.html) + locate + C:cygwin64binXWin.exe + + + change it + to + C:*cygwin64binXWin.exe -listen tcp* + +  + + + +2. + Check Putty settings: + Enable X11 + forwarding + + + +  + + + + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwinX11forwarding.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwinX11forwarding.png new file mode 100644 index 0000000000000000000000000000000000000000..910a22d546a63d685090bb5981200ac20a911d83 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwinX11forwarding.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gdmdisablescreensaver.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gdmdisablescreensaver.png new file mode 100644 index 0000000000000000000000000000000000000000..437d05effabe47a1dfd18129619398ef7ef74739 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gdmdisablescreensaver.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gdmscreensaver.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gdmscreensaver.png new file mode 100644 index 0000000000000000000000000000000000000000..7dbc23f6dcd9390d38bfd1b07650d69d87d818eb Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gdmscreensaver.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome-compute-nodes-over-vnc.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome-compute-nodes-over-vnc.png new file mode 100644 index 0000000000000000000000000000000000000000..b7d24165d5b2a486534c095b5fb156d83db10428 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome-compute-nodes-over-vnc.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome-terminal.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome-terminal.png new file mode 100644 index 0000000000000000000000000000000000000000..1f404e503edeb796ed0ee96c0ed81aaca64d97eb Binary files /dev/null and 
b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome-terminal.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome_screen.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome_screen.png new file mode 100644 index 0000000000000000000000000000000000000000..62d535f3bfaefb286d120a9aa85b33127665c644 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/gnome_screen.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.md new file mode 100644 index 0000000000000000000000000000000000000000..70dc2f50fd919763be7f921830af273e103bba7b --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.md @@ -0,0 +1,29 @@ +Graphical User Interface +======================== + + + +X Window System +--------------- + +The X Window system is a principal way to get GUI access to the +clusters. + +Read more about configuring [**X Window +System**](x-window-system/x-window-and-vnc.html). + +VNC +--- + +The **Virtual Network Computing** (**VNC**) is a graphical +[desktop +sharing](http://en.wikipedia.org/wiki/Desktop_sharing "Desktop sharing") +system that uses the [Remote Frame Buffer +protocol +(RFB)](http://en.wikipedia.org/wiki/RFB_protocol "RFB protocol") +to remotely control another +[computer](http://en.wikipedia.org/wiki/Computer "Computer"). + +Read more about configuring +**[VNC](../../../salomon/accessing-the-cluster/graphical-user-interface/vnc.html)**. + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/putty-tunnel.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/putty-tunnel.png new file mode 100644 index 0000000000000000000000000000000000000000..e2af3200f0c1c205cfb925f266a579ba65919249 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/putty-tunnel.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.md new file mode 100644 index 0000000000000000000000000000000000000000..c5a12afddbfa6c971301e937485c404bed4e8457 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.md @@ -0,0 +1,269 @@ +VNC +=== + + + +The **Virtual Network Computing** (**VNC**) is a graphical [desktop +sharing](http://en.wikipedia.org/wiki/Desktop_sharing "Desktop sharing") +system that uses the [Remote Frame Buffer protocol +(RFB)](http://en.wikipedia.org/wiki/RFB_protocol "RFB protocol") to +remotely control another +[computer](http://en.wikipedia.org/wiki/Computer "Computer"). 
It +transmits the +[keyboard](http://en.wikipedia.org/wiki/Computer_keyboard "Computer keyboard") +and +[mouse](http://en.wikipedia.org/wiki/Computer_mouse "Computer mouse") +events from one computer to another, relaying the graphical +[screen](http://en.wikipedia.org/wiki/Computer_screen "Computer screen") +updates back in the other direction, over a +[network](http://en.wikipedia.org/wiki/Computer_network "Computer network").(http://en.wikipedia.org/wiki/Virtual_Network_Computing#cite_note-1) + +The recommended clients are +[TightVNC](http://www.tightvnc.com) or +[TigerVNC](http://sourceforge.net/apps/mediawiki/tigervnc/index.php?title=Main_Page) +(free, open source, available for almost any platform). + +Create VNC password +------------------- + +Local VNC password should be set before the first login. Do use a strong +password. + +` +[username@login2 ~]$ vncpasswd +Password: +Verify: +` + +Start vncserver +--------------- + +To access VNC a local vncserver must be started first and also a tunnel +using SSH port forwarding must be established. +[See below](vnc.html#linux-example-of-creating-a-tunnel) +for the details on SSH tunnels. In this example we use port 61. + +You can find ports which are already occupied. Here you can see that +ports " /usr/bin/Xvnc :79" and " +/usr/bin/Xvnc :60" are occupied. + +` +[username@login2 ~]$ ps aux | grep Xvnc +username   5971 0.0 0.0 201072 92564 ?       SN  Sep22  4:19 /usr/bin/Xvnc :79 -desktop login2:79 (username) -auth /home/gre196/.Xauthority -geometry 1024x768 -rfbwait 30000 -rfbauth /home/username/.vnc/passwd -rfbport 5979 -fp catalogue:/etc/X11/fontpath.d -pn +username   10296 0.0 0.0 131772 21076 pts/29  SN  13:01  0:01 /usr/bin/Xvnc :60 -desktop login2:61 (username) -auth /home/username/.Xauthority -geometry 1600x900 -depth 16 -rfbwait 30000 -rfbauth /home/jir13/.vnc/passwd -rfbport 5960 -fp catalogue:/etc/X11/fontpath.d -pn +..... +` + +Choose free port e.g. 61 and start your VNC server: + +` +[username@login2 ~]$ vncserver :61 -geometry 1600x900 -depth 16 + +New 'login2:1 (username)' desktop is login2:1 + +Starting applications specified in /home/username/.vnc/xstartup +Log file is /home/username/.vnc/login2:1.log +` + +Check if VNC server is started on the port (in this example 61): + +` +[username@login2 .vnc]$ vncserver -list + +TigerVNC server sessions: + +X DISPLAY #    PROCESS ID +:61             18437 +` + +Another command: + +` +[username@login2 .vnc]$  ps aux | grep Xvnc + +username   10296 0.0 0.0 131772 21076 pts/29  SN  13:01  0:01 /usr/bin/Xvnc :61 -desktop login2:61 (username) -auth /home/jir13/.Xauthority -geometry 1600x900 -depth 16 -rfbwait 30000 -rfbauth /home/username/.vnc/passwd -rfbport 5961 -fp catalogue:/etc/X11/fontpath.d -pn +` + +To access the VNC server you have to create a tunnel between the login +node using TCP **port 5961** and your machine using a free TCP port (for +simplicity the very same, in this case). + +The tunnel must point to the same login node where you launched the VNC +server, eg. login2. If you use just cluster-name.it4i.cz, the tunnel +might point to a different node due to DNS round robin. 
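+If you are not sure which login node you are on (and therefore which
+node the tunnel must point to), check it in the session where the VNC
+server runs; a trivial example (output illustrative):
+
+`
+[username@login2 ~]$ hostname
+login2
+`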
+ +###Linux/Mac OS example of creating a tunnel + +At your machine, create the tunnel: + +` +local $ ssh -TN -f username@login2.cluster-name.it4i.cz -L 5961:localhost:5961 +` + +Issue the following command to check the tunnel is established (please +note the PID 2022 in the last column, you'll need it for closing the +tunnel): + +` +local $ netstat -natp | grep 5961 +(Not all processes could be identified, non-owned process info + will not be shown, you would have to be root to see it all.) +tcp       0     0 127.0.0.1:5961         0.0.0.0:*              LISTEN     2022/ssh       +tcp6      0     0 ::1:5961               :::*                   LISTEN     2022/ssh +` + +Or on Mac OS use this command: + +` +local-mac $ lsof -n -i4TCP:5961 | grep LISTEN +ssh 75890 sta545 7u IPv4 0xfb062b5c15a56a3b 0t0 TCP 127.0.0.1:5961 (LISTEN) +` + +Connect with the VNC client: + +` +local $ vncviewer 127.0.0.1:5961 +` + +In this example, we connect to VNC server on port 5961, via the ssh +tunnel. The connection is encrypted and secured. The VNC server +listening on port 5961 provides screen of 1600x900 pixels. + +You have to destroy the SSH tunnel which is still running at the +background after you finish the work. Use the following command (PID +2022 in this case, see the netstat command above): + +` +kill 2022 +` + +### Windows example of creating a tunnel + +Use PuTTY to log in on cluster. + +Start vncserver using command vncserver described above. + +**Search for the localhost and port number (in this case +127.0.0.1:5961).** + +` +[username@login2 .vnc]$ netstat -tanp | grep Xvnc +(Not all processes could be identified, non-owned process info + will not be shown, you would have to be root to see it all.) +tcp       0     0 127.0.0.1:5961             0.0.0.0:*                  LISTEN     24031/Xvnc +` + +On the PuTTY Configuration screen go to Connection->SSH->Tunnels +to set up the tunnel. + +Fill the Source port and Destination fields. **Do not forget to click +the Add button**. + + + +Run the VNC client of your choice, select VNC server 127.0.0.1, port +5961 and connect using VNC password. + +### Example of starting TigerVNC viewer + + + +In this example, we connect to VNC server on port 5961, via the ssh +tunnel, using TigerVNC viewer. The connection is encrypted and secured. +The VNC server listening on port 5961 provides screen of 1600x900 +pixels. + +### Example of starting TightVNC Viewer + +Use your VNC password to log using TightVNC Viewer and start a Gnome +Session on the login node. + + + +Gnome session +------------- + +You should see after the successful login. + + + +###Disable your Gnome session screensaver + +Open Screensaver preferences dialog: + + + +Uncheck both options below the slider: + + + +### Kill screensaver if locked screen + +If the screen gets locked you have to kill the screensaver. Do not to +forget to disable the screensaver then. + +` +[username@login2 .vnc]$ ps aux | grep screen +username    1503 0.0 0.0 103244  892 pts/4   S+  14:37  0:00 grep screen +username    24316 0.0 0.0 270564 3528 ?       
Ss  14:12  0:00 gnome-screensaver + +[username@login2 .vnc]$ kill 24316 +` + +### Kill vncserver after finished work + +You should kill your VNC server using command: + +` +[username@login2 .vnc]$ vncserver -kill :61 +Killing Xvnc process ID 7074 +Xvnc process ID 7074 already killed +` + +Or this way: + +` +[username@login2 .vnc]$ pkill vnc +` + +GUI applications on compute nodes over VNC +------------------------------------------ + +The very [same methods as described +above](https://docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-and-vnc#gui-applications-on-compute-nodes), +may be used to run the GUI applications on compute nodes. However, for +maximum performance, proceed following these steps: + +Open a Terminal (Applications -> System Tools -> Terminal). Run +all the next commands in the terminal. + + + +Allow incoming X11 graphics from the compute nodes at the login node: + +` +$ xhost + +` + +Get an interactive session on a compute node (for more detailed info +[look +here](../../../../anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.html)). +Use the **-v DISPLAY** option to propagate the DISPLAY on the compute +node. In this example, we want a complete node (24 cores in this +example) from the production queue: + +` +$ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A PROJECT_ID -q qprod -l select=1:ncpus=24 +` + +Test that the DISPLAY redirection into your VNC session works, by +running a X11 application (e. g. XTerm) on the assigned compute node: + +` +$ xterm +` + +Example described above: + + + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vncviewer.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vncviewer.png new file mode 100644 index 0000000000000000000000000000000000000000..78929324b4ae1156903d309e529eda10b7ccec50 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vncviewer.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.md new file mode 100644 index 0000000000000000000000000000000000000000..9f9415f98911d88fa4de72b3e10a931b48702c54 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.md @@ -0,0 +1,178 @@ +X Window System +=============== + + + +The X Window system is a principal way to get GUI access to the +clusters. The **X Window System** (commonly known as **X11**, based on +its current major version being 11, or shortened to simply **X**, and +sometimes informally **X-Windows**) is a computer software system and +network +[protocol](http://en.wikipedia.org/wiki/Protocol_%28computing%29 "Protocol (computing)") +that provides a basis for [graphical user +interfaces](http://en.wikipedia.org/wiki/Graphical_user_interface "Graphical user interface") +(GUIs) and rich input device capability for [networked +computers](http://en.wikipedia.org/wiki/Computer_network "Computer network"). 
+ +The X display forwarding must be activated and the X server running on +client side + +### X display + +In order to display graphical user interface GUI of various software +tools, you need to enable the X display forwarding. On Linux and Mac, +log in using the -X option tho ssh client: + +` + local $ ssh -X username@cluster-name.it4i.cz +` + +### X Display Forwarding on Windows + +On Windows use the PuTTY client to enable X11 forwarding.  In PuTTY +menu, go to Connection->SSH->X11, mark the Enable X11 forwarding +checkbox before logging in. Then log in as usual. + +To verify the forwarding, type + +` +$ echo $DISPLAY +` + +if you receive something like + +` +localhost:10.0 +` + +then the X11 forwarding is enabled. + +### X Server + +In order to display graphical user interface GUI of various software +tools, you need running X server on your desktop computer. For Linux +users, no action is required as the X server is the default GUI +environment on most Linux distributions. Mac and Windows users need to +install and run the X server on their workstations. + +### X Server on OS X + +Mac OS users need to install [XQuartz +server](http://xquartz.macosforge.org/landing/). + +### X Server on Windows + +There are variety of X servers available for Windows environment. The +commercial Xwin32 is very stable and rich featured. The Cygwin +environment provides fully featured open-source XWin X server. For +simplicity, we recommend open-source X server by the [Xming +project](http://sourceforge.net/projects/xming/). For +stability and full features we recommend the +[XWin](http://x.cygwin.com/) X server by Cygwin + + |How to use Xwin |How to use Xming | + | --- | --- | + |[Install Cygwin](http://x.cygwin.com/)Find and execute XWin.exeto start the X server on Windows desktop computer.[If no able to forward X11 using PuTTY to CygwinX](x-window-system/cygwin-and-x11-forwarding.html)\ |<p>Use Xlaunch to configure the Xming.<p>Run Xmingto start the X server on Windows desktop computer.\ | + +Read more on +[http://www.math.umn.edu/systems_guide/putty_xwin32.html](http://www.math.umn.edu/systems_guide/putty_xwin32.shtml) + +### Running GUI Enabled Applications + +Make sure that X forwarding is activated and the X server is running. + +Then launch the application as usual. Use the & to run the application +in background. + +` +$ module load intel (idb and gvim not installed yet) +$ gvim & +` + +` +$ xterm +` + +In this example, we activate the intel programing environment tools, +then start the graphical gvim editor. + +### GUI Applications on Compute Nodes + +Allocate the compute nodes using -X option on the qsub command + +` +$ qsub -q qexp -l select=2:ncpus=24 -X -I +` + +In this example, we allocate 2 nodes via qexp queue, interactively. We +request X11 forwarding with the -X option. It will be possible to run +the GUI enabled applications directly on the first compute node. + +**Better performance** is obtained by logging on the allocated compute +node via ssh, using the -X option. + +` +$ ssh -X r24u35n680 +` + +In this example, we log in on the r24u35n680 compute node, with the X11 +forwarding enabled. + +HTML commented section #1 (no GUI on Compute nodes - Xvfb) + +### The Gnome GUI Environment + +The Gnome 2.28 GUI environment is available on the clusters. We +recommend to use separate X server window for displaying the Gnome +environment. + +### Gnome on Linux and OS X + +To run the remote Gnome session in a window on Linux/OS X computer, you +need to install Xephyr. 
Ubuntu package is +xserver-xephyr, on OS X it is part of +[XQuartz](http://xquartz.macosforge.org/landing/). +First, launch Xephyr on local machine: + +` +local $ Xephyr -ac -screen 1024x768 -br -reset -terminate :1 & +` + +This will open a new X window with size 1024x768 at DISPLAY :1. Next, +ssh to the cluster with DISPLAY environment variable set and launch + gnome-session + + local $ DISPLAY=:1.0 ssh -XC yourname@cluster-name.it4i.cz -i ~/.ssh/path_to_your_key + ... cluster-name MOTD... + yourname@login1.cluster-namen.it4i.cz $ gnome-session & + +On older systems where Xephyr is not available, you may also try Xnest +instead of Xephyr. Another option is to launch a new X server in a +separate console, via: + +` +xinit /usr/bin/ssh -XT -i .ssh/path_to_your_key yourname@cluster-namen.it4i.cz gnome-session -- :1 vt12 +` + +However this method does not seem to work with recent Linux +distributions and you will need to manually source +/etc/profile to properly set environment +variables for PBS. + +### Gnome on Windows + +Use Xlaunch to start the Xming server or run the XWin.exe. Select the +''One window" mode. + +Log in to the cluster, using PuTTY. On the cluster, run the +gnome-session command. + +` +$ gnome-session & +` + +In this way, we run remote gnome session on the cluster, displaying it +in the local X server + +Use System->Log Out to close the gnome-session + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..bcf1363294dde3988c00798051c898694d7f9d21 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.md @@ -0,0 +1,22 @@ +Accessing the Clusters +====================== + +The IT4Innovations clusters are accessed by SSH protocol via login +nodes. + +Read more on [Accessing the Salomon +Cluste](../salomon/accessing-the-cluster.html)r or +[Accessing the Anselm +Cluster](../anselm-cluster-documentation/accessing-the-cluster.html) +pages. + +### PuTTY + +On **Windows**, use [PuTTY ssh +client](accessing-the-clusters/shell-access-and-data-transfer/putty/putty.html). + +### SSH keys + +Read more about [SSH keys +management](accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.html). 
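+For completeness, a minimal SSH login from a Linux or Mac command line
+looks like this (replace username with your login and anselm.it4i.cz
+with the login address of the cluster you are accessing; the private key
+path is only an example):
+
+`
+local $ ssh -i ~/.ssh/id_rsa username@anselm.it4i.cz
+`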
+ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PageantV.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PageantV.png new file mode 100644 index 0000000000000000000000000000000000000000..7a08be3c3b44dadfe4144fbd83639be0eb6151a5 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PageantV.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_host_Salomon.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_host_Salomon.png new file mode 100644 index 0000000000000000000000000000000000000000..61e1928f02f8348d4719cbeefe803eeb035e03e4 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_host_Salomon.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_keyV.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_keyV.png new file mode 100644 index 0000000000000000000000000000000000000000..6d847de4a235c9d5cb1b134bb12bca3d6abab404 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_keyV.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_open_Salomon.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_open_Salomon.png new file mode 100644 index 0000000000000000000000000000000000000000..97e381683908ac7c813fed7b97a962a4ba44d223 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_open_Salomon.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_save_Salomon.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_save_Salomon.png new file mode 100644 index 0000000000000000000000000000000000000000..886a79f60ca983084ae7218f21e5fe6e39fbcd00 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuTTY_save_Salomon.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygeneratorV.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygeneratorV.png new file mode 100644 index 0000000000000000000000000000000000000000..7c3014a9443f5a65d56ff64c6a2639e35b4514de Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygeneratorV.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_001V.png 
b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_001V.png new file mode 100644 index 0000000000000000000000000000000000000000..ba3183be0894ea4edffa9d514a55ce20e766e304 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_001V.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_002V.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_002V.png new file mode 100644 index 0000000000000000000000000000000000000000..f15167f04fa6006f27908a97f61b8a5347ef2248 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_002V.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_003V.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_003V.png new file mode 100644 index 0000000000000000000000000000000000000000..73a5505a2b2ac4aa1fd0adf4c3a74adcca0c8c08 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_003V.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_004V.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_004V.png new file mode 100644 index 0000000000000000000000000000000000000000..d90756b3a9a1816e48b37a66cfef174cb281778a Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_004V.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_005V.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_005V.png new file mode 100644 index 0000000000000000000000000000000000000000..0fe5c1258aeafbe5c564fb6655e480f8417f27d7 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_005V.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_006V.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_006V.png new file mode 100644 index 0000000000000000000000000000000000000000..cee16e08f1c8d588a148e76f8374d1f16d3fd410 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/PuttyKeygenerator_006V.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.md new file mode 100644 
index 0000000000000000000000000000000000000000..3f5be332aa8bbce50bdfb701b94ff82fecff1fcc --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.md @@ -0,0 +1,21 @@ +Accessing the Clusters +====================== + +The IT4Innovations clusters are accessed by SSH protocol via login +nodes. + +Read more on [Accessing the Salomon +Cluste](../../../salomon/accessing-the-cluster.html)r or +[Accessing the Anselm +Cluster](../../../anselm-cluster-documentation/accessing-the-cluster.html) +pages. + +### PuTTY + +On **Windows**, use [PuTTY ssh +client](putty/putty.html). + +### SSH keys + +Read more about [SSH keys management](ssh-keys.html). + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.md new file mode 100644 index 0000000000000000000000000000000000000000..eb8d63db1e363728be96c511e7c88ac46af60ec7 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.md @@ -0,0 +1,19 @@ +Pageant SSH agent +================= + + + +Pageant holds your private key in memory without needing to retype a +passphrase on every login. + +- Run Pageant. +- On Pageant Key List press *Add key* and select your private + key (id_rsa.ppk). +- Enter your passphrase. +- Now you have your private key in memory without needing to retype a + passphrase on every login. + +  + + + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.md new file mode 100644 index 0000000000000000000000000000000000000000..b3e8ac55e8c717c08cb7af7294514312ababac88 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.md @@ -0,0 +1,89 @@ +PuTTY +===== + + + +PuTTY - before we start SSH connection +--------------------------------------------------------------------------------- + +### Windows PuTTY Installer + +We recommned you to download "**A Windows installer for everything +except PuTTYtel**" with **Pageant*** (SSH authentication agent) and +**PuTTYgen** (PuTTY key generator) which is available +[here](http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html). + + After installation you can proceed directly +to private keys authentication using +["Putty"](putty.html#putty). +"Change Password for Existing Private Key" is optional. +"Generate a New Public/Private key (`id_rsa/id_rsa.ppk` ): `600 (-rw-------)` pair" is intended for users without +Public/Private key (`id_rsa/id_rsa.ppk` ): `600 (-rw-------)` in the initial email containing login credentials. +"Pageant" is optional. + +### PuTTYgen + +PuTTYgen is the PuTTY key generator. Read more how to load in an +existing private key and change your passphrase or generate a new +public/private key pair using [PuTTYgen](puttygen.html) +if needed. + +### Pageant SSH agent + +[Pageant](pageant.html) holds your private key in memory +without needing to retype a passphrase on every login. We recommend its +usage. 
+ +PuTTY - how to connect to the IT4Innovations cluster +-------------------------------------------------------- + +- Run PuTTY +- Enter Host name and Save session fields with [Login + address](../../../../salomon/accessing-the-cluster/shell-and-data-access/shell-and-data-access.html) + and browse Connection - > SSH -> Auth menu. + The *Host Name* input may be in the format + **"username@clustername.it4i.cz"** so you don't have to type your + login each time. + In this example we will connect to the Salomon cluster using +  **"salomon.it4i.cz"**. + +  + + + +- Category -> Connection - > SSH -> Auth: + Select Attempt authentication using Pageant. + Select Allow agent forwarding. + Browse and select your [private + key](../ssh-keys.html) file. + +  + +- Return to Session page and Save selected configuration with *Save* + button. + +  + +- Now you can log in using *Open* button. + +  + +- Enter your username if the *Host Name* input is not in the format + "username@salomon.it4i.cz". + +- Enter passphrase for selected [private + key](../ssh-keys.html) file if Pageant **SSH + authentication agent is not used.** + + +Another PuTTY Settings +---------------------- + +- Category -> Windows -> Translation -> Remote character set + and select **UTF-8**. + +- Category -> Terminal -> Features and select **Disable + application keypad mode** (enable numpad) +- Save your configuration on Session page in to Default Settings with + *Save* button . + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.md new file mode 100644 index 0000000000000000000000000000000000000000..33228da49215e9b2a979d6f1480a2df4868a69e8 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.md @@ -0,0 +1,70 @@ +PuTTY key generator +=================== + + + +PuTTYgen is the PuTTY key generator. You can load in an existing private +key and change your passphrase or generate a new public/private key +pair. + +### Change Password for Existing Private Key + +You can change the password of your SSH key with "PuTTY Key Generator". +Make sure to backup the key. + +- Load your [private key](../ssh-keys.html) file with + *Load* button. +- Enter your current passphrase. +- Change key passphrase. +- Confirm key passphrase. +- Save your private key with *Save private key* button. + +  + + + +### Generate a New Public/Private key (`id_rsa/id_rsa.ppk` ): `600 (-rw-------)` pair + +You can generate an additional public/private key pair and insert public +key into authorized_keys file for authentication with your own private +key. + +- Start with *Generate* button. + +  + +- Generate some randomness. + +  + +- Wait. + +  + +- Enter a *comment* for your key using format + 'username@organization.example.com'. + Enter key passphrase. + Confirm key passphrase. + Save your new private key `in "*.ppk" `format with *Save private + key* button. + +  + +- Save the public key with *Save public key* button. + You can copy public key out of the â€Public key for pasting into + authorized_keys file’ box. + +  + +- Export private key in OpenSSH format "id_rsa" using Conversion + -> Export OpenSSH key + +  + +- Now you can insert additional public key into authorized_keys file + for authentication with your own private key. 
+  You must log in using the ssh key received after registration. Then
+  proceed to [How to add your own
+  key](../ssh-keys.html).
+
+
diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md
new file mode 100644
index 0000000000000000000000000000000000000000..e6055adb455bae6e89c944a8d73325730ff74072
--- /dev/null
+++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.md
@@ -0,0 +1,146 @@
+SSH keys
+========
+
+
+
+Key management
+-------------------------------------------------------------------
+
+After logging in, you can see the .ssh/ directory with your SSH keys and
+the authorized_keys file:
+
+    $ cd /home/username/
+    $ ls -la .ssh/
+    total 24
+    drwx------ 2 username username 4096 May 13 15:12 .
+    drwxr-x---22 username username 4096 May 13 07:22 ..
+    -rw-r--r-- 1 username username 392 May 21 2014 authorized_keys
+    -rw------- 1 username username 1675 May 21 2014 id_rsa
+    -rw------- 1 username username 1460 May 21 2014 id_rsa.ppk
+    -rw-r--r-- 1 username username 392 May 21 2014 id_rsa.pub
+
+Please note that the private keys in the
+.ssh directory are without a passphrase and allow you to connect within
+the cluster.
+
+### Access privileges on .ssh folder
+
+- `.ssh` directory: `700 (drwx------)`
+- `authorized_keys`, `known_hosts` and public key (`.pub` file):
+  `644 (-rw-r--r--)`
+- private key (`id_rsa/id_rsa.ppk`): `600 (-rw-------)`
+
+    cd /home/username/
+    chmod 700 .ssh/
+    chmod 644 .ssh/authorized_keys
+    chmod 644 .ssh/id_rsa.pub
+    chmod 644 .ssh/known_hosts
+    chmod 600 .ssh/id_rsa
+    chmod 600 .ssh/id_rsa.ppk
+
+Private key
+-----------
+
+The path to a private key is usually /home/username/.ssh/
+
+The private key file, in "id_rsa" or `"*.ppk"` format, is used to
+authenticate with the servers. The private key is present
+on the local side and is used, for example, by the SSH agent [Pageant (for Windows
+users)](putty/PageantV.png). The private key should
+always be kept in a safe place.
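+
+On Linux or Mac, the analogous SSH agent is the standard ssh-agent/ssh-add
+pair. A minimal sketch (assuming an agent is already running in your
+session and the key sits in the default ~/.ssh/ location):
+
+    # load the private key into the agent; the passphrase is asked for once
+    local $ ssh-add ~/.ssh/id_rsa
+    # list the keys the agent currently holds
+    local $ ssh-add -l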
+
+An example of the private key
+format:
+
+    -----BEGIN RSA PRIVATE KEY-----
+    MIIEpAIBAAKCAQEAqbo7jokygnBpG2wYa5NB45ns6+UKTNLMLHF0BO3zmRtKEElE
+    aGqXfbYwvXlcuRb2d9/Y5dVpCZHV0kbY3NhtVOcEIe+1ROaiU9BEsUAhMNEvgiLV
+    gSql4QvRO4BWPlM8+WAWXDp3oeoBh8glXyuh9teb8yq98fv1r1peYGRrW3/s4V+q
+    O1SQ0XY2T7rWCYRLIP6rTMXArTI35v3WU513mn7nm1fJ7oN0QgVH5b0W9V1Kyc4l
+    9vILHeMXxvz+i/5jTEfLOJpiRGYZYcaYrE4dIiHPl3IlbV7hlkK23Xb1US8QJr5G
+    ADxp1VTkHjY+mKagEfxl1hQIb42JLHhKMEGqNQIDAQABAoIBAQCkypPuxZjL+vai
+    UGa5dAWiRZ46P2yrwHPKpvEdpCdDPbLAc1K/CtdBkHZsUPxNHVV6eFWweW99giIY
+    Av+mFWC58X8asBHQ7xkmxW0cqAZRzpkRAl9IBS9/fKjO28Fgy/p+suOi8oWbKIgJ
+    3LMkX0nnT9oz1AkOfTNC6Tv+3SE7eTj1RPcMjur4W1Cd1N3EljLszdVk4tLxlXBS
+    yl9NzVnJJbJR4t01l45VfFECgYEAno1WJSB/SwdZvS9GkfhvmZd3r4vyV9Bmo3dn
+    XZAh8HRW13imOnpklDR4FRe98D9A7V3yh9h60Co4oAUd6N+Oc68/qnv/8O9efA+M
+    /neI9ANYFo8F0+yFCp4Duj7zPV3aWlN/pd8TNzLqecqh10uZNMy8rAjCxybeZjWd
+    DyhgywXhAoGBAN3BCazNefYpLbpBQzwes+f2oStvwOYKDqySWsYVXeVgUI+OWTVZ
+    eZ26Y86E8MQO+q0TIxpwou+TEaUgOSqCX40Q37rGSl9K+rjnboJBYNCmwVp9bfyj
+    kCLL/3g57nTSqhgHNa1xwemePvgNdn6FZteA8sXiCg5ZzaISqWAffek5AoGBAMPw
+    V/vwQ96C8E3l1cH5cUbmBCCcfXM2GLv74bb1V3SvCiAKgOrZ8gEgUiQ0+TfcbAbe
+    7MM20vRNQjaLTBpai/BTbmqM1Q+r1KNjq8k5bfTdAoGANgzlNM9omM10rd9WagL5
+    yuJcal/03p048mtB4OI4Xr5ZJISHze8fK4jQ5veUT9Vu2Fy/w6QMsuRf+qWeCXR5
+    RPC2H0JzkS+2uZp8BOHk1iDPqbxWXJE9I57CxBV9C/tfzo2IhtOOcuJ4LY+sw+y/
+    ocKpJbdLTWrTLdqLHwicdn8OxeWot1mOukyK2l0UeDkY6H5pYPtHTpAZvRBd7ETL
+    Zs2RP3KFFvho6aIDGrY0wee740/jWotx7fbxxKwPyDRsbH3+1Wx/eX2RND4OGdkH
+    gejJEzpk/7y/P/hCad7bSDdHZwO+Z03HIRC0E8yQz+JYatrqckaRCtd7cXryTmTR
+    FbvLJmECgYBDpfno2CzcFJCTdNBZFi34oJRiDb+HdESXepk58PcNcgK3R8PXf+au
+    OqDBtZIuFv9U1WAg0gzGwt/0Y9u2c8m0nXziUS6AePxy5sBHs7g9C9WeZRz/nCWK
+    +cHIm7XOwBEzDKz5f9eBqRGipm0skDZNKl8X/5QMTT5K3Eci2n+lTw==
+    -----END RSA PRIVATE KEY-----
+
+Public key
+----------
+
+The public key file, in "*.pub" format, is used to
+verify a digital signature. The public
+key is present on the remote
+side and allows access to
+the owner of the matching private key.
+
+An example of the public key
+format:
+
+    ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCpujuOiTKCcGkbbBhrk0Hjmezr5QpM0swscXQE7fOZG0oQSURoapd9tjC9eVy5FvZ339jl1WkJkdXSRtjc2G1U5wQh77VE5qJT0ESxQCEw0S+CItWBKqXhC9E7gFY+UyP5YBZcOneh6gGHyCVfK6H215vzKr3x+/WvWl5gZGtbf+zhX6o4RJDRdjZPutYJhEsg/qtMxcCtMjfm/dZTnXeafuebV8nug3RCBUflvRb1XUrJuiX28gsd4xfG/P6L/mNMR8s4kmJEZhlhxpj8Th0iIc+XciVtXuGWQrbddcVRLxAmvkYAPGnVVOQeNj69pqAR/GXaFAhvjYkseEowQao1 username@organization.example.com
+
+### How to add your own key
+
+First, generate a new key pair consisting of your public and private key:
+
+    local $ ssh-keygen -C 'username@organization.example.com' -f additional_key
+
+Please enter a **strong passphrase** to secure your private key.
+
+You can insert an additional public key into the authorized_keys file for
+authentication with your own private key. Additional records in the
+authorized_keys file must be delimited by new lines. Users are
+not advised to remove the default public key from the authorized_keys file.
+
+Example:
+
+    $ cat additional_key.pub >> ~/.ssh/authorized_keys
+
+In this example, we append an additional public key, stored in the file
+additional_key.pub, to the authorized_keys file. Next time we log in, we
+will be able to use the private additional_key to log in.
+
+### How to remove your own key
+
+Removing your key from authorized_keys can be done simply by deleting
+the corresponding public key, which can be identified by the comment at the
+end of the line (e.g. username@organization.example.com).
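+
+As a hedged illustration of the removal described above (assuming the key
+to be removed carries the comment username@organization.example.com and
+that at least one other key stays in the file), one possible approach is:
+
+    $ cp ~/.ssh/authorized_keys ~/.ssh/authorized_keys.bak
+    $ grep -v 'username@organization.example.com' ~/.ssh/authorized_keys.bak > ~/.ssh/authorized_keys
+
+The backup copy lets you restore the original file if the wrong line was
+removed.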
+ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md new file mode 100644 index 0000000000000000000000000000000000000000..22f2e4bca3f27cebef3b76f66211257f09c66c9e --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.md @@ -0,0 +1,27 @@ +VPN - Connection fail in Win 8.1 +================================ + +**Failed to initialize connection subsystem Win 8.1 - 02-10-15 MS patch + +AnyConnect users on Windows 8.1 will receive a "Failed to initialize +connection subsystem" error after installing the Windows 8.1 02/10/15 +security patch. This OS defect introduced with the 02/10/15 patch update +will also impact WIndows 7 users with IE11. Windows Server +2008/2012 are also impacted by this defect, but neither is a supported +OS for AnyConnect. + +**Workaround:** + +- Close the Cisco AnyConnect Window and the taskbar mini-icon +- Right click vpnui.exe in the 'Cisco AnyConnect Secure Mobility + Client' folder. (C:Program Files (x86)CiscoCisco AnyConnect + Secure Mobility +- Client) +- Click on the 'Run compatibility troubleshooter' button +- Choose 'Try recommended settings' +- The wizard suggests Windows 8 compatibility. +- Click 'Test Program'. This will open the program. +- Close + + + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpnuiV.png b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpnuiV.png new file mode 100644 index 0000000000000000000000000000000000000000..076e2c4f9873d72937ecab0bb542a7690f6a70dc Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpnuiV.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.md b/converted/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.md new file mode 100644 index 0000000000000000000000000000000000000000..165ccc7ba2823473be622c1b500b9069a5734cb6 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.md @@ -0,0 +1,38 @@ +Applying for Resources +====================== + + + +Computational resources may be allocated by any of the following +[Computing resources +allocation](http://www.it4i.cz/computing-resources-allocation/?lang=en) +mechanisms. + +Academic researchers can apply for computational resources via [Open +Access +Competitions](http://www.it4i.cz/open-access-competition/?lang=en&lang=en). + +Anyone is welcomed to apply via the [Directors +Discretion.](http://www.it4i.cz/obtaining-computational-resources-through-directors-discretion/?lang=en&lang=en) + +Foreign (mostly European) users can obtain computational resources via +the [PRACE (DECI) +program](http://www.prace-ri.eu/DECI-Projects). + +In all cases, IT4Innovations’ access mechanisms are aimed at +distributing computational resources while taking into account the +development and application of supercomputing methods and their benefits +and usefulness for society. The applicants are expected to submit a +proposal. 
In the proposal, the applicants **apply for a particular +amount of core-hours** of computational resources. The requested +core-hours should be substantiated by scientific excellence of the +proposal, its computational maturity and expected impacts. +Proposals do undergo a scientific, technical and economic +evaluation. The allocation decisions are based on this +evaluation. More information at [Computing resources +allocation](http://www.it4i.cz/computing-resources-allocation/?lang=en) +and [Obtaining Login +Credentials](obtaining-login-credentials.html) page. + + + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/Authorization_chain.png b/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/Authorization_chain.png new file mode 100644 index 0000000000000000000000000000000000000000..c7747cfc28c0a4e7ab98034543e602ee3fc88d79 Binary files /dev/null and b/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/Authorization_chain.png differ diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.md b/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.md new file mode 100644 index 0000000000000000000000000000000000000000..524ef4e374c26f87276e276c851df14e1675b927 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.md @@ -0,0 +1,304 @@ +Certificates FAQ +================ + +FAQ about certificates in general + + + +Q: What are certificates? +------------------------- + +IT4Innovations employs X.509 certificates for secure communication (e. +g. credentials exchange) and for grid services related to PRACE, as they +present a single method of authentication for all PRACE services, where +only one password is required. + +There are different kinds of certificates, each with a different scope +of use. We mention here: + +- User (Private) certificates + +- Certificate Authority (CA) certificates + +- Host certificates + +- Service certificates + + + +**However, users need only manage User and CA certificates. Note that your +user certificate is protected by an associated private key, and this +private key must never be disclosed**. + +Q: Which X.509 certificates are recognised by IT4Innovations? +------------------------------------------------------------- + +Any certificate that has been issued by a Certification Authority (CA) +from a member of the IGTF ([http:www.igtf.net](http://www.igtf.net/)) is +recognised by IT4Innovations: European certificates are issued by +members of the EUGridPMA +([https://www.eugridmpa.org](https://www.eugridpma.org/)), which is part +of the IGTF and coordinates the trust fabric for e-Science Grid +authentication within Europe. Further the Czech *"Qualified certificate" +(KvalifikovanĂ˝ certifikát)* (provided by <http://www.postsignum.cz/> or +<http://www.ica.cz/Kvalifikovany-certifikat.aspx>), that is used in +electronic contact with Czech public authorities is accepted. + +Q: How do I get a User Certificate that can be used with IT4Innovations? +------------------------------------------------------------------------ + +To get a certificate, you must make a request to your local, IGTF +approved, Certificate Authority (CA). Usually you then must visit, in +person, your nearest Registration Authority (RA) to verify your +affiliation and identity (photo identification is required). 
Usually, +you will then be emailed details on how to retrieve your certificate, +although procedures can vary between CAs. If you are in Europe, you can +locate your trusted CA via <http://www.eugridpma.org/members/worldmap>. + +In some countries certificates can also be retrieved using the TERENA +Certificate Service, see the FAQ below for the link. + +Q: Does IT4Innovations support short lived certificates (SLCS)? +--------------------------------------------------------------- + +Yes, provided that the CA which provides this service is also a member +of IGTF. + +Q: Does IT4Innovations support the TERENA certificate service? +-------------------------------------------------------------- + +Yes, ITInnovations supports TERENA eScience personal certificates. For +more information, please visit +[https://tcs-escience-portal.terena.org](https://tcs-escience-portal.terena.org/){.spip_url +.spip_out}, where you also can find if your organisation/country can use +this service + +Q: What format should my certificate take? +------------------------------------------ + +User Certificates come in many formats, the three most common being the +’PKCS12’, ’PEM’ and the JKS formats. + +The PKCS12 (often abbreviated to ’p12’) format stores your user +certificate, along with your associated private key, in a single file. +This form of your certificate is typically employed by web browsers, +mail clients, and grid services like UNICORE, DART, gsissh-term and +Globus toolkit (GSI-SSH, GridFTP and GRAM5). + +The PEM format (*.pem) stores your user certificate and your associated +private key in two separate files. This form of your certificate can be +used by PRACE’s gsissh-term and with the grid related services like +Globus toolkit (GSI-SSH, GridFTP and GRAM5). + +To convert your Certificate from PEM to p12 formats, and *vice versa*, +IT4Innovations recommends using the openssl tool (see separate FAQ +entry). + +JKS is the Java KeyStore and may contain both your personal certificate +with your private key and a list of your trusted CA certificates. This +form of your certificate can be used by grid services like DART and +UNICORE6. + +To convert your Certificate from p12 to JKS, IT4Innovations recommends +using the keytool utiliy (see separate FAQ entry). + +Q: What are CA certificates? +---------------------------- + +Certification Authority (CA) certificates are used to verify the link +between your user certificate and the authority which issued it. They +are also used to verify the link between the host certificate of a +IT4Innovations server and the CA which issued that certificate. In +essence they establish a chain of trust between you and the target +server. Thus, for some grid services, users must have a copy of all the +CA certificates. + +To assist users, SURFsara (a member of PRACE) provides a complete and +up-to-date bundle of all the CA certificates that any PRACE user (or +IT4Innovations grid services user) will require. Bundle of certificates, +in either p12, PEM or JKS formats, are available from +<http://winnetou.sara.nl/prace/certs/>. + +It is worth noting that gsissh-term and DART automatically updates their +CA certificates from this SURFsara website. In other cases, if you +receive a warning that a server’s certificate can not be validated (not +trusted), then please update your CA certificates via the SURFsara +website. If this fails, then please contact the IT4Innovations helpdesk. 
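+
+As a hedged sketch of refreshing the CA bundle by hand (globuscerts.tar.gz
+and the $HOME/.globus/certificates directory are the ones mentioned
+elsewhere in this FAQ; the layout inside the archive may differ, so
+inspect it before copying):
+
+    wget http://winnetou.sara.nl/prace/certs/globuscerts.tar.gz
+    mkdir -p $HOME/.globus/certificates
+    tar -tzf globuscerts.tar.gz
+    tar -xzf globuscerts.tar.gz -C $HOME/.globus/certificates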
+ +Lastly, if you need the CA certificates for a personal Globus 5 +installation, then you can install the CA certificates from a MyProxy +server with the following command. + + myproxy-get-trustroots -s myproxy-prace.lrz.de + +If you run this command as ’root’, then it will install the certificates +into /etc/grid-security/certificates. If you run this not as ’root’, +then the certificates will be installed into +$HOME/.globus/certificates. For Globus, you can download the +globuscerts.tar.gz packet from <http://winnetou.sara.nl/prace/certs/>. + +Q: What is a DN and how do I find mine? +--------------------------------------- + +DN stands for Distinguished Name and is part of your user certificate. +IT4Innovations needs to know your DN to enable your account to use the +grid services. You may use openssl (see below) to determine your DN or, +if your browser contains your user certificate, you can extract your DN +from your browser. + +For Internet Explorer users, the DN is referred to as the "subject" of +your certificate. Tools->Internet +Options->Content->Certificates->View->Details->Subject. + +For users running Firefox under Windows, the DN is referred to as the +"subject" of your certificate. +Tools->Options->Advanced->Encryption->View Certificates. +Highlight your name and then Click View->Details->Subject. + +Q: How do I use the openssl tool? +--------------------------------- + +The following examples are for Unix/Linux operating systems only. + +To convert from PEM to p12, enter the following command: + + openssl pkcs12 -export -in usercert.pem -inkey userkey.pem -out + username.p12 + +To convert from p12 to PEM, type the following *four* commands: + + openssl pkcs12 -in username.p12 -out usercert.pem -clcerts -nokeys + openssl pkcs12 -in username.p12 -out userkey.pem -nocerts + chmod 444 usercert.pem + chmod 400 userkey.pem + +To check your Distinguished Name (DN), enter the following command: + + openssl x509 -in usercert.pem -noout -subject -nameopt + RFC2253 + +To check your certificate (e.g., DN, validity, issuer, public key +algorithm, etc.), enter the following command: + + openssl x509 -in usercert.pem -text -noout + +To download openssl for both Linux and Windows, please visit +<http://www.openssl.org/related/binaries.html>. On Macintosh Mac OS X +computers openssl is already pre-installed and can be used immediately. + +Q: How do I create and then manage a keystore? +---------------------------------------------- + +IT4innovations recommends the java based keytool utility to create and +manage keystores, which themselves are stores of keys and certificates. +For example if you want to convert your pkcs12 formatted key pair into a +java keystore you can use the following command. + + keytool -importkeystore -srckeystore $my_p12_cert -destkeystore + $my_keystore -srcstoretype pkcs12 -deststoretype jks -alias + $my_nickname -destalias $my_nickname + +where $my_p12_cert is the name of your p12 (pkcs12) certificate, +$my_keystore is the name that you give to your new java keystore and +$my_nickname is the alias name that the p12 certificate was given and +is used also for the new keystore. + +You also can import CA certificates into your java keystore with the +tool, e.g.: + + keytool -import -trustcacerts -alias $mydomain -file $mydomain.crt -keystore $my_keystore + +where $mydomain.crt is the certificate of a trusted signing authority +(CA) and $mydomain is the alias name that you give to the entry. 
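+
+To check what actually ended up in the keystore, the same keytool utility
+can list its entries (a hedged example reusing the $my_keystore and
+$my_nickname placeholders from above):
+
+    keytool -list -v -keystore $my_keystore -alias $my_nickname
+
+Omitting the -alias option lists every entry, including any imported CA
+certificates.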
+ +More information on the tool can be found +at:<http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/keytool.html> + +Q: How do I use my certificate to access the different grid Services? +--------------------------------------------------------------------- + +Most grid services require the use of your certificate; however, the +format of your certificate depends on the grid Service you wish to +employ. + +If employing the PRACE version of GSISSH-term (also a Java Web Start +Application), you may use either the PEM or p12 formats. Note that this +service automatically installs up-to-date PRACE CA certificates. + +If the grid service is UNICORE, then you bind your certificate, in +either the p12 format or JKS, to UNICORE during the installation of the +client on your local machine. For more information, please visit +[UNICORE6 in PRACE](http://www.prace-ri.eu/UNICORE6-in-PRACE) + +If the grid service is part of Globus, such as GSI-SSH, GriFTP or GRAM5, +then the certificates can be in either p12 or PEM format and must reside +in the "$HOME/.globus" directory for Linux and Mac users or +%HOMEPATH%.globus for Windows users. (Windows users will have to use +the DOS command ’cmd’ to create a directory which starts with a ’.’). +Further, user certificates should be named either "usercred.p12" or +"usercert.pem" and "userkey.pem", and the CA certificates must be kept +in a pre-specified directory as follows. For Linux and Mac users, this +directory is either $HOME/.globus/certificates or +/etc/grid-security/certificates. For Windows users, this directory is +%HOMEPATH%.globuscertificates. (If you are using GSISSH-Term from +prace-ri.eu then you do not have to create the .globus directory nor +install CA certificates to use this tool alone). + +Q: How do I manually import my certificate into my browser? +----------------------------------------------------------- + +If you employ the Firefox browser, then you can import your certificate +by first choosing the "Preferences" window. For Windows, this is +Tools->Options. For Linux, this is Edit->Preferences. For Mac, +this is Firefox->Preferences. Then, choose the "Advanced" button; +followed by the "Encryption" tab. Then, choose the "Certificates" panel; +select the option "Select one automatically" if you have only one +certificate, or "Ask me every time" if you have more then one. Then +click on the "View Certificates" button to open the "Certificate +Manager" window. You can then select the "Your Certificates" tab and +click on button "Import". Then locate the PKCS12 (.p12) certificate you +wish to import, and employ its associated password. + +If you are a Safari user, then simply open the "Keychain Access" +application and follow "File->Import items". + +If you are an Internet Explorer user, click +Start->Settings->Control Panel and then double-click on Internet. +On the Content tab, click Personal, and then click Import. In the +Password box, type your password. NB you may be prompted multiple times +for your password. In the "Certificate File To Import" box, type the +filename of the certificate you wish to import, and then click OK. Click +Close, and then click OK. + +Q: What is a proxy certificate? +------------------------------- + +A proxy certificate is a short-lived certificate which may be employed +by UNICORE and the Globus services. The proxy certificate consists of a +new user certificate and a newly generated proxy private key. 
This proxy +typically has a rather short lifetime (normally 12 hours) and often only +allows a limited delegation of rights. Its default location, for +Unix/Linux, is /tmp/x509_u*uid* but can be set via the +$X509_USER_PROXY environment variable. + +Q: What is the MyProxy service? +------------------------------- + +[The MyProxy Service](http://grid.ncsa.illinois.edu/myproxy/) +, can be employed by gsissh-term and Globus tools, and is +an online repository that allows users to store long lived proxy +certificates remotely, which can then be retrieved for use at a later +date. Each proxy is protected by a password provided by the user at the +time of storage. This is beneficial to Globus users as they do not have +to carry their private keys and certificates when travelling; nor do +users have to install private keys and certificates on possibly insecure +computers. + +Q: Someone may have copied or had access to the private key of my certificate either in a separate file or in the browser. What should I do? +-------------------------------------------------------------------------------------------------------------------------------------------- + +Please ask the CA that issued your certificate to revoke this certifcate +and to supply you with a new one. In addition, please report this to +IT4Innovations by contacting [the support +team](https://support.it4i.cz/rt). + diff --git a/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.md b/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.md new file mode 100644 index 0000000000000000000000000000000000000000..3cbe8e759f76b67c6eabb4c69281a50d0b436915 --- /dev/null +++ b/converted/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.md @@ -0,0 +1,266 @@ +Obtaining Login Credentials +=========================== + + + +Obtaining Authorization +----------------------- + +The computational resources of IT4I  are allocated by the Allocation +Committee to a [Project](../introduction.html), +investigated by a Primary Investigator. By allocating the computational +resources, the Allocation Committee is authorizing the PI to access and +use the clusters. The PI may decide to authorize a number of her/his +Collaborators to access and use the clusters, to consume the resources +allocated to her/his Project. These collaborators will be associated to +the Project. The Figure below is depicting the authorization chain: + + + + You need to either [become the +PI](../applying-for-resources.html) or [be named as a +collaborator](obtaining-login-credentials.html#authorization-of-collaborator-by-pi) +by a PI in order to access and use the clusters. + +Head of Supercomputing Services acts as a PI of a project DD-13-5. +Joining this project, you may **access and explore the clusters**, use +software, development environment and computers via the qexp and qfree +queues. You may use these resources for own education/research, no +paperwork is required. All IT4I employees may contact the Head of +Supercomputing Services in order to obtain **free access to the +clusters**. + +### Authorization of PI by Allocation Committee + +The PI is authorized to use the clusters by the allocation decision +issued by the Allocation Committee.The PI will be informed by IT4I about +the Allocation Committee decision. + +### Authorization by web + +This is a preferred way of granting access to project resources. 
+Please, use this method whenever it's possible. + +Log in to the [IT4I Extranet +portal](https://extranet.it4i.cz) using IT4I credentials +and go to the **Projects** section. + +- **Users:** Please, submit your requests for becoming a + project member. +- **Primary Investigators:** Please, approve or deny users' requests + in the same section. + +### Authorization by e-mail (an alternative approach) + + In order to authorize a Collaborator to utilize the allocated +resources, the PI should contact the [IT4I +support](https://support.it4i.cz/rt/) (E-mail: [support +[at] it4i.cz](mailto:support%20%5Bat%5D%20it4i.cz)) and provide +following information: + +1. Identify your project by project ID +2. Provide list of people, including himself, who are authorized to use + the resources allocated to the project. The list must include full + name, e-mail and affiliation. Provide usernames as well, if + collaborator login access already exists on the IT4I systems. +3. Include "Authorization to IT4Innovations" into the subject line. + +Example (except the subject line which must be in English, you may use +Czech or Slovak language for communication with us): + + Subject: Authorization to IT4Innovations + + Dear support, + + Please include my collaborators to project OPEN-0-0. + + John Smith, john.smith@myemail.com, Department of Chemistry, MIT, US + Jonas Johansson, jjohansson@otheremail.se, Department of Physics, Royal Institute of Technology, Sweden + Luisa Fibonacci, lf@emailitalia.it, Department of Mathematics, National Research Council, Italy + + Thank you, + PI + (Digitally signed) + +Should the above information be provided by e-mail, the e-mail **must +be** digitally signed. Read more on [digital +signatures](obtaining-login-credentials.html#the-certificates-for-digital-signatures) +below. + +The Login Credentials +------------------------- + +Once authorized by PI, every person (PI or Collaborator) wishing to +access the clusters, should contact the [IT4I +support](https://support.it4i.cz/rt/) (E-mail: [support +[at] it4i.cz](mailto:support%20%5Bat%5D%20it4i.cz)) providing +following information: + +1. Project ID +2. Full name and affiliation +3. Statement that you have read and accepted the [Acceptable use policy + document](http://www.it4i.cz/acceptable-use-policy.pdf) (AUP). +4. Attach the AUP file. +5. Your preferred username, max 8 characters long. The preferred + username must associate your surname and name or be otherwise + derived from it. Only alphanumeric sequences, dash and underscore + signs are allowed. +6. In case you choose [Alternative way to personal + certificate](obtaining-login-credentials.html#alternative-way-of-getting-personal-certificate), + a **scan of photo ID** (personal ID or passport or driver license) + is required + +Example (except the subject line which must be in English, you may use +Czech or Slovak language for communication with us): + + Subject: Access to IT4Innovations + + Dear support, + + Please open the user account for me and attach the account to OPEN-0-0 + Name and affiliation: John Smith, john.smith@myemail.com, Department of Chemistry, MIT, US + I have read and accept the Acceptable use policy document (attached) + + Preferred username: johnsm + + Thank you, + John Smith + (Digitally signed) + +Should the above information be provided by e-mail, the e-mail **must +be** digitally signed. To sign an e-mail, you need digital certificate. 
+Read more on [digital +signatures](obtaining-login-credentials.html#the-certificates-for-digital-signatures) +below. + +Digital signature allows us to confirm your identity in remote +electronic communication and provides an encrypted channel to exchange +sensitive information such as login credentials. After receiving your +signed e-mail with the requested information, we will send you your +login credentials (user name, key, passphrase and password) to access +the IT4I systems. + +We accept certificates issued by any widely respected certification +authority. + +For various reasons we do not accept PGP keys.** Please, use only +X.509 PKI certificates for communication with us.** + +You will receive your personal login credentials by protected e-mail. +The login credentials include: + +1. username +2. ssh private key and private key passphrase +3. system password + +The clusters are accessed by the [private +key](../accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.html) +and username. +Username and password is used for login to the information systems +listed on <http://support.it4i.cz/>. + +### Change Passphrase + +On Linux, use + +` +local $ ssh-keygen -f id_rsa -p +` + +On Windows, use [PuTTY Key +Generator](../accessing-the-clusters/shell-access-and-data-transfer/putty/puttygen.html). + +### Change Password + +Change password in your user profile at +<https://extranet.it4i.cz/user/> + +The Certificates for Digital Signatures +------------------------------------------- + +We accept personal certificates issued by any widely respected +certification authority (CA). This includes certificates by CAs +organized in International Grid Trust Federation +(<http://www.igtf.net/>), its European branch EUGridPMA - +<https://www.eugridpma.org/> and its member organizations, e.g. the +CESNET certification authority - <https://tcs-p.cesnet.cz/confusa/>. The +Czech *"Qualified certificate" (KvalifikovanĂ˝ certifikát)* (provided by +<http://www.postsignum.cz/> or +<http://www.ica.cz/Kvalifikovany-certifikat.aspx>), that is used in +electronic contact with Czech authorities is accepted as well. + +Certificate generation process is well-described here: + +- [How to generate a personal TCS certificate in Mozilla Firefox web + browser + (in Czech)](http://idoc.vsb.cz/xwiki/wiki/infra/view/uzivatel/moz-cert-gen) + + + +A FAQ about certificates can be found here: >[Certificates +FAQ](certificates-faq.html). + +Alternative Way to Personal Certificate +------------------------------------------- + +Follow these steps **only** if you can not obtain your certificate in a +standard way. +In case you choose this procedure, please attach a **scan of photo ID** +(personal ID or passport or drivers license) when applying for [login +credentials](obtaining-login-credentials.html#the-login-credentials). + +1. Go to <https://www.cacert.org/>. + - If there's a security warning, just acknowledge it. + +2. Click *Join*. +3. Fill in the form and submit it by the *Next* button. + - Type in the e-mail address which you use for communication + with us. + - Don't forget your chosen *Pass Phrase*. + +4. You will receive an e-mail verification link. Follow it. +5. After verifying, go to the CAcert's homepage and login using + *Password Login*. +6. Go to *Client Certificates* -> *New*. +7. Tick *Add* for your e-mail address and click the *Next* button. +8. Click the *Create Certificate Request* button. +9. You'll be redirected to a page from where you can download/install + your certificate. 
+ - Simultaneously you'll get an e-mail with a link to + the certificate. + +Installation of the Certificate Into Your Mail Client +----------------------------------------------------- + +The procedure is similar to the following guides: + +- MS Outlook 2010 + - [How to Remove, Import, and Export Digital + Certificates](http://support.microsoft.com/kb/179380) + - [Importing a PKCS #12 certificate + (in Czech)](http://idoc.vsb.cz/xwiki/wiki/infra/view/uzivatel/outl-cert-imp) +- Mozilla Thudnerbird + - [Installing an SMIME + certificate](http://kb.mozillazine.org/Installing_an_SMIME_certificate) + - [Importing a PKCS #12 certificate + (in Czech)](http://idoc.vsb.cz/xwiki/wiki/infra/view/uzivatel/moz-cert-imp) + +End of User Account Lifecycle +----------------------------- + +User accounts are supported by membership in active Project(s) or by +affiliation to IT4Innovations. User accounts, that loose the support +(meaning, are not attached to an active project and are not affiliated +with IT4I), will be deleted 1 year after the last project to which they +were attached expires. + +User will get 3 automatically generated warning e-mail messages of the +pending removal:. + +- First message will be sent 3 months before the removal +- Second message will be sent 1 month before the removal +- Third message will be sent 1 week before the removal. + +The messages will inform about the projected removal date and will +challenge the user to migrate her/his data + diff --git a/converted/docs.it4i.cz/index.md b/converted/docs.it4i.cz/index.md new file mode 100644 index 0000000000000000000000000000000000000000..f99c494d9c204a5832fe1c55d520658ef12b5a1c --- /dev/null +++ b/converted/docs.it4i.cz/index.md @@ -0,0 +1,144 @@ +Documentation +============= + + + +Welcome to IT4Innovations documentation pages. The IT4Innovations +national supercomputing center operates supercomputers +[Salomon](salomon.html) and +[Anselm](anselm.html). The supercomputers are [ +available](get-started-with-it4innovations/applying-for-resources.html) +to academic community within the Czech Republic and Europe and +industrial community worldwide. The purpose of these pages is to provide +a comprehensive documentation on hardware, software and usage of the +computers. + + How to read the documentation +-------------------------------------------------------------------------------------------------- + +1. Read the list in the left column. Select the subject of interest. + Alternatively, use the Search box in the upper right corner. +2. Read the CONTENTS in the upper right corner. +3. Scan for all the yellow bulb call-outs on the page. +4. Read the details if still more information is needed. **Look for + examples** illustrating the concepts. + + + +The call-out.  Focus on the call-outs before reading full details. + + + +- Read the + [Changelog](get-started-with-it4innovations/changelog.html) + to keep up to date. + +Getting Help and Support +------------------------ + +Contact [support [at] +it4i.cz](mailto:support%20%5Bat%5D%20it4i.cz) for help and +support regarding the cluster technology at IT4Innovations. +Please use **Czech**, **Slovak** or **English** language for +communication with us. +Follow the status of your request to IT4Innovations at +[support.it4i.cz/rt](http://support.it4i.cz/rt). + + + +Use your IT4Innotations username and password to log in to the +[support](http://support.it4i.cz/) portal. + +Required Proficiency +-------------------- + +You need basic proficiency in Linux environment. 
+ + + +In order to use the system for your calculations, you need basic +proficiency in Linux environment. To gain the proficiency, we recommend +you reading the [ introduction to +Linux](http://www.tldp.org/LDP/intro-linux/html/) +operating system environment and installing a Linux distribution on your +personal computer. A good choice might be the [ +Fedora](http://fedoraproject.org/) +distribution, as it is similar to systems on the clusters at +IT4Innovations. It's easy to install and use. In fact, any distribution +would do. + + + +Learn how to parallelize your code! + + + +In many cases, you will run your own code on the cluster. In order to +fully exploit the cluster, you will need to carefully consider how to +utilize all the cores available on the node and how to use multiple +nodes at the same time. You need to **parallelize** your code. +Proficieny in MPI, OpenMP, CUDA, UPC or GPI2 programming may be gained +via the [training provided by +IT4Innovations.](http://prace.it4i.cz) + +Terminology Frequently Used on These Pages +------------------------------------------ + +- **node:** a computer, interconnected by network to other computers - + Computational nodes are powerful computers, designed and dedicated + for executing demanding scientific computations. +- **core:** processor core, a unit of processor, executing + computations +- **corehours:** wall clock hours of processor core time - Each node + is equipped with **X** processor cores, provides **X** corehours per + 1 wall clock hour. +- **job:** a calculation running on the supercomputer - The job + allocates and utilizes resources of the supercomputer for + certain time. +- **HPC:** High Performance Computing +- **HPC (computational) resources:** corehours, storage capacity, + software licences +- **code:** a program +- **primary investigator (PI):** a person responsible for execution of + computational project and utilization of computational resources + allocated to that project +- **collaborator:** a person participating on execution of + computational project and utilization of computational resources + allocated to that project +- **project:** a computational project under investigation by the + PI - The project is identified by the project ID. The computational + resources are allocated and charged per project. +- **jobscript:** a script to be executed by the PBS Professional + workload manager + +Conventions +----------- + +In this documentation, you will find a number of pages containing +examples. We use the following conventions: + + Cluster command prompt + +` +$ +` + +Your local linux host command prompt + +` +local $ +` + + Errata +------- + +Although we have taken every care to ensure the accuracy of our content, +mistakes do happen. If you find a mistake in the text or the code we +would be grateful if you would report this to us. By doing so, you can +save other readers from frustration and help us improve subsequent +versions of this documentation. If you find any errata, please report +them by visiting http://support.it4i.cz/rt, creating a new ticket, and +entering the details of your errata. Once your errata are verified, your +submission will be accepted and the errata will be uploaded on our +website. 
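+
+Putting the two prompt conventions together, a typical documented session
+reads like the following minimal illustration (salomon.it4i.cz is one of
+the login addresses described on the cluster-specific pages):
+
+`
+local $ ssh username@salomon.it4i.cz
+$ module list
+`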
+
diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster.md b/converted/docs.it4i.cz/salomon/accessing-the-cluster.md
new file mode 100644
index 0000000000000000000000000000000000000000..88e1d77ab3ecadc0bfd0c16ea3124c142f0632f2
--- /dev/null
+++ b/converted/docs.it4i.cz/salomon/accessing-the-cluster.md
@@ -0,0 +1,143 @@
+Shell access and data transfer
+==============================
+
+
+
+Interactive Login
+-----------------
+
+The Salomon cluster is accessed by the SSH protocol via login nodes login1,
+login2, login3 and login4 at address salomon.it4i.cz. The login nodes
+may be addressed specifically, by prepending the login node name to the
+address.
+
+The alias salomon.it4i.cz is currently not available through VPN
+connection. Please use loginX.salomon.it4i.cz when connected to
+VPN.
+
+ |Login address|Port|Protocol|Login node|
+ |---|---|---|---|
+ |salomon.it4i.cz|22|ssh|round-robin DNS record for login[1-4]|
+ |login1.salomon.it4i.cz|22|ssh|login1|
+ |login2.salomon.it4i.cz|22|ssh|login2|
+ |login3.salomon.it4i.cz|22|ssh|login3|
+ |login4.salomon.it4i.cz|22|ssh|login4|
+
+The authentication is by the [private
+key](../get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.html)
+
+Please verify SSH fingerprints during the first logon. They are
+identical on all login nodes:
+f6:28:98:e4:f9:b2:a6:8f:f2:f4:2d:0a:09:67:69:80 (DSA)
+70:01:c9:9a:5d:88:91:c7:1b:c0:84:d1:fa:4e:83:5c (RSA)
+
+
+
+Private key authentication:
+
+On **Linux** or **Mac**, use
+
+`
+local $ ssh -i /path/to/id_rsa username@salomon.it4i.cz
+`
+
+If you see the warning message "UNPROTECTED PRIVATE KEY FILE!", use this
+command to set lower permissions on the private key file.
+
+`
+local $ chmod 600 /path/to/id_rsa
+`
+
+On **Windows**, use the [PuTTY ssh
+client](../get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty/putty.html).
+
+After logging in, you will see the command prompt:
+
+                    _____      _
+                   / ____|    | |
+                  | (___  __ _| | ___ _ __ ___  ___ _ __
+                   ___ / _` | |/ _ | '_ ` _ / _ | '_
+                   ____) | (_| | | (_) | | | | | | (_) | | | |
+                  |_____/ __,_|_|___/|_| |_| |_|___/|_| |_|
+
+
+                        http://www.it4i.cz/?lang=en
+
+ Last login: Tue Jul 9 15:57:38 2013 from your-host.example.com
+ [username@login2.salomon ~]$
+
+The environment is **not** shared between login nodes, except for
+[shared filesystems](storage/storage.html).
+
+Data Transfer
+-------------
+
+Data in and out of the system may be transferred by the
+[scp](http://en.wikipedia.org/wiki/Secure_copy) and sftp
+protocols.
+
+In case large volumes of data are transferred, use the dedicated data mover
+nodes cedge[1-3].salomon.it4i.cz for increased performance.
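+
+For example, a bulk transfer routed through one of the data mover nodes
+might look like the sketch below (an illustration only; it assumes the
+cedge nodes accept scp with the same key and port as the login nodes):
+
+`
+local $ scp -i /path/to/id_rsa my-large-file username@cedge1.salomon.it4i.cz:directory/
+`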
+
+
+
+HTML commented section #1 (removed cedge servers from the table)
+
+ |Address|Port|Protocol|
+ |---|---|---|
+ |salomon.it4i.cz|22|scp, sftp|
+ |login1.salomon.it4i.cz|22|scp, sftp|
+ |login2.salomon.it4i.cz|22|scp, sftp|
+ |login3.salomon.it4i.cz|22|scp, sftp|
+ |login4.salomon.it4i.cz|22|scp, sftp|
+
+The authentication is by the [private
+key](../get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.html)
+
+HTML commented section #2 (ssh transfer performance data need to be
+verified)
+
+On Linux or Mac, use an scp or sftp client to transfer the data to Salomon:
+
+`
+local $ scp -i /path/to/id_rsa my-local-file username@salomon.it4i.cz:directory/file
+`
+
+`
+local $ scp -i /path/to/id_rsa -r my-local-dir username@salomon.it4i.cz:directory
+`
+
+ or
+
+`
+local $ sftp -o IdentityFile=/path/to/id_rsa username@salomon.it4i.cz
+`
+
+A very convenient way to transfer files in and out of the Salomon computer
+is via the FUSE filesystem
+[sshfs](http://linux.die.net/man/1/sshfs).
+
+`
+local $ sshfs -o IdentityFile=/path/to/id_rsa username@salomon.it4i.cz:. mountpoint
+`
+
+Using sshfs, the user's Salomon home directory will be mounted on your
+local computer, just like an external disk.
+
+Learn more about ssh, scp and sshfs by reading the man pages
+
+`
+$ man ssh
+$ man scp
+$ man sshfs
+`
+
+On Windows, use the [WinSCP
+client](http://winscp.net/eng/download.php) to transfer
+the data. The [win-sshfs
+client](http://code.google.com/p/win-sshfs/) provides a
+way to mount the Salomon filesystems directly as an external disk.
+
+More information about the shared file systems is available
+[here](storage/storage.html).
+
diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/anyconnectcontextmenu.jpg b/converted/docs.it4i.cz/salomon/accessing-the-cluster/anyconnectcontextmenu.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..26122f2f5965ce71489267d359eff21c39ed7960
Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/anyconnectcontextmenu.jpg differ
diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/anyconnecticon.jpg b/converted/docs.it4i.cz/salomon/accessing-the-cluster/anyconnecticon.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d7397f5c43f659d71a6fe8413ad89dca8157bb48
Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/anyconnecticon.jpg differ
diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/copy_of_vpn_web_install_3.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/copy_of_vpn_web_install_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..1750cfde48b5c660f9c375dd88d3c666f36dcded
Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/copy_of_vpn_web_install_3.png differ
diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.md b/converted/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.md
new file mode 100644
index 0000000000000000000000000000000000000000..252d36b8f957e110b01c4094bd9bacbe8eca5cc7
--- /dev/null
+++ b/converted/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.md
@@ -0,0 +1,118 @@
+Outgoing connections
+====================
+
+
+
+Connection restrictions
+-----------------------
+
+Outgoing connections, from Salomon Cluster login nodes to the outside
+world, are restricted to the following ports:
+
+ |Port|Protocol|
+ |---|---|
+ |22|ssh|
+ |80|http|
+ |443|https| + |9418|git| + +Please use **ssh port forwarding** and proxy servers to connect from +Salomon to all other remote ports. + +Outgoing connections, from Salomon Cluster compute nodes are restricted +to the internal network. Direct connections form compute nodes to +outside world are cut. + +Port forwarding +--------------- + +### Port forwarding from login nodes + +Port forwarding allows an application running on Salomon to connect to +arbitrary remote host and port. + +It works by tunneling the connection from Salomon back to users +workstation and forwarding from the workstation to the remote host. + +Pick some unused port on Salomon login node (for example 6000) and +establish the port forwarding: + +` +local $ ssh -R 6000:remote.host.com:1234 salomon.it4i.cz +` + +In this example, we establish port forwarding between port 6000 on +Salomon and port 1234 on the remote.host.com. By accessing +localhost:6000 on Salomon, an application will see response of +remote.host.com:1234. The traffic will run via users local workstation. + +Port forwarding may be done **using PuTTY** as well. On the PuTTY +Configuration screen, load your Salomon configuration first. Then go to +Connection->SSH->Tunnels to set up the port forwarding. Click +Remote radio button. Insert 6000 to Source port textbox. Insert +remote.host.com:1234. Click Add button, then Open. + +Port forwarding may be established directly to the remote host. However, +this requires that user has ssh access to remote.host.com + +` +$ ssh -L 6000:localhost:1234 remote.host.com +` + +Note: Port number 6000 is chosen as an example only. Pick any free port. + +### Port forwarding from compute nodes + +Remote port forwarding from compute nodes allows applications running on +the compute nodes to access hosts outside Salomon Cluster. + +First, establish the remote port forwarding form the login node, as +[described +above](outgoing-connections.html#port-forwarding-from-login-nodes). + +Second, invoke port forwarding from the compute node to the login node. +Insert following line into your jobscript or interactive shell + +` +$ ssh -TN -f -L 6000:localhost:6000 login1 +` + +In this example, we assume that port forwarding from login1:6000 to +remote.host.com:1234 has been established beforehand. By accessing +localhost:6000, an application running on a compute node will see +response of remote.host.com:1234 + +### Using proxy servers + +Port forwarding is static, each single port is mapped to a particular +port on remote host. Connection to other remote host, requires new +forward. + +Applications with inbuilt proxy support, experience unlimited access to +remote hosts, via single proxy server. + +To establish local proxy server on your workstation, install and run +SOCKS proxy server software. On Linux, sshd demon provides the +functionality. To establish SOCKS proxy server listening on port 1080 +run: + +` +local $ ssh -D 1080 localhost +` + +On Windows, install and run the free, open source [Sock +Puppet](http://sockspuppet.com/) server. + +Once the proxy server is running, establish ssh port forwarding from +Salomon to the proxy server, port 1080, exactly as [described +above](outgoing-connections.html#port-forwarding-from-login-nodes). + +` +local $ ssh -R 6000:localhost:1080 salomon.it4i.cz +` + +Now, configure the applications proxy settings to **localhost:6000**. +Use port forwarding to access the [proxy server from compute +nodes](outgoing-connections.html#port-forwarding-from-compute-nodes) +as well . 
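+
+Whether the whole chain works can be checked with any SOCKS-aware client.
+A hedged example using curl (assuming the remote forward from port 6000 to
+your workstation's proxy on port 1080 is already in place, as described
+above):
+
+`
+$ curl --socks5 localhost:6000 http://remote.host.com/
+`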
+ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.md b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.md new file mode 100644 index 0000000000000000000000000000000000000000..7ac87bb0cfa7c952517362d712bfdacad650d468 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.md @@ -0,0 +1,108 @@ +VPN Access +========== + + + +Accessing IT4Innovations internal resources via VPN +--------------------------------------------------- + +For using resources and licenses which are located at IT4Innovations +local network, it is necessary to VPN connect to this network. +We use Cisco AnyConnect Secure Mobility Client, which is supported on +the following operating systems: + +- >Windows XP +- >Windows Vista +- >Windows 7 +- >Windows 8 +- >Linux +- >MacOS + +It is impossible to connect to VPN from other operating systems. + +VPN client installation +------------------------------------ + +You can install VPN client from web interface after successful login +with LDAP credentials on address <https://vpn.it4i.cz/user> + + + +According to the Java settings after login, the client either +automatically installs, or downloads installation file for your +operating system. It is necessary to allow start of installation tool +for automatic installation. + + + + + + +After successful installation, VPN connection will be established and +you can use available resources from IT4I network. + + + +If your Java setting doesn't allow automatic installation, you can +download installation file and install VPN client manually. + + + +After you click on the link, download of installation file will start. + + + +After successful download of installation file, you have to execute this +tool with administrator's rights and install VPN client manually. + +Working with VPN client +----------------------- + +You can use graphical user interface or command line interface to run +VPN client on all supported operating systems. We suggest using GUI. + +Before the first login to VPN, you have to fill +URL **https://vpn.it4i.cz/user** into the text field. + + Contacting + + +After you click on the Connect button, you must fill your login +credentials. + + Contacting + + +After a successful login, the client will minimize to the system tray. +If everything works, you can see a lock in the Cisco tray icon. + +[ + + +If you right-click on this icon, you will see a context menu in which +you can control the VPN connection. + +[ + + +When you connect to the VPN for the first time, the client downloads the +profile and creates a new item "IT4I cluster" in the connection list. +For subsequent connections, it is not necessary to re-enter the URL +address, but just select the corresponding item. + + Contacting + + +Then AnyConnect automatically proceeds like in the case of first logon. + + + +After a successful logon, you can see a green circle with a tick mark on +the lock icon. + + Succesfull + + +For disconnecting, right-click on the AnyConnect client icon in the +system tray and select **VPN Disconnect**. 
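+
+On Linux, the AnyConnect client also ships a command line interface. The
+install path and exact behaviour differ between client versions, so treat
+the following only as a hedged sketch (the path is the usual Linux default
+and the gateway address is assumed to be vpn.it4i.cz):
+
+`
+local $ /opt/cisco/anyconnect/bin/vpn connect vpn.it4i.cz
+local $ /opt/cisco/anyconnect/bin/vpn state
+local $ /opt/cisco/anyconnect/bin/vpn disconnect
+`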
+ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting.png new file mode 100644 index 0000000000000000000000000000000000000000..bf8578866584b7ec379c29f7aca8015ca5809a60 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting_https.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting_https.png new file mode 100644 index 0000000000000000000000000000000000000000..cd2e6fa2f047a82bbec7bcd689c58a2f5bd4c77f Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting_https.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting_https_cluster.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting_https_cluster.png new file mode 100644 index 0000000000000000000000000000000000000000..a87623cc85b5700752f45773b8af3846515eabe7 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_contacting_https_cluster.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_login.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_login.png new file mode 100644 index 0000000000000000000000000000000000000000..24c24388b31403d194fdf6354ccf5b898a78767c Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_login.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_successfull_connection.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_successfull_connection.png new file mode 100644 index 0000000000000000000000000000000000000000..2157c5468afb9d52e85a7c3c314b100b35c5c3ae Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_successfull_connection.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_download.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_download.png new file mode 100644 index 0000000000000000000000000000000000000000..5ca0bd393a836fd9aaea2e7531ccdbad0132e59e Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_download.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_download_2.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_download_2.png new file mode 100644 index 0000000000000000000000000000000000000000..135d38dab1320381f25783fdde0acdecc4a8ad5a Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_download_2.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_install_2.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_install_2.png new file mode 100644 index 0000000000000000000000000000000000000000..44086cdc923fc0d79ad1d2372b73eff8d4790d46 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_install_2.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_install_4.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_install_4.png new file mode 100644 index 0000000000000000000000000000000000000000..f28001c41c2c9c3100d2667c84f9048dee71d744 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_install_4.png differ diff --git 
a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_login.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_login.png new file mode 100644 index 0000000000000000000000000000000000000000..ae168c945b82d8d141c0e8a8c502f0c66541eeae Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_login.png differ diff --git a/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_login_2.png b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_login_2.png new file mode 100644 index 0000000000000000000000000000000000000000..5df65d34d7389468d66a2ee1075fce9a22a9fab4 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/accessing-the-cluster/vpn_web_login_2.png differ diff --git a/converted/docs.it4i.cz/salomon/environment-and-modules.md b/converted/docs.it4i.cz/salomon/environment-and-modules.md new file mode 100644 index 0000000000000000000000000000000000000000..e9da01143447365624c102d1cb9ba090afe59d95 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/environment-and-modules.md @@ -0,0 +1,161 @@ +Environment and Modules +======================= + + + +### Environment Customization + +After logging in, you may want to configure the environment. Write your +preferred path definitions, aliases, functions and module loads in the +.bashrc file + +` +# ./bashrc + +# Source global definitions +if [ -f /etc/bashrc ]; then + . /etc/bashrc +fi + +# User specific aliases and functions +alias qs='qstat -a' +module load intel/2015b + +# Display informations to standard output - only in interactive ssh session +if [ -n "$SSH_TTY" ] +then + module list # Display loaded modules +fi +` + +Do not run commands outputing to standard output (echo, module list, +etc) in .bashrc for non-interactive SSH sessions. It breaks fundamental +functionality (scp, PBS) of your account! Take care for SSH session +interactivity for such commands as + stated in the previous example. +in the previous example. + +### Application Modules + +In order to configure your shell for running particular application on +Salomon we use Module package interface. + +Application modules on Salomon cluster are built using +[EasyBuild](http://hpcugent.github.io/easybuild/ "EasyBuild"). The +modules are divided into the following structure: + +` + base: Default module class + bio: Bioinformatics, biology and biomedical + cae: Computer Aided Engineering (incl. CFD) + chem: Chemistry, Computational Chemistry and Quantum Chemistry + compiler: Compilers + data: Data management & processing tools + debugger: Debuggers + devel: Development tools + geo: Earth Sciences + ide: Integrated Development Environments (e.g. editors) + lang: Languages and programming aids + lib: General purpose libraries + math: High-level mathematical software + mpi: MPI stacks + numlib: Numerical Libraries + perf: Performance tools + phys: Physics and physical systems simulations + system: System utilities (e.g. highly depending on system OS and hardware) + toolchain: EasyBuild toolchains + tools: General purpose tools + vis: Visualization, plotting, documentation and typesetting +` + +The modules set up the application paths, library paths and environment +variables for running particular application. + +The modules may be loaded, unloaded and switched, according to momentary +needs. 
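Besides loading and unloading (shown below), it is often useful to inspect what a module would change before loading it. A short sketch, using the intel/2015b module from the .bashrc example above (the second version name in the swap command is purely illustrative):

`
$ module show intel/2015b               # list the paths and variables the module sets
$ module swap intel/2015b intel/2016a   # illustrative only: replace one loaded version with another
`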
+ +To check available modules use + +` +$ module avail +` + +To load a module, for example the OpenMPI module use + +` +$ module load OpenMPI +` + +loading the OpenMPI module will set up paths and environment variables +of your active shell such that you are ready to run the OpenMPI software + +To check loaded modules use + +` +$ module list +` + + To unload a module, for example the OpenMPI module use + +` +$ module unload OpenMPI +` + +Learn more on modules by reading the module man page + +` +$ man module +` + +### EasyBuild Toolchains + +As we wrote earlier, we are using EasyBuild for automatised software +installation and module creation. + +EasyBuild employs so-called **compiler toolchains** or, +simply toolchains for short, which are a major concept in handling the +build and installation processes. + +A typical toolchain consists of one or more compilers, usually put +together with some libraries for specific functionality, e.g., for using +an MPI stack for distributed computing, or which provide optimized +routines for commonly used math operations, e.g., the well-known +BLAS/LAPACK APIs for linear algebra routines. + +For each software package being built, the toolchain to be used must be +specified in some way. + +The EasyBuild framework prepares the build environment for the different +toolchain components, by loading their respective modules and defining +environment variables to specify compiler commands (e.g., +via `$F90`), compiler and linker options (e.g., +via `$CFLAGS` and `$LDFLAGS`{.docutils .literal}), +the list of library names to supply to the linker (via `$LIBS`{.docutils +.literal}), etc. This enables making easyblocks +largely toolchain-agnostic since they can simply rely on these +environment variables; that is, unless they need to be aware of, for +example, the particular compiler being used to determine the build +configuration options. + +Recent releases of EasyBuild include out-of-the-box toolchain support +for: + +- various compilers, including GCC, Intel, Clang, CUDA +- common MPI libraries, such as Intel MPI, MPICH, MVAPICH2, OpenMPI +- various numerical libraries, including ATLAS, Intel MKL, OpenBLAS, + ScalaPACK, FFTW + + + +On Salomon, we have currently following toolchains installed: + + |Toolchain|Module(s)| + |---|----| + |GCC|GCC| + |ictce|icc, ifort, imkl, impi| + |intel|GCC, icc, ifort, imkl, impi| + |gompi|GCC, OpenMPI| + |goolf|BLACS, FFTW, GCC, OpenBLAS, OpenMPI, ScaLAPACK| + |iompi|OpenMPI, icc, ifort| + |iccifort|icc, ifort| + diff --git a/converted/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.md b/converted/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.md new file mode 100644 index 0000000000000000000000000000000000000000..58d4f81c6fc3a45654ba30fb51e6e43da6f4f8a7 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.md @@ -0,0 +1,89 @@ +Hardware Overview +================= + + + +Introduction +------------ + +The Salomon cluster consists of 1008 computational nodes of which 576 +are regular compute nodes and 432 accelerated nodes. Each node is a + powerful x86-64 computer, equipped +with 24 cores (two twelve-core Intel Xeon processors) and 128GB RAM. The +nodes are interlinked by high speed InfiniBand and Ethernet networks. +All nodes share 0.5PB /home NFS disk storage to store the user files. +Users may use a DDN Lustre shared storage with capacity of 1.69 PB which +is available for the scratch project data. 
The user access to the +Salomon cluster is provided by four login nodes. + +[More about schematic representation of the Salomon cluster compute +nodes IB +topology](../network-1/ib-single-plane-topology.html). + + + +The parameters are summarized in the following tables: + +General information +------------------- + +In general** +Primary purpose +High Performance Computing +Architecture of compute nodes +x86-64 +Operating system +CentOS 6.7 Linux +[**Compute nodes**](../compute-nodes.html) +Totally +1008 +Processor +2x Intel Xeon E5-2680v3, 2.5GHz, 12cores +RAM +128GB, 5.3GB per core, DDR4@2133 MHz +Local disk drive +no +Compute network / Topology +InfiniBand FDR56 / 7D Enhanced hypercube +w/o accelerator +576 +MIC accelerated +432 +In total** +Total theoretical peak performance (Rpeak) +2011 Tflop/s +Total amount of RAM +129.024 TB +Compute nodes +------------- + + |Node|Count|Processor|Cores|Memory|Accelerator| + ----------------- - |---|---|------------------------ ------- -------- -------------------------------------------- + |w/o accelerator|576|2x Intel Xeon E5-2680v3, 2.5GHz|24|128GB|-| + |MIC accelerated|432|2x Intel Xeon E5-2680v3, 2.5GHz|24|128GB|2x Intel Xeon Phi 7120P, 61cores, 16GB RAM| + +For more details please refer to the [Compute +nodes](../compute-nodes.html). + +Remote visualization nodes +-------------------------- + +For remote visualization two nodes with NICE DCV software are available +each configured: + + |Node|Count|Processor|Cores|Memory|GPU Accelerator| + --------------- - |---|---|----------------------- ------- -------- ------------------------------ + |visualization|2|2x Intel Xeon E5-2695v3, 2.3GHz|28|512GB|NVIDIA QUADRO K5000, 4GB RAM| + +SGI UV 2000 +----------- + +For large memory computations a special SMP/NUMA SGI UV 2000 server is +available: + + |Node |Count |Processor |Cores<th align="left">Memory<th align="left">Extra HW | + | --- | --- | + |UV2000 |1 |14x Intel Xeon E5-4627v2, 3.3GHz, 8cores |112 |3328GB DDR3@1866MHz |2x 400GB local SSD1x NVIDIA GM200(GeForce GTX TITAN X),12GB RAM\ | + + + diff --git a/converted/docs.it4i.cz/salomon/hardware-overview-1/uv-2000.jpeg b/converted/docs.it4i.cz/salomon/hardware-overview-1/uv-2000.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..bc6e51cde6ca191e118211763d9e42c097222eaa Binary files /dev/null and b/converted/docs.it4i.cz/salomon/hardware-overview-1/uv-2000.jpeg differ diff --git a/converted/docs.it4i.cz/salomon/introduction.md b/converted/docs.it4i.cz/salomon/introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..7996d06642a9fb087c549727b1e0a05e224569e3 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/introduction.md @@ -0,0 +1,33 @@ +Introduction +============ + +Welcome to Salomon supercomputer cluster. The Salomon cluster consists +of 1008 compute nodes, totaling 24192 compute cores with 129TB RAM and +giving over 2 Pflop/s theoretical peak performance. Each node is a +powerful x86-64 computer, equipped with 24 +cores, at least 128GB RAM. Nodes are interconnected by 7D Enhanced +hypercube Infiniband network and equipped with Intel Xeon E5-2680v3 +processors. The Salomon cluster consists of 576 nodes without +accelerators and 432 nodes equipped with Intel Xeon Phi MIC +accelerators. Read more in [Hardware +Overview](hardware-overview-1/hardware-overview.html). 
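For orientation, the headline figures are consistent with the per-node configuration: 1008 nodes × 24 cores = 24 192 cores in total, and 1008 nodes × 128 GB = 129 024 GB, i.e. roughly 129 TB of aggregate RAM.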
+ +The cluster runs CentOS Linux [ +](http://www.bull.com/bullx-logiciels/systeme-exploitation.html) +operating system, which is compatible with +the RedHat [ +Linux +family.](http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg) + +**Water-cooled Compute Nodes With MIC Accelerator** + + + + + +**Tape Library T950B** + + + + + diff --git a/converted/docs.it4i.cz/salomon/network-1/7D_Enhanced_hypercube.png b/converted/docs.it4i.cz/salomon/network-1/7D_Enhanced_hypercube.png new file mode 100644 index 0000000000000000000000000000000000000000..12bb0a406708a5839f2f008294a560a04a80abe8 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/network-1/7D_Enhanced_hypercube.png differ diff --git a/converted/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.md b/converted/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.md new file mode 100644 index 0000000000000000000000000000000000000000..633115bd68cf9f0dcf0da9beefdc8c1d55a97ee4 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.md @@ -0,0 +1,37 @@ +7D Enhanced Hypercube +===================== + +[More about Job submission - Placement by IB switch / Hypercube +dimension.](../resource-allocation-and-job-execution/job-submission-and-execution.html) + +Nodes may be selected via the PBS resource attribute ehc_[1-7]d . + + |Hypercube|dimension| + --------------- |---|---|--------------------------------- + |1D|ehc_1d| + |2D|ehc_2d| + |3D|ehc_3d| + |4D|ehc_4d| + |5D|ehc_5d| + |6D|ehc_6d| + |7D|ehc_7d| + +[Schematic representation of the Salomon cluster IB single-plain +topology represents hypercube +dimension 0](ib-single-plane-topology.html). + +### 7D Enhanced Hypercube {#d-enhanced-hypercube} + + + + + + |Node type|Count|Short name|Long name|Rack| + -------------------------------------- - |---|---|-------- -------------------------- ------- + |M-Cell compute nodes w/o accelerator|576|cns1 -cns576|r1i0n0 - r4i7n17|1-4| + |compute nodes MIC accelerated|432|cns577 - cns1008|r21u01n577 - r37u31n1008|21-38| + +###  IB Topology + + + diff --git a/converted/docs.it4i.cz/salomon/network-1/IBsingleplanetopologyAcceleratednodessmall.png b/converted/docs.it4i.cz/salomon/network-1/IBsingleplanetopologyAcceleratednodessmall.png new file mode 100644 index 0000000000000000000000000000000000000000..fd6d5759d5bd38554e2e5b956435444cd3c2044c Binary files /dev/null and b/converted/docs.it4i.cz/salomon/network-1/IBsingleplanetopologyAcceleratednodessmall.png differ diff --git a/converted/docs.it4i.cz/salomon/network-1/IBsingleplanetopologyICEXMcellsmall.png b/converted/docs.it4i.cz/salomon/network-1/IBsingleplanetopologyICEXMcellsmall.png new file mode 100644 index 0000000000000000000000000000000000000000..2fc399aed344f04a2916e2dfe48cd956a4370bbc Binary files /dev/null and b/converted/docs.it4i.cz/salomon/network-1/IBsingleplanetopologyICEXMcellsmall.png differ diff --git a/converted/docs.it4i.cz/salomon/network-1/Salomon_IB_topology.png b/converted/docs.it4i.cz/salomon/network-1/Salomon_IB_topology.png new file mode 100644 index 0000000000000000000000000000000000000000..e370a4b44725571e51498ed45fa3e0aa313b94e4 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/network-1/Salomon_IB_topology.png differ diff --git a/converted/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.md b/converted/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.md new file mode 100644 index 0000000000000000000000000000000000000000..70bd60ea10df3857aea3e4b8e7295e0aa3f8cf5f --- /dev/null +++ 
b/converted/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.md @@ -0,0 +1,46 @@ +IB single-plane topology +======================== + + + +A complete M-Cell assembly consists of four compute racks. Each rack +contains 4x physical IRUs - Independent rack units. Using one dual +socket node per one blade slot leads to 8 logical IRUs. Each rack +contains 4x2 SGI ICE X IB Premium Blades. + +The SGI ICE X IB Premium Blade provides the first level of +interconnection via dual 36-port Mellanox FDR InfiniBand ASIC switch +with connections as follows: + +- 9 ports from each switch chip connect to the unified backplane, to + connect the 18 compute node slots +- 3 ports on each chip provide connectivity between the chips +- 24 ports from each switch chip connect to the external bulkhead, for + a total of 48 + +###IB single-plane topology - ICEX Mcell + +Each colour in each physical IRU represents one dual-switch ASIC switch. + + + + + +### IB single-plane topology - Accelerated nodes + +Each of the 3 inter-connected D racks are equivalent to one half of +Mcell rack. 18x D rack with MIC accelerated nodes [r21-r38] are +equivalent to 3 Mcell racks as shown in a diagram [7D Enhanced +Hypercube](7d-enhanced-hypercube.html). + +As shown in a diagram : + +- Racks 21, 22, 23, 24, 25, 26 are equivalent to one Mcell rack. +- Racks 27, 28, 29, 30, 31, 32 are equivalent to one Mcell rack. +- Racks 33, 34, 35, 36, 37, 38 are equivalent to one Mcell rack. + + + + + diff --git a/converted/docs.it4i.cz/salomon/network-1/network.md b/converted/docs.it4i.cz/salomon/network-1/network.md new file mode 100644 index 0000000000000000000000000000000000000000..79187d02c3c1a7c5905fd4d4fb4e2939f6e19c09 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/network-1/network.md @@ -0,0 +1,74 @@ +Network +======= + + + +All compute and login nodes of Salomon are interconnected by 7D Enhanced +hypercube +[Infiniband](http://en.wikipedia.org/wiki/InfiniBand) +network and by Gigabit +[Ethernet](http://en.wikipedia.org/wiki/Ethernet) +network. Only +[Infiniband](http://en.wikipedia.org/wiki/InfiniBand) +network may be used to transfer user data. + +Infiniband Network +------------------ + +All compute and login nodes of Salomon are interconnected by 7D Enhanced +hypercube +[Infiniband](http://en.wikipedia.org/wiki/InfiniBand) +network (56 Gbps). The network topology is a [7D Enhanced +hypercube](7d-enhanced-hypercube.html). + +Read more about schematic representation of the Salomon cluster [IB +single-plain topology](ib-single-plane-topology.html) +([hypercube dimension](7d-enhanced-hypercube.html) +0).[>](IB%20single-plane%20topology%20-%20Accelerated%20nodes.pdf/view.html) + +The compute nodes may be accessed via the Infiniband network using ib0 +network interface, in address range 10.17.0.0 (mask 255.255.224.0). The +MPI may be used to establish native Infiniband connection among the +nodes. + +The network provides **2170MB/s** transfer rates via the TCP connection +(single stream) and up to **3600MB/s** via native Infiniband protocol. + + + +Example +------- + +` +$ qsub -q qexp -l select=4:ncpus=16 -N Name0 ./myjob +$ qstat -n -u username + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +15209.isrv5 username qexp Name0 5530 4 96 -- 01:00 R 00:00 + r4i1n0/0*24+r4i1n1/0*24+r4i1n2/0*24+r4i1n3/0*24 +` + +In this example, we access the node r4i1n0 by Infiniband network via the +ib0 interface. 
+ +` +$ ssh 10.17.35.19 +` + +In this example, we get +information of the Infiniband network. + +` +$ ifconfig +.... +inet addr:10.17.35.19.... +.... + +$ ip addr show ib0 + +.... +inet 10.17.35.19.... +.... +` + diff --git a/converted/docs.it4i.cz/salomon/prace.md b/converted/docs.it4i.cz/salomon/prace.md new file mode 100644 index 0000000000000000000000000000000000000000..daee862130cdbcfc236bfec42e8ae7eab7bbb0bf --- /dev/null +++ b/converted/docs.it4i.cz/salomon/prace.md @@ -0,0 +1,385 @@ +PRACE User Support +================== + + + +Intro +----- + +PRACE users coming to Salomon as to TIER-1 system offered through the +DECI calls are in general treated as standard users and so most of the +general documentation applies to them as well. This section shows the +main differences for quicker orientation, but often uses references to +the original documentation. PRACE users who don't undergo the full +procedure (including signing the IT4I AuP on top of the PRACE AuP) will +not have a password and thus access to some services intended for +regular users. This can lower their comfort, but otherwise they should +be able to use the TIER-1 system as intended. Please see the [Obtaining +Login Credentials +section](../get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.html), +if the same level of access is required. + +All general [PRACE User +Documentation](http://www.prace-ri.eu/user-documentation/) +should be read before continuing reading the local documentation here. + +[]()Help and Support +------------------------ + +If you have any troubles, need information, request support or want to +install additional software, please use [PRACE +Helpdesk](http://www.prace-ri.eu/helpdesk-guide264/). + +Information about the local services are provided in the [introduction +of general user documentation](introduction.html). +Please keep in mind, that standard PRACE accounts don't have a password +to access the web interface of the local (IT4Innovations) request +tracker and thus a new ticket should be created by sending an e-mail to +support[at]it4i.cz. + +Obtaining Login Credentials +--------------------------- + +In general PRACE users already have a PRACE account setup through their +HOMESITE (institution from their country) as a result of rewarded PRACE +project proposal. This includes signed PRACE AuP, generated and +registered certificates, etc. + +If there's a special need a PRACE user can get a standard (local) +account at IT4Innovations. To get an account on the Salomon cluster, the +user needs to obtain the login credentials. The procedure is the same as +for general users of the cluster, so please see the corresponding +[section of the general documentation +here](../get-started-with-it4innovations/obtaining-login-credentials.html). + +Accessing the cluster +--------------------- + +### Access with GSI-SSH + +For all PRACE users the method for interactive access (login) and data +transfer based on grid services from Globus Toolkit (GSI SSH and +GridFTP) is supported. + +The user will need a valid certificate and to be present in the PRACE +LDAP (please contact your HOME SITE or the primary investigator of your +project for LDAP account creation). 
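Before the grid services below are used for the first time, it may be worth verifying that a usable grid certificate is installed on the machine you connect from. A minimal sketch using standard Globus Toolkit commands (assuming the certificate is in the default location on your workstation):

`
$ grid-cert-info -subject    # print the subject of the installed certificate
$ grid-cert-info -enddate    # print its expiration date
`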
+ +Most of the information needed by PRACE users accessing the Salomon +TIER-1 system can be found here: + +- [General user's + FAQ](http://www.prace-ri.eu/Users-General-FAQs) +- [Certificates + FAQ](http://www.prace-ri.eu/Certificates-FAQ) +- [Interactive access using + GSISSH](http://www.prace-ri.eu/Interactive-Access-Using-gsissh) +- [Data transfer with + GridFTP](http://www.prace-ri.eu/Data-Transfer-with-GridFTP-Details) +- [Data transfer with + gtransfer](http://www.prace-ri.eu/Data-Transfer-with-gtransfer) + + + +Before you start to use any of the services don't forget to create a +proxy certificate from your certificate: + + $ grid-proxy-init + +To check whether your proxy certificate is still valid (by default it's +valid 12 hours), use: + + $ grid-proxy-info + + + +To access Salomon cluster, two login nodes running GSI SSH service are +available. The service is available from public Internet as well as from +the internal PRACE network (accessible only from other PRACE partners). + +***Access from PRACE network:** + +It is recommended to use the single DNS name +salomon-prace.it4i.cz which is distributed +between the two login nodes. If needed, user can login directly to one +of the login nodes. The addresses are: + + |Login address|Port|Protocol|Login node| + |---|---| + |salomon-prace.it4i.cz|2222|gsissh|login1, login2, login3 or login4| + |login1-prace.salomon.it4i.cz|2222|gsissh|login1| + |login2-prace.salomon.it4i.cz|2222|gsissh|login2| + |login3-prace.salomon.it4i.cz|2222|gsissh|login3| + |login4-prace.salomon.it4i.cz|2222|gsissh|login4| + + + + $ gsissh -p 2222 salomon-prace.it4i.cz + +When logging from other PRACE system, the prace_service script can be +used: + + $ gsissh `prace_service -i -s salomon` + + + +***Access from public Internet:** + +It is recommended to use the single DNS name +salomon.it4i.cz which is distributed between +the two login nodes. If needed, user can login directly to one of the +login nodes. The addresses are: + + |Login address|Port|Protocol|Login node| + |---|---| + |salomon.it4i.cz|2222|gsissh|login1, login2, login3 or login4| + |login1.salomon.it4i.cz|2222|gsissh|login1| + |login2-prace.salomon.it4i.cz|2222|gsissh|login2| + |login3-prace.salomon.it4i.cz|2222|gsissh|login3| + |login4-prace.salomon.it4i.cz|2222|gsissh|login4| + + $ gsissh -p 2222 salomon.it4i.cz + +When logging from other PRACE system, the +prace_service script can be used: + + $ gsissh `prace_service -e -s salomon` + + + +Although the preferred and recommended file transfer mechanism is [using +GridFTP](prace.html#file-transfers), the GSI SSH +implementation on Salomon supports also SCP, so for small files transfer +gsiscp can be used: + + $ gsiscp -P 2222 _LOCAL_PATH_TO_YOUR_FILE_ salomon.it4i.cz:_SALOMON_PATH_TO_YOUR_FILE_ + + $ gsiscp -P 2222 salomon.it4i.cz:_SALOMON_PATH_TO_YOUR_FILE_ _LOCAL_PATH_TO_YOUR_FILE_ + + $ gsiscp -P 2222 _LOCAL_PATH_TO_YOUR_FILE_ salomon-prace.it4i.cz:_SALOMON_PATH_TO_YOUR_FILE_ + + $ gsiscp -P 2222 salomon-prace.it4i.cz:_SALOMON_PATH_TO_YOUR_FILE_ _LOCAL_PATH_TO_YOUR_FILE_ + +### Access to X11 applications (VNC) + +If the user needs to run X11 based graphical application and does not +have a X11 server, the applications can be run using VNC service. If the +user is using regular SSH based access, please see the [section in +general +documentation](../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html). 
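With regular SSH, the port forwarding part of that procedure boils down to a command of the following shape (port 5961 corresponds to VNC display 61 and is chosen here only to match the GSI SSH example below; use the display number of your own VNC server):

`
local $ ssh salomon.it4i.cz -L 5961:localhost:5961
`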
+ +If the user uses GSI SSH based access, then the procedure is similar to +the SSH based access ([look +here](../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html)), +only the port forwarding must be done using GSI SSH: + + $ gsissh -p 2222 salomon.it4i.cz -L 5961:localhost:5961 + +### Access with SSH + +After successful obtainment of login credentials for the local +IT4Innovations account, the PRACE users can access the cluster as +regular users using SSH. For more information please see the [section in +general +documentation](accessing-the-cluster/shell-and-data-access/shell-and-data-access.html). + +File transfers +------------------ + +PRACE users can use the same transfer mechanisms as regular users (if +they've undergone the full registration procedure). For information +about this, please see [the section in the general +documentation](accessing-the-cluster/shell-and-data-access/shell-and-data-access.html). + +Apart from the standard mechanisms, for PRACE users to transfer data +to/from Salomon cluster, a GridFTP server running Globus Toolkit GridFTP +service is available. The service is available from public Internet as +well as from the internal PRACE network (accessible only from other +PRACE partners). + +There's one control server and three backend servers for striping and/or +backup in case one of them would fail. + +***Access from PRACE network:** + + |Login address|Port|Node role| + |---|---| + |gridftp-prace.salomon.it4i.cz|2812|Front end /control server| + |lgw1-prace.salomon.it4i.cz|2813|Backend / data mover server| + |lgw2-prace.salomon.it4i.cz|2813|Backend / data mover server| + |lgw3-prace.salomon.it4i.cz|2813|Backend / data mover server| + +Copy files **to** Salomon by running the following commands on your +local machine: + + $ globus-url-copy file://_LOCAL_PATH_TO_YOUR_FILE_ gsiftp://gridftp-prace.salomon.it4i.cz:2812/home/prace/_YOUR_ACCOUNT_ON_SALOMON_/_PATH_TO_YOUR_FILE_ + +Or by using prace_service script: + + $ globus-url-copy file://_LOCAL_PATH_TO_YOUR_FILE_ gsiftp://`prace_service -i -f salomon`/home/prace/_YOUR_ACCOUNT_ON_SALOMON_/_PATH_TO_YOUR_FILE_ + +Copy files **from** Salomon: + + $ globus-url-copy gsiftp://gridftp-prace.salomon.it4i.cz:2812/home/prace/_YOUR_ACCOUNT_ON_SALOMON_/_PATH_TO_YOUR_FILE_ file://_LOCAL_PATH_TO_YOUR_FILE_ + +Or by using prace_service script: + + $ globus-url-copy gsiftp://`prace_service -i -f salomon`/home/prace/_YOUR_ACCOUNT_ON_SALOMON_/_PATH_TO_YOUR_FILE_ file://_LOCAL_PATH_TO_YOUR_FILE_ + + + +***Access from public Internet:** + + |Login address|Port|Node role| + |---|---| + |gridftp.salomon.it4i.cz|2812|Front end /control server| + |lgw1.salomon.it4i.cz|2813|Backend / data mover server| + |lgw2.salomon.it4i.cz|2813|Backend / data mover server| + |lgw3.salomon.it4i.cz|2813|Backend / data mover server| + +Copy files **to** Salomon by running the following commands on your +local machine: + + $ globus-url-copy file://_LOCAL_PATH_TO_YOUR_FILE_ gsiftp://gridftp.salomon.it4i.cz:2812/home/prace/_YOUR_ACCOUNT_ON_SALOMON_/_PATH_TO_YOUR_FILE_ + +Or by using prace_service script: + + $ globus-url-copy file://_LOCAL_PATH_TO_YOUR_FILE_ gsiftp://`prace_service -e -f salomon`/home/prace/_YOUR_ACCOUNT_ON_SALOMON_/_PATH_TO_YOUR_FILE_ + +Copy files **from** Salomon: + + $ globus-url-copy gsiftp://gridftp.salomon.it4i.cz:2812/home/prace/_YOUR_ACCOUNT_ON_SALOMON_/_PATH_TO_YOUR_FILE_ file://_LOCAL_PATH_TO_YOUR_FILE_ + +Or by using prace_service script: + + $ globus-url-copy 
gsiftp://`prace_service -e -f salomon`/home/prace/_YOUR_ACCOUNT_ON_SALOMON_/_PATH_TO_YOUR_FILE_ file://_LOCAL_PATH_TO_YOUR_FILE_ + + + +Generally both shared file systems are available through GridFTP: + + |File system mount point|Filesystem|Comment| + |---|---| + |/home|Lustre|Default HOME directories of users in format /home/prace/login/| + |/scratch|Lustre|Shared SCRATCH mounted on the whole cluster| + +More information about the shared file systems is available +[here](storage.html). + +Please note, that for PRACE users a "prace" directory is used also on +the SCRATCH file system. + + |Data type|Default path| + |---|---| + |large project files|/scratch/work/user/prace/login/| + |large scratch/temporary data|/scratch/temp/| + +Usage of the cluster +-------------------- + +There are some limitations for PRACE user when using the cluster. By +default PRACE users aren't allowed to access special queues in the PBS +Pro to have high priority or exclusive access to some special equipment +like accelerated nodes and high memory (fat) nodes. There may be also +restrictions obtaining a working license for the commercial software +installed on the cluster, mostly because of the license agreement or +because of insufficient amount of licenses. + +For production runs always use scratch file systems. The available file +systems are described [here](storage/storage.html). + +### Software, Modules and PRACE Common Production Environment + +All system wide installed software on the cluster is made available to +the users via the modules. The information about the environment and +modules usage is in this [section of general +documentation](environment-and-modules.html). + +PRACE users can use the "prace" module to use the [PRACE Common +Production +Environment](http://www.prace-ri.eu/PRACE-common-production). + + $ module load prace + + + +### Resource Allocation and Job Execution + +General information about the resource allocation, job queuing and job +execution is in this [section of general +documentation](resource-allocation-and-job-execution/introduction.html). + +For PRACE users, the default production run queue is "qprace". PRACE +users can also use two other queues "qexp" and "qfree". + + + |queue|Active project|Project resources|Nodes|priority|authorization|walltime | + + |---|---| + |**qexp** \|no|none required|32 nodes, max 8 per user|150|no|1 / 1h| + \ + + gt; 0 >1006 nodes, max 86 per job 0 no 24 / 48h> 0 >1006 nodes, max 86 per job 0 no 24 / 48h + \ + + + |**qfree** \|yes|none required|752 nodes, max 86 per job|-1024|no|12 / 12h| + \ + + +qprace**, the PRACE \***: This queue is intended for +normal production runs. It is required that active project with nonzero +remaining resources is specified to enter the qprace. The queue runs +with medium priority and no special authorization is required to use it. +The maximum runtime in qprace is 48 hours. If the job needs longer time, +it must use checkpoint/restart functionality. + +### Accounting & Quota + +The resources that are currently subject to accounting are the core +hours. The core hours are accounted on the wall clock basis. The +accounting runs whenever the computational cores are allocated or +blocked via the PBS Pro workload manager (the qsub command), regardless +of whether the cores are actually used for any calculation. See [example +in the general +documentation](resource-allocation-and-job-execution/resources-allocation-policy.html). 
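As a concrete illustration (the numbers are an example only): a job that holds 4 nodes, i.e. 4 × 24 = 96 cores, for 10 wall clock hours is accounted 960 core hours, regardless of whether the cores spent that time computing or sitting idle.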
+ +PRACE users should check their project accounting using the [PRACE +Accounting Tool +(DART)](http://www.prace-ri.eu/accounting-report-tool/). + +Users who have undergone the full local registration procedure +(including signing the IT4Innovations Acceptable Use Policy) and who +have received local password may check at any time, how many core-hours +have been consumed by themselves and their projects using the command +"it4ifree". Please note that you need to know your user password to use +the command and that the displayed core hours are "system core hours" +which differ from PRACE "standardized core hours". + +The **it4ifree** command is a part of it4i.portal.clients package, +located here: +<https://pypi.python.org/pypi/it4i.portal.clients> + + $ it4ifree + Password: +     PID  Total Used ...by me Free +   -------- ------- ------ -------- ------- +   OPEN-0-0 1500000 400644  225265 1099356 +   DD-13-1   10000 2606 2606 7394 + + + +By default file system quota is applied. To check the current status of +the quota (separate for HOME and SCRATCH) use + + $ quota + $ lfs quota -u USER_LOGIN /scratch + +If the quota is insufficient, please contact the +[support](prace.html#help-and-support) and request an +increase. + + + + + diff --git a/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.md b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.md new file mode 100644 index 0000000000000000000000000000000000000000..1282e33dae3f896100f05b4046a6c45f404ae549 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.md @@ -0,0 +1,434 @@ +Capacity computing +================== + + + +Introduction +------------ + +In many cases, it is useful to submit huge (>100+) number of +computational jobs into the PBS queue system. Huge number of (small) +jobs is one of the most effective ways to execute embarrassingly +parallel calculations, achieving best runtime, throughput and computer +utilization. + +However, executing huge number of jobs via the PBS queue may strain the +system. This strain may result in slow response to commands, inefficient +scheduling and overall degradation of performance and user experience, +for all users. For this reason, the number of jobs is **limited to 100 +per user, 1500 per job array** + +Please follow one of the procedures below, in case you wish to schedule +more than >100 jobs at a time. + +- Use [Job arrays](capacity-computing.html#job-arrays) + when running huge number of + [multithread](capacity-computing.html#shared-jobscript-on-one-node) + (bound to one node only) or multinode (multithread across + several nodes) jobs +- Use [GNU + parallel](capacity-computing.html#gnu-parallel) when + running single core jobs +- Combine[GNU parallel with Job + arrays](capacity-computing.html#combining-job-arrays-and-gnu-parallel) + when running huge number of single core jobs + +Policy +------ + +1. A user is allowed to submit at most 100 jobs. Each job may be [a job + array](capacity-computing.html#job-arrays). +2. The array size is at most 1000 subjobs. + +Job arrays +-------------- + +Huge number of jobs may be easily submitted and managed as a job array. + +A job array is a compact representation of many jobs, called subjobs. 
+The subjobs share the same job script, and have the same values for all +attributes and resources, with the following exceptions: + +- each subjob has a unique index, $PBS_ARRAY_INDEX +- job Identifiers of subjobs only differ by their indices +- the state of subjobs can differ (R,Q,...etc.) + +All subjobs within a job array have the same scheduling priority and +schedule as independent jobs. +Entire job array is submitted through a single qsub command and may be +managed by qdel, qalter, qhold, qrls and qsig commands as a single job. + +### Shared jobscript + +All subjobs in job array use the very same, single jobscript. Each +subjob runs its own instance of the jobscript. The instances execute +different work controlled by $PBS_ARRAY_INDEX variable. + +Example: + +Assume we have 900 input files with name beginning with "file" (e. g. +file001, ..., file900). Assume we would like to use each of these input +files with program executable myprog.x, each as a separate job. + +First, we create a tasklist file (or subjobs list), listing all tasks +(subjobs) - all input files in our example: + +` +$ find . -name 'file*' > tasklist +` + +Then we create jobscript: + +` +#!/bin/bash +#PBS -A PROJECT_ID +#PBS -q qprod +#PBS -l select=1:ncpus=24,walltime=02:00:00 + +# change to local scratch directory +SCR=/scratch/work/user/$USER/$PBS_JOBID +mkdir -p $SCR ; cd $SCR || exit + +# get individual tasks from tasklist with index from PBS JOB ARRAY +TASK=$(sed -n "${PBS_ARRAY_INDEX}p" $PBS_O_WORKDIR/tasklist) + +# copy input file and executable to scratch +cp $PBS_O_WORKDIR/$TASK input ; cp $PBS_O_WORKDIR/myprog.x . + +# execute the calculation +./myprog.x < input > output + +# copy output file to submit directory +cp output $PBS_O_WORKDIR/$TASK.out +` + +In this example, the submit directory holds the 900 input files, +executable myprog.x and the jobscript file. As input for each run, we +take the filename of input file from created tasklist file. We copy the +input file to scratch /scratch/work/user/$USER/$PBS_JOBID, execute +the myprog.x and copy the output file back to >the submit +directory, under the $TASK.out name. The myprog.x runs on one +node only and must use threads to run in parallel. Be aware, that if the +myprog.x **is not multithreaded**, then all the **jobs are run as single +thread programs in sequential** manner. Due to allocation of the whole +node, the **accounted time is equal to the usage of whole node**, while +using only 1/24 of the node! + +If huge number of parallel multicore (in means of multinode multithread, +e. g. MPI enabled) jobs is needed to run, then a job array approach +should also be used. The main difference compared to previous example +using one node is that the local scratch should not be used (as it's not +shared between nodes) and MPI or other technique for parallel multinode +run has to be used properly. + +### Submit the job array + +To submit the job array, use the qsub -J command. The 900 jobs of the +[example above](capacity-computing.html#array_example) may +be submitted like this: + +` +$ qsub -N JOBNAME -J 1-900 jobscript +506493[].isrv5 +` + +In this example, we submit a job array of 900 subjobs. Each subjob will +run on full node and is assumed to take less than 2 hours (please note +the #PBS directives in the beginning of the jobscript file, dont' +forget to set your valid PROJECT_ID and desired queue). + +Sometimes for testing purposes, you may need to submit only one-element +array. 
This is not allowed by PBSPro, but there's a workaround: + +` +$ qsub -N JOBNAME -J 9-10:2 jobscript +` + +This will only choose the lower index (9 in this example) for +submitting/running your job. + +### Manage the job array + +Check status of the job array by the qstat command. + +` +$ qstat -a 506493[].isrv5 + +isrv5: + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +12345[].dm2 user2 qprod xx 13516 1 24 -- 00:50 B 00:02 +` + +The status B means that some subjobs are already running. + +Check status of the first 100 subjobs by the qstat command. + +` +$ qstat -a 12345[1-100].isrv5 + +isrv5: + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +12345[1].isrv5 user2 qprod xx 13516 1 24 -- 00:50 R 00:02 +12345[2].isrv5 user2 qprod xx 13516 1 24 -- 00:50 R 00:02 +12345[3].isrv5 user2 qprod xx 13516 1 24 -- 00:50 R 00:01 +12345[4].isrv5 user2 qprod xx 13516 1 24 -- 00:50 Q -- + . . . . . . . . . . . + , . . . . . . . . . . +12345[100].isrv5 user2 qprod xx 13516 1 24 -- 00:50 Q -- +` + +Delete the entire job array. Running subjobs will be killed, queueing +subjobs will be deleted. + +` +$ qdel 12345[].isrv5 +` + +Deleting large job arrays may take a while. + +Display status information for all user's jobs, job arrays, and subjobs. + +` +$ qstat -u $USER -t +` + +Display status information for all user's subjobs. + +` +$ qstat -u $USER -tJ +` + +Read more on job arrays in the [PBSPro Users +guide](../../pbspro-documentation.html). + +GNU parallel +---------------- + +Use GNU parallel to run many single core tasks on one node. + +GNU parallel is a shell tool for executing jobs in parallel using one or +more computers. A job can be a single command or a small script that has +to be run for each of the lines in the input. GNU parallel is most +useful in running single core jobs via the queue system on Anselm. + +For more information and examples see the parallel man page: + +` +$ module add parallel +$ man parallel +` + +### GNU parallel jobscript + +The GNU parallel shell executes multiple instances of the jobscript +using all cores on the node. The instances execute different work, +controlled by the $PARALLEL_SEQ variable. + +Example: + +Assume we have 101 input files with name beginning with "file" (e. g. +file001, ..., file101). Assume we would like to use each of these input +files with program executable myprog.x, each as a separate single core +job. We call these single core jobs tasks. + +First, we create a tasklist file, listing all tasks - all input files in +our example: + +` +$ find . -name 'file*' > tasklist +` + +Then we create jobscript: + +` +#!/bin/bash +#PBS -A PROJECT_ID +#PBS -q qprod +#PBS -l select=1:ncpus=24,walltime=02:00:00 + +[ -z "$PARALLEL_SEQ" ] && +{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/tasklist $0 ; } + +# change to local scratch directory +SCR=/scratch/work/user/$USER/$PBS_JOBID/$PARALLEL_SEQ +mkdir -p $SCR ; cd $SCR || exit + +# get individual task from tasklist +TASK=$1  + +# copy input file and executable to scratch +cp $PBS_O_WORKDIR/$TASK input + +# execute the calculation +cat input > output + +# copy output file to submit directory +cp output $PBS_O_WORKDIR/$TASK.out +` + +In this example, tasks from tasklist are executed via the GNU +parallel. 
The jobscript executes multiple instances of itself in +parallel, on all cores of the node. Once an instace of jobscript is +finished, new instance starts until all entries in tasklist are +processed. Currently processed entry of the joblist may be retrieved via +$1 variable. Variable $TASK expands to one of the input filenames from +tasklist. We copy the input file to local scratch, execute the myprog.x +and copy the output file back to the submit directory, under the +$TASK.out name. + +### Submit the job + +To submit the job, use the qsub command. The 101 tasks' job of the +[example above](capacity-computing.html#gp_example) may be +submitted like this: + +` +$ qsub -N JOBNAME jobscript +12345.dm2 +` + +In this example, we submit a job of 101 tasks. 24 input files will be +processed in parallel. The 101 tasks on 24 cores are assumed to +complete in less than 2 hours. + +Please note the #PBS directives in the beginning of the jobscript file, +dont' forget to set your valid PROJECT_ID and desired queue. + +Job arrays and GNU parallel +------------------------------- + +Combine the Job arrays and GNU parallel for best throughput of single +core jobs + +While job arrays are able to utilize all available computational nodes, +the GNU parallel can be used to efficiently run multiple single-core +jobs on single node. The two approaches may be combined to utilize all +available (current and future) resources to execute single core jobs. + +Every subjob in an array runs GNU parallel to utilize all cores on the +node + +### GNU parallel, shared jobscript + +Combined approach, very similar to job arrays, can be taken. Job array +is submitted to the queuing system. The subjobs run GNU parallel. The +GNU parallel shell executes multiple instances of the jobscript using +all cores on the node. The instances execute different work, controlled +by the $PBS_JOB_ARRAY and $PARALLEL_SEQ variables. + +Example: + +Assume we have 992 input files with name beginning with "file" (e. g. +file001, ..., file992). Assume we would like to use each of these input +files with program executable myprog.x, each as a separate single core +job. We call these single core jobs tasks. + +First, we create a tasklist file, listing all tasks - all input files in +our example: + +` +$ find . -name 'file*' > tasklist +` + +Next we create a file, controlling how many tasks will be executed in +one subjob + +` +$ seq 32 > numtasks +` + +Then we create jobscript: + +` +#!/bin/bash +#PBS -A PROJECT_ID +#PBS -q qprod +#PBS -l select=1:ncpus=24,walltime=02:00:00 + +[ -z "$PARALLEL_SEQ" ] && +{ module add parallel ; exec parallel -a $PBS_O_WORKDIR/numtasks $0 ; } + +# change to local scratch directory +SCR=/scratch/work/user/$USER/$PBS_JOBID/$PARALLEL_SEQ +mkdir -p $SCR ; cd $SCR || exit + +# get individual task from tasklist with index from PBS JOB ARRAY and index form Parallel +IDX=$(($PBS_ARRAY_INDEX + $PARALLEL_SEQ - 1)) +TASK=$(sed -n "${IDX}p" $PBS_O_WORKDIR/tasklist) +[ -z "$TASK" ] && exit + +# copy input file and executable to scratch +cp $PBS_O_WORKDIR/$TASK input + +# execute the calculation +cat input > output + +# copy output file to submit directory +cp output $PBS_O_WORKDIR/$TASK.out +` + +In this example, the jobscript executes in multiple instances in +parallel, on all cores of a computing node. Variable $TASK expands to +one of the input filenames from tasklist. We copy the input file to +local scratch, execute the myprog.x and copy the output file back to the +submit directory, under the $TASK.out name. 
The numtasks file controls +how many tasks will be run per subjob. Once an task is finished, new +task starts, until the number of tasks in numtasks file is reached. + +Select subjob walltime and number of tasks per subjob carefully + + When deciding this values, think about following guiding rules : + +1. Let n=N/24. Inequality (n+1) * T < W should hold. The N is + number of tasks per subjob, T is expected single task walltime and W + is subjob walltime. Short subjob walltime improves scheduling and + job throughput. +2. Number of tasks should be modulo 24. +3. These rules are valid only when all tasks have similar task + walltimes T. + +### Submit the job array + +To submit the job array, use the qsub -J command. The 992 tasks' job of +the [example +above](capacity-computing.html#combined_example) may be +submitted like this: + +` +$ qsub -N JOBNAME -J 1-992:32 jobscript +12345[].dm2 +` + +In this example, we submit a job array of 31 subjobs. Note the -J +1-992:**48**, this must be the same as the number sent to numtasks file. +Each subjob will run on full node and process 24 input files in +parallel, 48 in total per subjob. Every subjob is assumed to complete +in less than 2 hours. + +Please note the #PBS directives in the beginning of the jobscript file, +dont' forget to set your valid PROJECT_ID and desired queue. + +Examples +-------- + +Download the examples in +[capacity.zip](capacity-computing-example), +illustrating the above listed ways to run huge number of jobs. We +recommend to try out the examples, before using this for running +production jobs. + +Unzip the archive in an empty directory on Anselm and follow the +instructions in the README file + +` +$ unzip capacity.zip +$ cd capacity +$ cat README +` + + + diff --git a/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/fairshare_formula.png b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/fairshare_formula.png new file mode 100644 index 0000000000000000000000000000000000000000..6a5a1443fa08cd9d3c62bea52bbb48136b2501dc Binary files /dev/null and b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/fairshare_formula.png differ diff --git a/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.md b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..6ef6f4cfabb1db6af5bb44a227aa167f223246c0 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.md @@ -0,0 +1,56 @@ +Resource Allocation and Job Execution +===================================== + + + +To run a [job](job-submission-and-execution.html), +[computational +resources](resources-allocation-policy.html) for this +particular job must be allocated. This is done via the PBS Pro job +workload manager software, which efficiently distributes workloads +across the supercomputer. Extensive informations about PBS Pro can be +found in the [official documentation +here](../../pbspro-documentation.html), especially in +the [PBS Pro User's +Guide](https://docs.it4i.cz/pbspro-documentation/pbspro-users-guide). + +Resources Allocation Policy +--------------------------- + +The resources are allocated to the job in a fairshare fashion, subject +to constraints set by the queue and resources available to the Project. +[The Fairshare](job-priority.html) at Salomon ensures +that individual users may consume approximately equal amount of +resources per week. 
The resources are accessible via several queues for +queueing the jobs. The queues provide prioritized and exclusive access +to the computational resources. Following queues are available to Anselm +users: + +- **qexp**, the \ +- **qprod**, the \*** +- **qlong**, the Long queue +- **qmpp**, the Massively parallel queue +- **qfat**, the queue to access SMP UV2000 machine +- **qfree,** the Free resource utilization queue + +Check the queue status at <https://extranet.it4i.cz/rsweb/salomon/> + +Read more on the [Resource Allocation +Policy](resources-allocation-policy.html) page. + +Job submission and execution +---------------------------- + +Use the **qsub** command to submit your jobs. + +The qsub submits the job into the queue. The qsub command creates a +request to the PBS Job manager for allocation of specified resources. +The **smallest allocation unit is entire node, 24 cores**, with +exception of the qexp queue. The resources will be allocated when +available, subject to allocation policies and constraints. **After the +resources are allocated the jobscript or interactive shell is executed +on first of the allocated nodes.** + +Read more on the [Job submission and +execution](job-submission-and-execution.html) page. + diff --git a/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.md b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.md new file mode 100644 index 0000000000000000000000000000000000000000..43919cced0890a66a683f0a02c26d8aed8aa75ef --- /dev/null +++ b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.md @@ -0,0 +1,109 @@ +Job scheduling +============== + +Job execution priority +---------------------- + +Scheduler gives each job an execution priority and then uses this job +execution priority to select which job(s) to run. + +Job execution priority is determined by these job properties (in order +of importance): + +1. queue priority +2. fairshare priority +3. eligible time + +### Queue priority + +Queue priority is priority of queue where job is queued before +execution. + +Queue priority has the biggest impact on job execution priority. +Execution priority of jobs in higher priority queues is always greater +than execution priority of jobs in lower priority queues. Other +properties of job used for determining job execution priority (fairshare +priority, eligible time) cannot compete with queue priority. + +Queue priorities can be seen at +<https://extranet.it4i.cz/rsweb/salomon/queues> + +### Fairshare priority + +Fairshare priority is priority calculated on recent usage of resources. +Fairshare priority is calculated per project, all members of project +share same fairshare priority. Projects with higher recent usage have +lower fairshare priority than projects with lower or none recent usage. + +Fairshare priority is used for ranking jobs with equal queue priority. + +Fairshare priority is calculated as + + + +where MAX_FAIRSHARE has value 1E6, +usage~Project~ is cumulated usage by all members of selected project, +usage~Total~ is total usage by all users, by all projects. + +Usage counts allocated corehours (ncpus*walltime). Usage is decayed, or +cut in half periodically, at the interval 168 hours (one week). +Jobs queued in queue qexp are not calculated to project's usage. + +Calculated usage and fairshare priority can be seen at +<https://extranet.it4i.cz/rsweb/salomon/projects>. 
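The fairshare formula itself is included in the original page as an image. Written out from the definitions given above (a reconstruction for readability, not an authoritative statement of the scheduler's exact implementation), it has the form:

`
fairshare_priority = MAX_FAIRSHARE * ( 1 - usage_Project / usage_Total )
`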
+ +Calculated fairshare priority can be also seen as +Resource_List.fairshare attribute of a job. + +###Eligible time + +Eligible time is amount (in seconds) of eligible time job accrued while +waiting to run. Jobs with higher eligible time gains higher +priority. + +Eligible time has the least impact on execution priority. Eligible time +is used for sorting jobs with equal queue priority and fairshare +priority. It is very, very difficult for >eligible time to +compete with fairshare priority. + +Eligible time can be seen as eligible_time attribute of +job. + +### Formula + +Job execution priority (job sort formula) is calculated as: + + + +### Job backfilling + +The scheduler uses job backfilling. + +Backfilling means fitting smaller jobs around the higher-priority jobs +that the scheduler is going to run next, in such a way that the +higher-priority jobs are not delayed. Backfilling allows us to keep +resources from becoming idle when the top job (job with the highest +execution priority) cannot run. + +The scheduler makes a list of jobs to run in order of execution +priority. Scheduler looks for smaller jobs that can fit into the usage +gaps +around the highest-priority jobs in the list. The scheduler looks in the +prioritized list of jobs and chooses the highest-priority smaller jobs +that fit. Filler jobs are run only if they will not delay the start time +of top jobs. + +It means, that jobs with lower execution priority can be run before jobs +with higher execution priority. + +It is **very beneficial to specify the walltime** when submitting jobs. + +Specifying more accurate walltime enables better schedulling, better +execution times and better resource usage. Jobs with suitable (small) +walltime could be backfilled - and overtake job(s) with higher priority. + +### Job placement + +Job [placement can be controlled by flags during +submission](job-submission-and-execution.html#job_placement). + diff --git a/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.md b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.md new file mode 100644 index 0000000000000000000000000000000000000000..e0499050b26d8c4e6f9e56c36bc9acf7f3811ebb --- /dev/null +++ b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.md @@ -0,0 +1,595 @@ +Job submission and execution +============================ + + + +Job Submission +-------------- + +When allocating computational resources for the job, please specify + +1. suitable queue for your job (default is qprod) +2. number of computational nodes required +3. number of cores per node required +4. maximum wall time allocated to your calculation, note that jobs + exceeding maximum wall time will be killed +5. Project ID +6. Jobscript or interactive switch + +Use the **qsub** command to submit your job to a queue for allocation of +the computational resources. + +Submit the job using the qsub command: + +` +$ qsub -A Project_ID -q queue -l select=x:ncpus=y,walltime=[[hh:]mm:]ss[.ms] jobscript +` + +The qsub submits the job into the queue, in another words the qsub +command creates a request to the PBS Job manager for allocation of +specified resources. The resources will be allocated when available, +subject to above described policies and constraints. 
**After the +resources are allocated the jobscript or interactive shell is executed +on first of the allocated nodes.** + +PBS statement nodes (qsub -l nodes=nodespec) is not supported on Salomon +cluster.** + +### Job Submission Examples + +` +$ qsub -A OPEN-0-0 -q qprod -l select=64:ncpus=24,walltime=03:00:00 ./myjob +` + +In this example, we allocate 64 nodes, 24 cores per node, for 3 hours. +We allocate these resources via the qprod queue, consumed resources will +be accounted to the Project identified by Project ID OPEN-0-0. Jobscript +myjob will be executed on the first node in the allocation. + + + +` +$ qsub -q qexp -l select=4:ncpus=24 -I +` + +In this example, we allocate 4 nodes, 24 cores per node, for 1 hour. We +allocate these resources via the qexp queue. The resources will be +available interactively + + + +` +$ qsub -A OPEN-0-0 -q qlong -l select=10:ncpus=24 ./myjob +` + +In this example, we allocate 10 nodes, 24 cores per node, for 72 hours. +We allocate these resources via the qlong queue. Jobscript myjob will be +executed on the first node in the allocation. + + + +` +$ qsub -A OPEN-0-0 -q qfree -l select=10:ncpus=24 ./myjob +` + +In this example, we allocate 10 nodes, 24 cores per node, for 12 hours. +We allocate these resources via the qfree queue. It is not required that +the project OPEN-0-0 has any available resources left. Consumed +resources are still accounted for. Jobscript myjob will be executed on +the first node in the allocation. + +### Intel Xeon Phi co-processors + +To allocate a node with Xeon Phi co-processor, user needs to specify +that in select statement. Currently only allocation of whole nodes with +both Phi cards as the smallest chunk is supported. Standard PBSPro +approach through attributes "accelerator", "naccelerators" and +"accelerator_model" is used. The "accelerator_model" can be omitted, +since on Salomon only one type of accelerator type/model is available. + +The absence of specialized queue for accessing the nodes with cards +means, that the Phi cards can be utilized in any queue, including qexp +for testing/experiments, qlong for longer jobs, qfree after the project +resources have been spent, etc. The Phi cards are thus also available to +PRACE users. There's no need to ask for permission to utilize the Phi +cards in project proposals. + +` +$ qsub -A OPEN-0-0 -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 ./myjob +` + +In this example, we allocate 1 node, with 24 cores, with 2 Xeon Phi +7120p cards, running batch job ./myjob. The default time for qprod is +used, e. g. 24 hours. + +` +$ qsub -A OPEN-0-0 -I -q qlong -l select=4:ncpus=24:accelerator=True:naccelerators=2 -l walltime=56:00:00 -I +` + +In this example, we allocate 4 nodes, with 24 cores per node (totalling +96 cores), with 2 Xeon Phi 7120p cards per node (totalling 8 Phi cards), +running interactive job for 56 hours. The accelerator model name was +omitted. + +### UV2000 SMP + +14 NUMA nodes available on UV2000 +Per NUMA node allocation. +Jobs are isolated by cpusets. + +The UV2000 (node uv1) offers 3328GB of RAM and 112 cores, distributed in +14 NUMA nodes. A NUMA node packs 8 cores and approx. 236GB RAM. In the +PBS the UV2000 provides 14 chunks, a chunk per NUMA node (see +[Resource allocation +policy](resources-allocation-policy.html)). The jobs on +UV2000 are isolated from each other by cpusets, so that a job by one +user may not utilize CPU or memory allocated to a job by other user. 
+Always, full chunks are allocated, a job may only use resources of the +NUMA nodes allocated to itself. + +` + $ qsub -A OPEN-0-0 -q qfat -l select=14 ./myjob +` + +In this example, we allocate all 14 NUMA nodes (corresponds to 14 +chunks), 112 cores of the SGI UV2000 node for 72 hours. Jobscript myjob +will be executed on the node uv1. + +` +$ qsub -A OPEN-0-0 -q qfat -l select=1:mem=2000GB ./myjob +` + +In this example, we allocate 2000GB of memory on the UV2000 for 72 +hours. By requesting 2000GB of memory, 10 chunks are allocated. +Jobscript myjob will be executed on the node uv1. + +### Useful tricks + +All qsub options may be [saved directly into the +jobscript](job-submission-and-execution.html#PBSsaved). In +such a case, no options to qsub are needed. + +` +$ qsub ./myjob +` + + + +By default, the PBS batch system sends an e-mail only when the job is +aborted. Disabling mail events completely can be done like this: + +` +$ qsub -m n +` + +Advanced job placement +-------------------------- + +### Placement by name + +Specific nodes may be allocated via the PBS + +` +qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=24:host=r24u35n680+1:ncpus=24:host=r24u36n681 -I +` + +Or using short names + +` +qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=24:host=cns680+1:ncpus=24:host=cns681 -I +` + +In this example, we allocate nodes r24u35n680 and r24u36n681, all 24 +cores per node, for 24 hours. Consumed resources will be accounted to +the Project identified by Project ID OPEN-0-0. The resources will be +available interactively. + +### Placement by |Hypercube|dimension| + +Nodes may be selected via the PBS resource attribute ehc_[1-7]d . + + |Hypercube|dimension| + --------------- |---|---|--------------------------------- + |1D|ehc_1d| + |2D|ehc_2d| + |3D|ehc_3d| + |4D|ehc_4d| + |5D|ehc_5d| + |6D|ehc_6d| + |7D|ehc_7d| + + + +` +$ qsub -A OPEN-0-0 -q qprod -l select=4:ncpus=24 -l place=group=ehc_1d -I +` + +In this example, we allocate 4 nodes, 24 cores, selecting only the nodes +with [hypercube +dimension](../network-1/7d-enhanced-hypercube.html) 1. + +### Placement by IB switch + +Groups of computational nodes are connected to chassis integrated +Infiniband switches. These switches form the leaf switch layer of the +[Infiniband network](../network-1.html) . Nodes sharing +the leaf switch can communicate most efficiently. Sharing the same +switch prevents hops in the network and provides for unbiased, most +efficient network communication. + +There are at most 9 nodes sharing the same Infiniband switch. + +Infiniband switch list: + +` +$ qmgr -c "print node @a" | grep switch +set node r4i1n11 resources_available.switch = r4i1s0sw1 +set node r2i0n0 resources_available.switch = r2i0s0sw1 +set node r2i0n1 resources_available.switch = r2i0s0sw1 +... +` + +List of all nodes per Infiniband switch: + +` +$ qmgr -c "print node @a" | grep r36sw3 +set node r36u31n964 resources_available.switch = r36sw3 +set node r36u32n965 resources_available.switch = r36sw3 +set node r36u33n966 resources_available.switch = r36sw3 +set node r36u34n967 resources_available.switch = r36sw3 +set node r36u35n968 resources_available.switch = r36sw3 +set node r36u36n969 resources_available.switch = r36sw3 +set node r37u32n970 resources_available.switch = r36sw3 +set node r37u33n971 resources_available.switch = r36sw3 +set node r37u34n972 resources_available.switch = r36sw3 +` + +Nodes sharing the same switch may be selected via the PBS resource +attribute switch. 
+ +We recommend allocating compute nodes of a single switch when best +possible computational network performance is required to run the job +efficiently: + +` +$ qsub -A OPEN-0-0 -q qprod -l select=9:ncpus=24:switch=r4i1s0sw1 ./myjob +` + +In this example, we request all the 9 nodes sharing the r4i1s0sw1 switch +for 24 hours. + +` +$ qsub -A OPEN-0-0 -q qprod -l select=9:ncpus=24 -l place=group=switch ./myjob +` + +In this example, we request 9 nodes placed on the same switch using node +grouping placement for 24 hours. + +HTML commented section #1 (turbo boost is to be implemented) + +Job Management +-------------- + +Check status of your jobs using the **qstat** and **check-pbs-jobs** +commands + +` +$ qstat -a +$ qstat -a -u username +$ qstat -an -u username +$ qstat -f 12345.isrv5 +` + +Example: + +` +$ qstat -a + +srv11: + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +16287.isrv5 user1 qlong job1 6183 4 64 -- 144:0 R 38:25 +16468.isrv5 user1 qlong job2 8060 4 64 -- 144:0 R 17:44 +16547.isrv5 user2 qprod job3x 13516 2 32 -- 48:00 R 00:58 +` + +In this example user1 and user2 are running jobs named job1, job2 and +job3x. The jobs job1 and job2 are using 4 nodes, 16 cores per node each. +The job1 already runs for 38 hours and 25 minutes, job2 for 17 hours 44 +minutes. The job1 already consumed 64*38.41 = 2458.6 core hours. The +job3x already consumed 0.96*32 = 30.93 core hours. These consumed core +hours will be accounted on the respective project accounts, regardless +of whether the allocated cores were actually used for computations. + +Check status of your jobs using check-pbs-jobs command. Check presence +of user's PBS jobs' processes on execution hosts. Display load, +processes. Display job standard and error output. Continuously display +(tail -f) job standard or error output. + +` +$ check-pbs-jobs --check-all +$ check-pbs-jobs --print-load --print-processes +$ check-pbs-jobs --print-job-out --print-job-err +$ check-pbs-jobs --jobid JOBID --check-all --print-all +$ check-pbs-jobs --jobid JOBID --tailf-job-out +` + +Examples: + +` +$ check-pbs-jobs --check-all +JOB 35141.dm2, session_id 71995, user user2, nodes r3i6n2,r3i6n3 +Check session id: OK +Check processes +r3i6n2: OK +r3i6n3: No process +` + +In this example we see that job 35141.dm2 currently runs no process on +allocated node r3i6n2, which may indicate an execution error. + +` +$ check-pbs-jobs --print-load --print-processes +JOB 35141.dm2, session_id 71995, user user2, nodes r3i6n2,r3i6n3 +Print load +r3i6n2: LOAD: 16.01, 16.01, 16.00 +r3i6n3: LOAD: 0.01, 0.00, 0.01 +Print processes + %CPU CMD +r3i6n2: 0.0 -bash +r3i6n2: 0.0 /bin/bash /var/spool/PBS/mom_priv/jobs/35141.dm2.SC +r3i6n2: 99.7 run-task +... +` + +In this example we see that job 35141.dm2 currently runs process +run-task on node r3i6n2, using one thread only, while node r3i6n3 is +empty, which may indicate an execution error. 
+ +` +$ check-pbs-jobs --jobid 35141.dm2 --print-job-out +JOB 35141.dm2, session_id 71995, user user2, nodes r3i6n2,r3i6n3 +Print job standard output: +======================== Job start ========================== +Started at   : Fri Aug 30 02:47:53 CEST 2013 +Script name  : script +Run loop 1 +Run loop 2 +Run loop 3 +` + +In this example, we see actual output (some iteration loops) of the job +35141.dm2 + +Manage your queued or running jobs, using the **qhold**, **qrls**, +qdel,** **qsig** or **qalter** commands + +You may release your allocation at any time, using qdel command + +` +$ qdel 12345.isrv5 +` + +You may kill a running job by force, using qsig command + +` +$ qsig -s 9 12345.isrv5 +` + +Learn more by reading the pbs man page + +` +$ man pbs_professional +` + +Job Execution +------------- + +### Jobscript + +Prepare the jobscript to run batch jobs in the PBS queue system + +The Jobscript is a user made script, controlling sequence of commands +for executing the calculation. It is often written in bash, other +scripts may be used as well. The jobscript is supplied to PBS **qsub** +command as an argument and executed by the PBS Professional workload +manager. + +The jobscript or interactive shell is executed on first of the allocated +nodes. + +` +$ qsub -q qexp -l select=4:ncpus=24 -N Name0 ./myjob +$ qstat -n -u username + +isrv5: + Req'd Req'd Elap +Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time +--------------- -------- -- |---|---| ------ --- --- ------ ----- - ----- +15209.isrv5 username qexp Name0 5530 4 96 -- 01:00 R 00:00 + r21u01n577/0*24+r21u02n578/0*24+r21u03n579/0*24+r21u04n580/0*24 +` + + In this example, the nodes r21u01n577, r21u02n578, r21u03n579, +r21u04n580 were allocated for 1 hour via the qexp queue. The jobscript +myjob will be executed on the node r21u01n577, while the +nodes r21u02n578, r21u03n579, r21u04n580 are available for use as well. + +The jobscript or interactive shell is by default executed in home +directory + +` +$ qsub -q qexp -l select=4:ncpus=24 -I +qsub: waiting for job 15210.isrv5 to start +qsub: job 15210.isrv5 ready + +$ pwd +/home/username +` + +In this example, 4 nodes were allocated interactively for 1 hour via the +qexp queue. The interactive shell is executed in the home directory. + +All nodes within the allocation may be accessed via ssh. Unallocated +nodes are not accessible to user. + +The allocated nodes are accessible via ssh from login nodes. The nodes +may access each other via ssh as well. + +Calculations on allocated nodes may be executed remotely via the MPI, +ssh, pdsh or clush. You may find out which nodes belong to the +allocation by reading the $PBS_NODEFILE file + +` +qsub -q qexp -l select=2:ncpus=24 -I +qsub: waiting for job 15210.isrv5 to start +qsub: job 15210.isrv5 ready + +$ pwd +/home/username + +$ sort -u $PBS_NODEFILE +r2i5n6.ib0.smc.salomon.it4i.cz +r4i6n13.ib0.smc.salomon.it4i.cz +r4i7n0.ib0.smc.salomon.it4i.cz +r4i7n2.ib0.smc.salomon.it4i.cz + + +$ pdsh -w r2i5n6,r4i6n13,r4i7n[0,2] hostname +r4i6n13: r4i6n13 +r2i5n6: r2i5n6 +r4i7n2: r4i7n2 +r4i7n0: r4i7n0 +` + +In this example, the hostname program is executed via pdsh from the +interactive shell. The execution runs on all four allocated nodes. The +same result would be achieved if the pdsh is called from any of the +allocated nodes or from the login nodes. 
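+
+If pdsh or clush is not available, a plain ssh loop over the unique
+entries of $PBS_NODEFILE is a minimal substitute (a sketch of the same
+idea; it visits the allocated nodes one by one, serially):
+
+`
+$ for node in $(sort -u $PBS_NODEFILE); do ssh $node hostname; done
+`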
+ +### Example Jobscript for MPI Calculation + +Production jobs must use the /scratch directory for I/O + +The recommended way to run production jobs is to change to /scratch +directory early in the jobscript, copy all inputs to /scratch, execute +the calculations and copy outputs to home directory. + +` +#!/bin/bash + +# change to scratch directory, exit on failure +SCRDIR=/scratch/work/user/$USER/myjob +mkdir -p $SCRDIR +cd $SCRDIR || exit + +# copy input file to scratch +cp $PBS_O_WORKDIR/input . +cp $PBS_O_WORKDIR/mympiprog.x . + +# load the mpi module +module load OpenMPI + +# execute the calculation +mpiexec -pernode ./mympiprog.x + +# copy output file to home +cp output $PBS_O_WORKDIR/. + +#exit +exit +` + +In this example, some directory on the /home holds the input file input +and executable mympiprog.x . We create a directory myjob on the /scratch +filesystem, copy input and executable files from the /home directory +where the qsub was invoked ($PBS_O_WORKDIR) to /scratch, execute the +MPI programm mympiprog.x and copy the output file back to the /home +directory. The mympiprog.x is executed as one process per node, on all +allocated nodes. + +Consider preloading inputs and executables onto [shared +scratch](../storage.html) before the calculation starts. + +In some cases, it may be impractical to copy the inputs to scratch and +outputs to home. This is especially true when very large input and +output files are expected, or when the files should be reused by a +subsequent calculation. In such a case, it is users responsibility to +preload the input files on shared /scratch before the job submission and +retrieve the outputs manually, after all calculations are finished. + +Store the qsub options within the jobscript. +Use **mpiprocs** and **ompthreads** qsub options to control the MPI job +execution. + +Example jobscript for an MPI job with preloaded inputs and executables, +options for qsub are stored within the script : + +` +#!/bin/bash +#PBS -q qprod +#PBS -N MYJOB +#PBS -l select=100:ncpus=24:mpiprocs=1:ompthreads=24 +#PBS -A OPEN-0-0 + +# change to scratch directory, exit on failure +SCRDIR=/scratch/work/user/$USER/myjob +cd $SCRDIR || exit + +# load the mpi module +module load OpenMPI + +# execute the calculation +mpiexec ./mympiprog.x + +#exit +exit +` + +In this example, input and executable files are assumed preloaded +manually in /scratch/$USER/myjob directory. Note the **mpiprocs** and +ompthreads** qsub options, controlling behavior of the MPI execution. +The mympiprog.x is executed as one process per node, on all 100 +allocated nodes. If mympiprog.x implements OpenMP threads, it will run +24 threads per node. + +HTML commented section #2 (examples need to be reworked) + +### Example Jobscript for Single Node Calculation + +Local scratch directory is often useful for single node jobs. Local +scratch will be deleted immediately after the job ends. +Be very careful, use of RAM disk filesystem is at the expense of +operational memory. + +Example jobscript for single node calculation, using [local +scratch](../storage.html) on the node: + +` +#!/bin/bash + +# change to local scratch directory +cd /lscratch/$PBS_JOBID || exit + +# copy input file to scratch +cp $PBS_O_WORKDIR/input . +cp $PBS_O_WORKDIR/myprog.x . + +# execute the calculation +./myprog.x + +# copy output file to home +cp output $PBS_O_WORKDIR/. + +#exit +exit +` + +In this example, some directory on the home holds the input file input +and executable myprog.x . 
We copy input and executable files from the +home directory where the qsub was invoked ($PBS_O_WORKDIR) to local +scratch /lscratch/$PBS_JOBID, execute the myprog.x and copy the output +file back to the /home directory. The myprog.x runs on one node only and +may use threads. + diff --git a/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job_sort_formula.png b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job_sort_formula.png new file mode 100644 index 0000000000000000000000000000000000000000..6078911559aa56effb4b342fa4ffd074cfaed46f Binary files /dev/null and b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job_sort_formula.png differ diff --git a/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.md b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.md new file mode 100644 index 0000000000000000000000000000000000000000..3395d6f7301eb6413d10d9be1c1faea632cf2a47 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.md @@ -0,0 +1,236 @@ +Resources Allocation Policy +=========================== + + + +Resources Allocation Policy +--------------------------- + +The resources are allocated to the job in a fairshare fashion, subject +to constraints set by the queue and resources available to the Project. +The Fairshare at Anselm ensures that individual users may consume +approximately equal amount of resources per week. Detailed information +in the [Job scheduling](job-priority.html) section. The +resources are accessible via several queues for queueing the jobs. The +queues provide prioritized and exclusive access to the computational +resources. Following table provides the queue partitioning overview: + + + + |queue |active project |project resources |nodes<th align="left">min ncpus*<th align="left">priority<th align="left">authorization<th align="left">walltime | + | --- | --- | + |<strong>qexp</strong>\ |no |none required |32 nodes, max 8 per user |24 |>150 |no |1 / 1h | + |<strong>qprod</strong>\ |yes |> 0 |>1006 nodes, max 86 per job\ |24 |0 |no |24 / 48h | + |<strong>qlong</strong>Long queue |yes |> 0 |256 nodes, max 40 per job, only non-accelerated nodes allowed |24 |0 |no |72 / 144h | + |<strong>qmpp</strong>Massive parallel queue |yes |> 0 |1006 nodes |24 |0 |yes |2 / 4h | + |<strong>qfat</strong>UV2000 queue |yes |> 0\ |1 (uv1) |8 |0 |yes |24 / 48h | + |<strong>qfree</strong>\ |yes |none required |752 nodes, max 86 per job |24 |-1024 |no |12 / 12h | + |<strong><strong>qviz</strong></strong>Visualization queue |yes |none required |2 (with NVIDIA Quadro K5000) |4 |150 |no |1 / 2h | + + + +The qfree queue is not free of charge**. [Normal +accounting](resources-allocation-policy.html#resources-accounting-policy) +applies. However, it allows for utilization of free resources, once a +Project exhausted all its allocated computational resources. This does +not apply for Directors Discreation's projects (DD projects) by default. +Usage of qfree after exhaustion of DD projects computational resources +is allowed after request for this queue. + + + +- **qexp**, the \: This queue is dedicated for testing and + running very small jobs. It is not required to specify a project to + enter the qexp. >*>There are 2 nodes always reserved for + this queue (w/o accelerator), maximum 8 nodes are available via the + qexp for a particular user. 
*The nodes may be + allocated on per core basis. No special authorization is required to + use it. The maximum runtime in qexp is 1 hour. +- **qprod**, the \***: This queue is intended for + normal production runs. It is required that active project with + nonzero remaining resources is specified to enter the qprod. All + nodes may be accessed via the qprod queue, however only 86 per job. + ** Full nodes, 24 cores per node are allocated. The queue runs with + medium priority and no special authorization is required to use it. + The maximum runtime in qprod is 48 hours. +- **qlong**, the Long queue***: This queue is intended for long + production runs. It is required that active project with nonzero + remaining resources is specified to enter the qlong. Only 336 nodes + without acceleration may be accessed via the qlong queue. Full + nodes, 24 cores per node are allocated. The queue runs with medium + priority and no special authorization is required to use it.> + *The maximum runtime in qlong is 144 hours (three times of the + standard qprod time - 3 * 48 h)* +- >***qmpp**, the massively parallel queue. This queue is + intended for massively parallel runs. It is required that active + project with nonzero remaining resources is specified to enter + the qmpp. All nodes may be accessed via the qmpp queue. ** Full + nodes, 24 cores per node are allocated. The queue runs with medium + priority and no special authorization is required to use it. The + maximum runtime in qmpp is 4 hours. An PI> *needs explicitly* + ask [support](https://support.it4i.cz/rt/) + for authorization to enter the queue for all users associated to + her/his Project. + +- >***qfat**, the UV2000 queue. This queue is dedicated + to access the fat SGI UV2000 SMP machine. The machine (uv1) has 112 + Intel IvyBridge cores at 3.3GHz and 3.25TB RAM. An PI> *needs + explicitly* ask + [support](https://support.it4i.cz/rt/) for + authorization to enter the queue for all users associated to her/his + Project.*** +- **qfree**, the \***: The queue qfree is intended + for utilization of free resources, after a Project exhausted all its + allocated computational resources (Does not apply to DD projects + by default. DD projects have to request for persmission on qfree + after exhaustion of computational resources.). It is required that + active project is specified to enter the queue, however no remaining + resources are required. Consumed resources will be accounted to + the Project. Only 178 nodes without accelerator may be accessed from + this queue. Full nodes, 24 cores per node are allocated. The queue + runs with very low priority and no special authorization is required + to use it. The maximum runtime in qfree is 12 hours. +- **qviz**, the Visualization queue***: Intended for + pre-/post-processing using OpenGL accelerated graphics. Currently + when accessing the node, each user gets 4 cores of a CPU allocated, + thus approximately 73 GB of RAM and 1/7 of the GPU capacity + (default "chunk"). *If more GPU power or RAM is required, it is + recommended to allocate more chunks (with 4 cores each) up to one + whole node per user, so that all 28 cores, 512 GB RAM and whole GPU + is exclusive. This is currently also the maximum allowed allocation + per one user. One hour of work is allocated by default, the user may + ask for 2 hours maximum.* + + + +To access node with Xeon Phi co-processor user needs to specify that in +[job submission select +statement](job-submission-and-execution.html). 
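+
+For convenience, a minimal form of such a select statement is shown
+below (OPEN-0-0 stands in for your Project ID; see the Job submission
+and execution page for the full syntax and further options):
+
+`
+$ qsub -A OPEN-0-0 -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2 ./myjob
+`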
+ +### Notes + +The job wall clock time defaults to **half the maximum time**, see table +above. Longer wall time limits can be [set manually, see +examples](job-submission-and-execution.html). + +Jobs that exceed the reserved wall clock time (Req'd Time) get killed +automatically. Wall clock time limit can be changed for queuing jobs +(state Q) using the qalter command, however can not be changed for a +running job (state R). + +Salomon users may check current queue configuration at +<https://extranet.it4i.cz/rsweb/salomon/queues>. + +### Queue status + +Check the status of jobs, queues and compute nodes at +[https://extranet.it4i.cz/rsweb/salomon/](https://extranet.it4i.cz/rsweb/salomon) + + + + + + + +Display the queue status on Salomon: + +` +$ qstat -q +` + +The PBS allocation overview may be obtained also using the rspbs +command. + +` +$ rspbs +Usage: rspbs [options] + +Options: + --version            show program's version number and exit + -h, --help           show this help message and exit + --get-server-details Print server + --get-queues         Print queues + --get-queues-details Print queues details + --get-reservations   Print reservations + --get-reservations-details +                       Print reservations details + --get-nodes          Print nodes of PBS complex + --get-nodeset        Print nodeset of PBS complex + --get-nodes-details  Print nodes details + --get-jobs           Print jobs + --get-jobs-details   Print jobs details + --get-jobs-check-params +                       Print jobid, job state, session_id, user, nodes + --get-users          Print users of jobs + --get-allocated-nodes +                       Print allocated nodes of jobs + --get-allocated-nodeset +                       Print allocated nodeset of jobs + --get-node-users     Print node users + --get-node-jobs      Print node jobs + --get-node-ncpus     Print number of ncpus per node + --get-node-allocated-ncpus +                       Print number of allocated ncpus per node + --get-node-qlist     Print node qlist + --get-node-ibswitch  Print node ibswitch + --get-user-nodes     Print user nodes + --get-user-nodeset   Print user nodeset + --get-user-jobs      Print user jobs + --get-user-jobc      Print number of jobs per user + --get-user-nodec     Print number of allocated nodes per user + --get-user-ncpus     Print number of allocated ncpus per user + --get-qlist-nodes    Print qlist nodes + --get-qlist-nodeset  Print qlist nodeset + --get-ibswitch-nodes Print ibswitch nodes + --get-ibswitch-nodeset +                       Print ibswitch nodeset + --summary            Print summary + --get-node-ncpu-chart +                       Obsolete. Print chart of allocated ncpus per node + --server=SERVER      Use given PBS server + --state=STATE        Only for given job state + --jobid=JOBID        Only for given job ID + --user=USER          Only for given user + --node=NODE          Only for given node + --nodestate=NODESTATE +                       Only for given node state (affects only --get-node* +                       --get-qlist-* --get-ibswitch-* actions) + --incl-finished      Include finished jobs +` + +Resources Accounting Policy +------------------------------- + +### The Core-Hour + +The resources that are currently subject to accounting are the +core-hours. The core-hours are accounted on the wall clock basis. 
The +accounting runs whenever the computational cores are allocated or +blocked via the PBS Pro workload manager (the qsub command), regardless +of whether the cores are actually used for any calculation. 1 core-hour +is defined as 1 processor core allocated for 1 hour of wall clock time. +Allocating a full node (24 cores) for 1 hour accounts to 24 core-hours. +See example in the [Job submission and +execution](job-submission-and-execution.html) section. + +### Check consumed resources + +The **it4ifree** command is a part of it4i.portal.clients package, +located here: +<https://pypi.python.org/pypi/it4i.portal.clients> + +User may check at any time, how many core-hours have been consumed by +himself/herself and his/her projects. The command is available on +clusters' login nodes. + +` +$ it4ifree +Password: +    PID  Total Used ...by me Free +  -------- ------- ------ -------- ------- +  OPEN-0-0 1500000 400644  225265 1099356 +  DD-13-1   10000 2606 2606 7394 +` + + + diff --git a/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/rswebsalomon.png b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/rswebsalomon.png new file mode 100644 index 0000000000000000000000000000000000000000..40ccf2b42bc112a40e25a2fdbb8a0caeb3cf0f89 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/resource-allocation-and-job-execution/rswebsalomon.png differ diff --git a/converted/docs.it4i.cz/salomon/salomon b/converted/docs.it4i.cz/salomon/salomon new file mode 100644 index 0000000000000000000000000000000000000000..9365ab931a49eec462a6f2c24d3a86e5eaa7d9d1 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/salomon differ diff --git a/converted/docs.it4i.cz/salomon/salomon-1.jpeg b/converted/docs.it4i.cz/salomon/salomon-1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..4d2e1d70aeb1f40ddc0c75bc7cf0b825c1c75151 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/salomon-1.jpeg differ diff --git a/converted/docs.it4i.cz/salomon/salomon-2 b/converted/docs.it4i.cz/salomon/salomon-2 new file mode 100644 index 0000000000000000000000000000000000000000..00283bcbb639d32788f9e1171bda7d43f8e486bc Binary files /dev/null and b/converted/docs.it4i.cz/salomon/salomon-2 differ diff --git a/converted/docs.it4i.cz/salomon/salomon-3.jpeg b/converted/docs.it4i.cz/salomon/salomon-3.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..1849be79392df2cfa1e14fe42b5cb435b86e89d3 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/salomon-3.jpeg differ diff --git a/converted/docs.it4i.cz/salomon/salomon-4.jpeg b/converted/docs.it4i.cz/salomon/salomon-4.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..2df7b232c839cf329f39fadd2407a7f7ddfd799b Binary files /dev/null and b/converted/docs.it4i.cz/salomon/salomon-4.jpeg differ diff --git a/converted/docs.it4i.cz/salomon/sgi-c1104-gp1.jpeg b/converted/docs.it4i.cz/salomon/sgi-c1104-gp1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..820310be6a6cf96266bf056e6582c54b31185859 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/sgi-c1104-gp1.jpeg differ diff --git a/converted/docs.it4i.cz/salomon/software/ansys/AMsetPar1.png b/converted/docs.it4i.cz/salomon/software/ansys/AMsetPar1.png new file mode 100644 index 0000000000000000000000000000000000000000..f7886e6cbbcf501d87c8f363209ac72dcb8463a5 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/ansys/AMsetPar1.png differ diff --git 
a/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_1.jpg b/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1e3ba0f25be08787a9f3d5eb8d7a89f771fad403 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_1.jpg differ diff --git a/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_2.jpg b/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e8ffdf5a01337df7271a865d48dd58501eb78180 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_2.jpg differ diff --git a/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_3.jpg b/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..81be6763cb35ed8dca8dad9a0a1e714ef3d8c9e3 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_3.jpg differ diff --git a/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_4.jpg b/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ce4686740082b3e77070d4afe96184ab45e82637 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/ansys/Fluent_Licence_4.jpg differ diff --git a/converted/docs.it4i.cz/salomon/software/ansys/ansys-cfx.md b/converted/docs.it4i.cz/salomon/software/ansys/ansys-cfx.md new file mode 100644 index 0000000000000000000000000000000000000000..39777fa74cf823baa4e4d062eff1d37394b24d4b --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/ansys-cfx.md @@ -0,0 +1,87 @@ +ANSYS CFX +========= + +[ANSYS +CFX](http://www.ansys.com/Products/Simulation+Technology/Fluid+Dynamics/Fluid+Dynamics+Products/ANSYS+CFX) +software is a high-performance, general purpose fluid dynamics program +that has been applied to solve wide-ranging fluid flow problems for over +20 years. At the heart of ANSYS CFX is its advanced solver technology, +the key to achieving reliable and accurate solutions quickly and +robustly. The modern, highly parallelized solver is the foundation for +an abundant choice of physical models to capture virtually any type of +phenomena related to fluid flow. The solver and its many physical models +are wrapped in a modern, intuitive, and flexible GUI and user +environment, with extensive capabilities for customization and +automation using session files, scripting and a powerful expression +language. + +To run ANSYS CFX in batch mode you can utilize/modify the default +cfx.pbs script and execute it via the qsub command. + + #!/bin/bash + #PBS -l nodes=2:ppn=24 + #PBS -q qprod + #PBS -N $USER-CFX-Project + #PBS -A OPEN-0-0 + + #! 
Mail to user when job terminate or abort + #PBS -m ae + + #!change the working directory (default is home directory) + #cd <working directory> (working directory must exists) + WORK_DIR="/scratch/work/user/$USER" + cd $WORK_DIR + + echo Running on host `hostname` + echo Time is `date` + echo Directory is `pwd` + echo This jobs runs on the following processors: + echo `cat $PBS_NODEFILE` + + module load ANSYS + + #### Set number of processors per host listing + procs_per_host=24 + #### Create host list + hl="" + for host in `cat $PBS_NODEFILE` + do + if [ "$hl" = "" ] + then hl="$host:$procs_per_host" + else hl="${hl}:$host:$procs_per_host" + fi + done + + echo Machines: $hl + + # prevent ANSYS from attempting to use scif0 interface + export MPI_IC_ORDER="UDAPL" + + #-dev input.def includes the input of CFX analysis in DEF format + #-P the name of prefered license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial)) + cfx5solve -def input.def -size 4 -size-ni 4x -part-large -start-method "Platform MPI Distributed Parallel" -par-dist $hl -P aa_r + +Header of the pbs file (above) is common and description can be find +[this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html). +SVS FEM recommends to utilize sources by keywords: nodes, ppn. These +keywords allows to address directly the number of nodes (computers) and +cores (ppn) which will be utilized in the job. Also the rest of code +assumes such structure of allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. >Input file has to be defined by common +CFX def file which is attached to the cfx solver via parameter +-def + +License** should be selected by parameter -P (Big letter **P**). +Licensed products are the following: aa_r +(ANSYS **Academic Research), ane3fl (ANSYS +Multiphysics)-**Commercial. +[More about licensing here](licensing.html) + + We have observed that the -P settings does not always work. Please set +your [license +preferences](setting-license-preferences.html) instead. + diff --git a/converted/docs.it4i.cz/salomon/software/ansys/ansys-fluent.md b/converted/docs.it4i.cz/salomon/software/ansys/ansys-fluent.md new file mode 100644 index 0000000000000000000000000000000000000000..bbacf3ea538d8b030f0c1504601ab9869e6dd166 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/ansys-fluent.md @@ -0,0 +1,206 @@ +ANSYS Fluent +============ + +[ANSYS +Fluent](http://www.ansys.com/Products/Simulation+Technology/Fluid+Dynamics/Fluid+Dynamics+Products/ANSYS+Fluent) +software contains the broad physical modeling capabilities needed to +model flow, turbulence, heat transfer, and reactions for industrial +applications ranging from air flow over an aircraft wing to combustion +in a furnace, from bubble columns to oil platforms, from blood flow to +semiconductor manufacturing, and from clean room design to wastewater +treatment plants. Special models that give the software the ability to +model in-cylinder combustion, aeroacoustics, turbomachinery, and +multiphase systems have served to broaden its reach. + +1. Common way to run Fluent over pbs file +------------------------------------------------------ + +To run ANSYS Fluent in batch mode you can utilize/modify the +default fluent.pbs script and execute it via the qsub command. 
+ + #!/bin/bash + #PBS -S /bin/bash + #PBS -l nodes=2:ppn=24 + #PBS -q qprod + #PBS -N Fluent-Project + #PBS -A OPEN-0-0 + + #! Mail to user when job terminate or abort + #PBS -m ae + + #!change the working directory (default is home directory) + #cd <working directory> (working directory must exists) + WORK_DIR="/scratch/work/user/$USER" + cd $WORK_DIR + + echo Running on host `hostname` + echo Time is `date` + echo Directory is `pwd` + echo This jobs runs on the following processors: + echo `cat $PBS_NODEFILE` + + #### Load ansys module so that we find the cfx5solve command + module load ANSYS + + # Use following line to specify MPI for message-passing instead + NCORES=`wc -l $PBS_NODEFILE |awk '{print $1}'` + + /apps/cae/ANSYS/16.1/v161/fluent/bin/fluent 3d -t$NCORES -cnf=$PBS_NODEFILE -g -i fluent.jou + +Header of the pbs file (above) is common and description can be find on +[this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html). +[SVS FEM](http://www.svsfem.cz) recommends to utilize +sources by keywords: nodes, ppn. These keywords allows to address +directly the number of nodes (computers) and cores (ppn) which will be +utilized in the job. Also the rest of code assumes such structure of +allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. Input file has to be defined by common Fluent +journal file which is attached to the Fluent solver via parameter -i +fluent.jou + +Journal file with definition of the input geometry and boundary +conditions and defined process of solution has e.g. the following +structure: + + /file/read-case aircraft_2m.cas.gz + /solve/init + init + /solve/iterate + 10 + /file/write-case-dat aircraft_2m-solution + /exit yes + +The appropriate dimension of the problem has to be set by +parameter (2d/3d). + +2. Fast way to run Fluent from command line +-------------------------------------------------------- + + fluent solver_version [FLUENT_options] -i journal_file -pbs + +This syntax will start the ANSYS FLUENT job under PBS Professional using +the qsub command in a batch manner. When +resources are available, PBS Professional will start the job and return +a job ID, usually in the form of +*job_ID.hostname*. This job ID can then be used +to query, control, or stop the job using standard PBS Professional +commands, such as qstat or +qdel. The job will be run out of the current +working directory, and all output will be written to the file +fluent.o> +*job_ID*.     + +3. Running Fluent via user's config file +---------------------------------------- + +The sample script uses a configuration file called +pbs_fluent.conf  if no command line arguments +are present. This configuration file should be present in the directory +from which the jobs are submitted (which is also the directory in which +the jobs are executed). The following is an example of what the content +of pbs_fluent.conf can be: + +` + input="example_small.flin" + case="Small-1.65m.cas" + fluent_args="3d -pmyrinet" + outfile="fluent_test.out" + mpp="true" +` + +The following is an explanation of the parameters: + + input is the name of the input +file. + + case is the name of the +.cas file that the input file will utilize. + + fluent_args are extra ANSYS FLUENT +arguments. As shown in the previous example, you can specify the +interconnect by using the -p interconnect +command. 
The available interconnects include +ethernet (the default), +myrinet, class="monospace"> +infiniband, vendor, +altix>, and +crayx. The MPI is selected automatically, based +on the specified interconnect. + + outfile is the name of the file to which +the standard output will be sent. + + mpp="true" will tell the job script to +execute the job across multiple processors.         + +To run ANSYS Fluent in batch mode with user's config file you can +utilize/modify the following script and execute it via the qsub +command. + +` +#!/bin/sh +#PBS -l nodes=2:ppn=24 +#PBS -1 qprod +#PBS -N Fluent-Project +#PBS -A OPEN-0-0 + + cd $PBS_O_WORKDIR + + #We assume that if they didn’t specify arguments then they should use the + #config file if [ "xx${input}${case}${mpp}${fluent_args}zz" = "xxzz" ]; then + if [ -f pbs_fluent.conf ]; then + . pbs_fluent.conf + else + printf "No command line arguments specified, " + printf "and no configuration file found. Exiting n" + fi + fi + + + #Augment the ANSYS FLUENT command line arguments case "$mpp" in + true) + #MPI job execution scenario + num_nodes=â€cat $PBS_NODEFILE | sort -u | wc -l†+ cpus=â€expr $num_nodes * $NCPUS†+ #Default arguments for mpp jobs, these should be changed to suit your + #needs. + fluent_args="-t${cpus} $fluent_args -cnf=$PBS_NODEFILE" + ;; + *) + #SMP case + #Default arguments for smp jobs, should be adjusted to suit your + #needs. + fluent_args="-t$NCPUS $fluent_args" + ;; + esac + #Default arguments for all jobs + fluent_args="-ssh -g -i $input $fluent_args" + + echo "---------- Going to start a fluent job with the following settings: + Input: $input + Case: $case + Output: $outfile + Fluent arguments: $fluent_args" + + #run the solver + /apps/cae/ANSYS/16.1/v161/fluent/bin/fluent $fluent_args > $outfile +` + +It runs the jobs out of the directory from which they are +submitted (PBS_O_WORKDIR). + +4. Running Fluent in parralel +----------------------------- + +Fluent could be run in parallel only under Academic Research +license. To do so this ANSYS Academic Research license must be placed +before ANSYS CFD license in user preferences. To make this change +[anslic_admin utility should be +run](setting-license-preferences.html). + + + diff --git a/converted/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.md b/converted/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.md new file mode 100644 index 0000000000000000000000000000000000000000..6120f49e88e3dd9187c4459573cf1f83f9053228 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.md @@ -0,0 +1,84 @@ +ANSYS LS-DYNA +============= + +[ANSYS +LS-DYNA](http://www.ansys.com/Products/Simulation+Technology/Structural+Mechanics/Explicit+Dynamics/ANSYS+LS-DYNA) +software provides convenient and easy-to-use access to the +technology-rich, time-tested explicit solver without the need to contend +with the complex input requirements of this sophisticated program. +Introduced in 1996, ANSYS LS-DYNA capabilities have helped customers in +numerous industries to resolve highly intricate design +issues. >ANSYS Mechanical users have been able take advantage of +complex explicit solutions for a long time utilizing the traditional +ANSYS Parametric Design Language (APDL) environment. >These +explicit capabilities are available to ANSYS Workbench users as well. +The Workbench platform is a powerful, comprehensive, easy-to-use +environment for engineering simulation. 
CAD import from all sources, +geometry cleanup, automatic meshing, solution, parametric optimization, +result visualization and comprehensive report generation are all +available within a single fully interactive modern graphical user +environment. + +To run ANSYS LS-DYNA in batch mode you can utilize/modify the +default ansysdyna.pbs script and execute it via the qsub command. + + #!/bin/bash + #PBS -l nodes=2:ppn=24 + #PBS -q qprod + #PBS -N DYNA-Project + #PBS -A OPEN-0-0 + + #! Mail to user when job terminate or abort + #PBS -m ae + + #!change the working directory (default is home directory) + #cd <working directory> + WORK_DIR="/scratch/work/user/$USER" + cd $WORK_DIR + + echo Running on host `hostname` + echo Time is `date` + echo Directory is `pwd` + echo This jobs runs on the following processors: + echo `cat $PBS_NODEFILE` + + module load ANSYS + + #### Set number of processors per node + procs_per_host=24 + #### Create host list + hl="" + for host in `cat $PBS_NODEFILE` + do + if [ "$hl" = "" ] + then hl="$host:$procs_per_host" + else hl="${hl}:$host:$procs_per_host" + fi + done + + echo Machines: $hl + + # prevent ANSYS from attempting to use scif0 interface + export MPI_IC_ORDER="UDAPL" + + lsdyna161 -dis -usessh -machines "$hl" i=input.k + +Header of the pbs file (above) is common and description can be +find > on [this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html). +[SVS FEM](http://www.svsfem.cz) recommends to utilize +sources by keywords: nodes, ppn. These keywords allows to address +directly the number of nodes (computers) and cores (ppn) which will be +utilized in the job. Also the rest of code assumes such structure of +allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. Input file has to be defined by common LS-DYNA +.**k** file which is attached to the ansys solver via parameter i= + +Without setting environment variable MPI_IC_ORDER="UDAPL", ANSYS will +fail to run on nodes with Xeon Phi accelerator (it will use the virtual +interface of Phi cards instead of the real InfiniBand interface and MPI +will fail. + diff --git a/converted/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.md b/converted/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.md new file mode 100644 index 0000000000000000000000000000000000000000..a33a8bf916a7290a08343efecbb3dd2f663a5f32 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.md @@ -0,0 +1,79 @@ +ANSYS MAPDL +=========== + +**[ANSYS +Multiphysics](http://www.ansys.com/Products/Simulation+Technology/Structural+Mechanics/ANSYS+Multiphysics)** +software offers a comprehensive product solution for both multiphysics +and single-physics analysis. The product includes structural, thermal, +fluid and both high- and low-frequency electromagnetic analysis. The +product also contains solutions for both direct and sequentially coupled +physics problems including direct coupled-field elements and the ANSYS +multi-field solver. + +To run ANSYS MAPDL in batch mode you can utilize/modify the +default mapdl.pbs script and execute it via the qsub command. + + #!/bin/bash + #PBS -l nodes=2:ppn=24 + #PBS -q qprod + #PBS -N ANSYS-Project + #PBS -A OPEN-0-0 + + #! 
Mail to user when job terminate or abort + #PBS -m ae + + #!change the working directory (default is home directory) + #cd <working directory> (working directory must exists) + WORK_DIR="/scratch/work/user/$USER" + cd $WORK_DIR + + echo Running on host `hostname` + echo Time is `date` + echo Directory is `pwd` + echo This jobs runs on the following processors: + echo `cat $PBS_NODEFILE` + + module load ANSYS/16.1 + + #### Set number of processors per host listing + procs_per_host=24 + #### Create host list + hl="" + for host in `cat $PBS_NODEFILE` + do + if [ "$hl" = "" ] + then hl="$host:$procs_per_host" + else hl="${hl}:$host:$procs_per_host" + fi + done + + echo Machines: $hl + + # prevent ANSYS from attempting to use scif0 interface + export MPI_IC_ORDER="UDAPL" + + #-i input.dat includes the input of analysis in APDL format + #-o file.out is output file from ansys where all text outputs will be redirected + #-p the name of license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial), aa_r_dy=Academic AUTODYN) + ansys161 -b -dis -usessh -p aa_r -i input.dat -o file.out -machines "$hl" -dir $WORK_DIR + +Header of the PBS file (above) is common and description can be find on +[this +site](../../resource-allocation-and-job-execution/job-submission-and-execution.html). +[SVS FEM](http://www.svsfem.cz) recommends to utilize +sources by keywords: nodes, ppn. These keywords allows to address +directly the number of nodes (computers) and cores (ppn) which will be +utilized in the job. Also the rest of code assumes such structure of +allocated resources. + +Working directory has to be created before sending pbs job into the +queue. Input file should be in working directory or full path to input +file has to be specified. Input file has to be defined by common APDL +file which is attached to the ansys solver via parameter -i + +License** should be selected by parameter -p. Licensed products are +the following: aa_r (ANSYS **Academic Research), ane3fl (ANSYS +Multiphysics)-**Commercial**, aa_r_dy (ANSYS **Academic +AUTODYN)> +[More about licensing here](licensing.html) + diff --git a/converted/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.md b/converted/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.md new file mode 100644 index 0000000000000000000000000000000000000000..43dffc3a14231d9bb0e2e31dcfc09d6e3bd5f759 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.md @@ -0,0 +1,31 @@ +Overview of ANSYS Products +========================== + +[SVS FEM](http://www.svsfem.cz/)** as **[ANSYS +Channel partner](http://www.ansys.com/)** for Czech +Republic provided all ANSYS licenses for all clusters and supports of +all ANSYS Products (Multiphysics, Mechanical, MAPDL, CFX, Fluent, +Maxwell, LS-DYNA...) to IT staff and ANSYS users. If you are challenging +to problem of ANSYS functionality contact +please [hotline@svsfem.cz](mailto:hotline@svsfem.cz?subject=Ostrava%20-%20ANSELM) + +The clusters provides as commercial as academic variants. Academic +variants are distinguished by "**Academic...**" word in the name of + license or by two letter preposition "**aa_**" in the license feature +name. Change of license is realized on command line respectively +directly in user's pbs file (see individual products). [More about +licensing here](licensing.html) + +To load the latest version of any ANSYS product (Mechanical, Fluent, +CFX, MAPDL,...) 
load the module: + + $ module load ANSYS + +ANSYS supports interactive regime, but due to assumed solution of +extremely difficult tasks it is not recommended. + +If user needs to work in interactive regime we recommend to configure +the RSM service on the client machine which allows to forward the +solution to the clusters directly from the client's Workbench project +(see ANSYS RSM service). + diff --git a/converted/docs.it4i.cz/salomon/software/ansys/ansys.md b/converted/docs.it4i.cz/salomon/software/ansys/ansys.md new file mode 100644 index 0000000000000000000000000000000000000000..93f9c151da204a8a03bf3d4696f63f151ec898da --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/ansys.md @@ -0,0 +1,31 @@ +Overview of ANSYS Products +========================== + +[SVS FEM](http://www.svsfem.cz/)** as **[ANSYS +Channel partner](http://www.ansys.com/)** for Czech +Republic provided all ANSYS licenses for all clusters and supports of +all ANSYS Products (Multiphysics, Mechanical, MAPDL, CFX, Fluent, +Maxwell, LS-DYNA...) to IT staff and ANSYS users. If you are challenging +to problem of ANSYS functionality contact +please [hotline@svsfem.cz](mailto:hotline@svsfem.cz?subject=Ostrava%20-%20ANSELM) + +The clusters provides as commercial as academic variants. Academic +variants are distinguished by "**Academic...**" word in the name of + license or by two letter preposition "**aa_**" in the license feature +name. Change of license is realized on command line respectively +directly in user's pbs file (see individual products). [More about +licensing here](ansys/licensing.html) + +To load the latest version of any ANSYS product (Mechanical, Fluent, +CFX, MAPDL,...) load the module: + + $ module load ANSYS + +ANSYS supports interactive regime, but due to assumed solution of +extremely difficult tasks it is not recommended. + +If user needs to work in interactive regime we recommend to configure +the RSM service on the client machine which allows to forward the +solution to the clusters directly from the client's Workbench project +(see ANSYS RSM service). + diff --git a/converted/docs.it4i.cz/salomon/software/ansys/licensing.md b/converted/docs.it4i.cz/salomon/software/ansys/licensing.md new file mode 100644 index 0000000000000000000000000000000000000000..8ee4bc1c61dc30f9579e90c4c646752c25513f3d --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/licensing.md @@ -0,0 +1,45 @@ +Licensing and Available Versions +================================ + +ANSYS licence can be used by: +----------------------------- + +- all persons in the carrying out of the CE IT4Innovations Project (In + addition to the primary licensee, which is VSB - Technical + University of Ostrava, users are CE IT4Innovations third parties - + CE IT4Innovations project partners, particularly the University of + Ostrava, the Brno University of Technology - Faculty of Informatics, + the Silesian University in Opava, Institute of Geonics AS CR.) +- all + persons who have a valid + license +- students + of the Technical University + +ANSYS Academic Research +----------------------- + +The licence intended to be used for science and research, publications, +students’ projects (academic licence). + +ANSYS COM +--------- + +The licence intended to be used for science and research, publications, +students’ projects, commercial research with no commercial use +restrictions. 
+ +Available Versions +------------------ + +- 16.1 +- 17.0 + +License Preferences +------------------- + +Please [see this page to set license +preferences](setting-license-preferences.html). + + + diff --git a/converted/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.md b/converted/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.md new file mode 100644 index 0000000000000000000000000000000000000000..5c6d63842d2d5b6e76f056815beebba89e8e8989 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.md @@ -0,0 +1,34 @@ +Setting license preferences +=========================== + +Some ANSYS tools allow you to explicitly specify usage of academic or +commercial licenses in the command line (eg. +ansys161 -p aa_r to select Academic Research +license). However, we have observed that not all tools obey this option +and choose commercial license. + +Thus you need to configure preferred license order with ANSLIC_ADMIN. +Please follow these steps and move Academic Research license to the  top +or bottom of the list accordingly. + +Launch the ANSLIC_ADMIN utility in a graphical environment: + + $ANSYSLIC_DIR/lic_admin/anslic_admin + +ANSLIC_ADMIN Utility will be run + + + + + + + + + +ANSYS Academic Research license should be moved up to the top or down to +the bottom of the list. + + + + + diff --git a/converted/docs.it4i.cz/salomon/software/ansys/workbench.md b/converted/docs.it4i.cz/salomon/software/ansys/workbench.md new file mode 100644 index 0000000000000000000000000000000000000000..78ec8caa5d83c589a2542035fac57d0cd997b75a --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/ansys/workbench.md @@ -0,0 +1,74 @@ +Workbench +========= + +Workbench Batch Mode +-------------------- + +It is possible to run Workbench scripts in batch mode. You need to +configure solvers of individual components to run in parallel mode. Open +your project in Workbench. Then, for example, in Mechanical, go to Tools +- Solve Process Settings ..., click Advanced button as shown on the +screenshot. + + + +Enable Distribute Solution checkbox and enter number of cores (eg. 48 to +run on two Salomon nodes). If you want the job to run on more then 1 +node, you must also provide a so called MPI appfile. In the Additional +Command Line Arguments input field, enter : + + -mpifile /path/to/my/job/mpifile.txt + +Where /path/to/my/job is the directory where your project is saved. We +will create the file mpifile.txt programatically later in the batch +script. For more information, refer to *ANSYS Mechanical APDL Parallel +Processing* *Guide*. + +Now, save the project and close Workbench. 
We will use this script to
+launch the job:
+
+    #!/bin/bash
+    #PBS -l select=2:ncpus=24
+    #PBS -q qprod
+    #PBS -N test9_mpi_2
+    #PBS -A OPEN-0-0
+
+    # Mail to user when job terminates or aborts
+    #PBS -m a
+
+    # change the working directory
+    WORK_DIR="$PBS_O_WORKDIR"
+    cd $WORK_DIR
+
+    echo Running on host `hostname`
+    echo Time is `date`
+    echo Directory is `pwd`
+    echo This job runs on the following nodes:
+    echo `cat $PBS_NODEFILE`
+
+    module load ANSYS
+
+    #### Set number of processors per host listing
+    procs_per_host=24
+    #### Create MPI appfile, appending one line per allocated host
+    echo -n "" > mpifile.txt
+    for host in `cat $PBS_NODEFILE`
+    do
+      echo "-h $host -np $procs_per_host $ANSYS160_DIR/bin/ansysdis161 -dis" >> mpifile.txt
+    done
+
+    #-i input.dat includes the input of analysis in APDL format
+    #-o file.out is output file from ansys where all text outputs will be redirected
+    #-p the name of license feature (aa_r=ANSYS Academic Research, ane3fl=Multiphysics(commercial), aa_r_dy=Academic AUTODYN)
+
+    # prevent using scsif0 interface on accelerated nodes
+    export MPI_IC_ORDER="UDAPL"
+    # spawn remote process using SSH (default is RSH)
+    export MPI_REMSH="/usr/bin/ssh"
+
+    runwb2 -R jou6.wbjn -B -F test9.wbpj
+
+The solver settings are saved in the file solvehandlers.xml, which is
+not located in the project directory. Verify your solver settings when
+uploading a project from your local computer.
+
 diff --git a/converted/docs.it4i.cz/salomon/software/chemistry/molpro.md b/converted/docs.it4i.cz/salomon/software/chemistry/molpro.md
new file mode 100644
index 0000000000000000000000000000000000000000..44f8d92337f0c4f4ae4b4dd0d4e90cab4d53ce28
--- /dev/null
+++ b/converted/docs.it4i.cz/salomon/software/chemistry/molpro.md
@@ -0,0 +1,91 @@
+Molpro
+======
+
+Molpro is a complete system of ab initio programs for molecular
+electronic structure calculations.
+
+About Molpro
+------------
+
+Molpro is a software package used for accurate ab-initio quantum
+chemistry calculations. More information can be found at the [official
+webpage](http://www.molpro.net/).
+
+License
+-------
+
+The Molpro software package is available only to users who have a valid
+license. Please contact support to enable access to Molpro if you have a
+valid license appropriate for running on our cluster (e.g. an academic
+research group licence, parallel execution).
+
+To run Molpro, you need to have a valid license token present in
+"$HOME/.molpro/token". You can download the token from the [Molpro
+website](https://www.molpro.net/licensee/?portal=licensee).
+
+Installed version
+-----------------
+
+The version currently installed on Salomon is 2010.1, patch level 57,
+parallel version compiled with Intel compilers and Intel MPI.
+
+Default compilation parameters:
+
+  |Parameter|Value|
+  |---|---|
+  |max number of atoms|200|
+  |max number of valence orbitals|300|
+  |max number of basis functions|4095|
+  |max number of states per symmetry|20|
+  |max number of state symmetries|16|
+  |max number of records|200|
+  |max number of primitives|maxbfn x [2]|
+
+Running
+-------
+
+Molpro is compiled for parallel execution using MPI and OpenMP. By
+default, Molpro reads the number of allocated nodes from PBS and
+launches a data server on one node. On the remaining allocated nodes,
+compute processes are launched, one process per node, each with 16
+threads. You can modify this behavior by using the -n, -t and
+helper-server options. 
Please refer to the [Molpro +documentation](http://www.molpro.net/info/2010.1/doc/manual/node9.html) +for more details. + +The OpenMP parallelization in Molpro is limited and has been observed to +produce limited scaling. We therefore recommend to use MPI +parallelization only. This can be achieved by passing option +mpiprocs=24:ompthreads=1 to PBS. + +You are advised to use the -d option to point to a directory in [SCRATCH +filesystem](../../storage.html). Molpro can produce a +large amount of temporary data during its run, and it is important that +these are placed in the fast scratch filesystem. + +### Example jobscript + + #PBS -A IT4I-0-0 + #PBS -q qprod + #PBS -l select=1:ncpus=24:mpiprocs=24:ompthreads=1 + + cd $PBS_O_WORKDIR + + # load Molpro module + module add Molpro/2010.1-patch-57-intel2015b + + # create a directory in the SCRATCH filesystem + mkdir -p /scratch/work/user/$USER/$PBS_JOBID + + # copy an example input + cp /apps/all/Molpro/2010.1-patch57/molprop_2010_1_Linux_x86_64_i8/examples/caffeine_opt_diis.com . + + # run Molpro with default options + molpro -d /scratch/work/user/$USER/$PBS_JOBID caffeine_opt_diis.com + + # delete scratch directory + rm -rf /scratch/work/user/$USER/$PBS_JOBID + diff --git a/converted/docs.it4i.cz/salomon/software/chemistry/nwchem.md b/converted/docs.it4i.cz/salomon/software/chemistry/nwchem.md new file mode 100644 index 0000000000000000000000000000000000000000..b37cb96e6ff2cfcabcfcf4e78a741e1a86e8eaf8 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/chemistry/nwchem.md @@ -0,0 +1,66 @@ +NWChem +====== + +High-Performance Computational Chemistry + +Introduction +------------------------- + +NWChem aims to provide its users with computational chemistry +tools that are scalable both in their ability to treat large scientific +computational chemistry problems efficiently, and in their use of +available parallel computing resources from high-performance parallel +supercomputers to conventional workstation clusters. + +[Homepage](http://www.nwchem-sw.org/index.php/Main_Page) + +Installed versions +------------------ + +The following versions are currently installed : + +- >NWChem/6.3.revision2-2013-10-17-Python-2.7.8, current release. + Compiled with Intel compilers, MKL and Intel MPI + +  + +- >NWChem/6.5.revision26243-intel-2015b-2014-09-10-Python-2.7.8 + +For a current list of installed versions, execute : + + module avail NWChem + +The recommend to use version 6.5. Version 6.3 fails on Salomon nodes +with accelerator, because it attempts to communicate over scif0 +interface. In 6.5 this is avoided by +setting ARMCI_OPENIB_DEVICE=mlx4_0, this setting is included in the +module. + +Running +------- + +NWChem is compiled for parallel MPI execution. Normal procedure for MPI +jobs applies. Sample jobscript : + + #PBS -A IT4I-0-0 + #PBS -q qprod + #PBS -l select=1:ncpus=24:mpiprocs=24 + + cd $PBS_O_WORKDIR + module add NWChem/6.5.revision26243-intel-2015b-2014-09-10-Python-2.7.8 + mpirun nwchem h2o.nw + +Options +-------------------- + +Please refer to [the +documentation](http://www.nwchem-sw.org/index.php/Release62:Top-level) and +in the input file set the following directives : + +- >MEMORY : controls the amount of memory NWChem will use +- >SCRATCH_DIR : set this to a directory in [SCRATCH + filesystem](../../storage.html) (or run the + calculation completely in a scratch directory). For certain + calculations, it might be advisable to reduce I/O by forcing + "direct" mode, eg. 
"scf direct" + diff --git a/converted/docs.it4i.cz/salomon/software/chemistry/phono3py.md b/converted/docs.it4i.cz/salomon/software/chemistry/phono3py.md new file mode 100644 index 0000000000000000000000000000000000000000..14148e5a9e3f53cf593765a7b697a35fc5ab2be7 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/chemistry/phono3py.md @@ -0,0 +1,202 @@ +Phono3py +======== + + + + Introduction +------------- + +This GPL software calculates phonon-phonon interactions via the third +order force constants. It allows to obtain lattice thermal conductivity, +phonon lifetime/linewidth, imaginary part of self energy at the lowest +order, joint density of states (JDOS) and weighted-JDOS. For details see +Phys. Rev. B 91, 094306 (2015) and +http://atztogo.github.io/phono3py/index.html + +Load the phono3py/0.9.14-ictce-7.3.5-Python-2.7.9 module + +` +$ module load phono3py/0.9.14-ictce-7.3.5-Python-2.7.9 +` + +Example of calculating thermal conductivity of Si using VASP code. +------------------------------------------------------------------ + +### Calculating force constants + +One needs to calculate second order and third order force constants +using the diamond structure of silicon stored in +[POSCAR](phono3py-input/poscar-si) (the same form as in +VASP) using single displacement calculations within supercell. + +` +$ cat POSCAR + Si +  1.0 +    5.4335600309153529   0.0000000000000000   0.0000000000000000 +    0.0000000000000000   5.4335600309153529   0.0000000000000000 +    0.0000000000000000   0.0000000000000000   5.4335600309153529 + Si +  8 +Direct +  0.8750000000000000 0.8750000000000000 0.8750000000000000 +  0.8750000000000000 0.3750000000000000 0.3750000000000000 +  0.3750000000000000 0.8750000000000000 0.3750000000000000 +  0.3750000000000000 0.3750000000000000 0.8750000000000000 +  0.1250000000000000 0.1250000000000000 0.1250000000000000 +  0.1250000000000000 0.6250000000000000 0.6250000000000000 +  0.6250000000000000 0.1250000000000000 0.6250000000000000 +  0.6250000000000000 0.6250000000000000 0.1250000000000000 +` + +### Generating displacement using 2x2x2 supercell for both second and third order force constants + +` +$ phono3py -d --dim="2 2 2" -c POSCAR +` + + 111 displacements is created stored in +disp_fc3.yaml, and the structure input files with this +displacements are POSCAR-00XXX, where the XXX=111. 
+ +` +disp_fc3.yaml POSCAR-00008 POSCAR-00017 POSCAR-00026 POSCAR-00035 POSCAR-00044 POSCAR-00053 POSCAR-00062 POSCAR-00071 POSCAR-00080 POSCAR-00089 POSCAR-00098 POSCAR-00107 +POSCAR        POSCAR-00009 POSCAR-00018 POSCAR-00027 POSCAR-00036 POSCAR-00045 POSCAR-00054 POSCAR-00063 POSCAR-00072 POSCAR-00081 POSCAR-00090 POSCAR-00099 POSCAR-00108 +POSCAR-00001  POSCAR-00010 POSCAR-00019 POSCAR-00028 POSCAR-00037 POSCAR-00046 POSCAR-00055 POSCAR-00064 POSCAR-00073 POSCAR-00082 POSCAR-00091 POSCAR-00100 POSCAR-00109 +POSCAR-00002  POSCAR-00011 POSCAR-00020 POSCAR-00029 POSCAR-00038 POSCAR-00047 POSCAR-00056 POSCAR-00065 POSCAR-00074 POSCAR-00083 POSCAR-00092 POSCAR-00101 POSCAR-00110 +POSCAR-00003  POSCAR-00012 POSCAR-00021 POSCAR-00030 POSCAR-00039 POSCAR-00048 POSCAR-00057 POSCAR-00066 POSCAR-00075 POSCAR-00084 POSCAR-00093 POSCAR-00102 POSCAR-00111 +POSCAR-00004  POSCAR-00013 POSCAR-00022 POSCAR-00031 POSCAR-00040 POSCAR-00049 POSCAR-00058 POSCAR-00067 POSCAR-00076 POSCAR-00085 POSCAR-00094 POSCAR-00103 +POSCAR-00005  POSCAR-00014 POSCAR-00023 POSCAR-00032 POSCAR-00041 POSCAR-00050 POSCAR-00059 POSCAR-00068 POSCAR-00077 POSCAR-00086 POSCAR-00095 POSCAR-00104 +POSCAR-00006  POSCAR-00015 POSCAR-00024 POSCAR-00033 POSCAR-00042 POSCAR-00051 POSCAR-00060 POSCAR-00069 POSCAR-00078 POSCAR-00087 POSCAR-00096 POSCAR-00105 +POSCAR-00007  POSCAR-00016 POSCAR-00025 POSCAR-00034 POSCAR-00043 POSCAR-00052 POSCAR-00061 POSCAR-00070 POSCAR-00079 POSCAR-00088 POSCAR-00097 POSCAR-00106 +` + + For each displacement the forces needs to be +calculated, i.e. in form of the output file of VASP (vasprun.xml). For a +single VASP calculations one needs +[KPOINTS](phono3py-input/KPOINTS), +[POTCAR](phono3py-input/POTCAR), +[INCAR](phono3py-input/INCAR) in your case directory +(where you have POSCARS) and those 111 displacements calculations can be +generated by [prepare.sh](phono3py-input/prepare.sh) +script. Then each of the single 111 calculations is submitted +[run.sh](phono3py-input/run.sh) by +[submit.sh](phono3py-input/submit.sh). 
+ +` +$./prepare.sh +$ls +disp-00001 disp-00009 disp-00017 disp-00025 disp-00033 disp-00041 disp-00049 disp-00057 disp-00065 disp-00073 disp-00081 disp-00089 disp-00097 disp-00105    INCAR +disp-00002 disp-00010 disp-00018 disp-00026 disp-00034 disp-00042 disp-00050 disp-00058 disp-00066 disp-00074 disp-00082 disp-00090 disp-00098 disp-00106    KPOINTS +disp-00003 disp-00011 disp-00019 disp-00027 disp-00035 disp-00043 disp-00051 disp-00059 disp-00067 disp-00075 disp-00083 disp-00091 disp-00099 disp-00107    POSCAR +disp-00004 disp-00012 disp-00020 disp-00028 disp-00036 disp-00044 disp-00052 disp-00060 disp-00068 disp-00076 disp-00084 disp-00092 disp-00100 disp-00108    POTCAR +disp-00005 disp-00013 disp-00021 disp-00029 disp-00037 disp-00045 disp-00053 disp-00061 disp-00069 disp-00077 disp-00085 disp-00093 disp-00101 disp-00109    prepare.sh +disp-00006 disp-00014 disp-00022 disp-00030 disp-00038 disp-00046 disp-00054 disp-00062 disp-00070 disp-00078 disp-00086 disp-00094 disp-00102 disp-00110    run.sh +disp-00007 disp-00015 disp-00023 disp-00031 disp-00039 disp-00047 disp-00055 disp-00063 disp-00071 disp-00079 disp-00087 disp-00095 disp-00103 disp-00111    submit.sh +disp-00008 disp-00016 disp-00024 disp-00032 disp-00040 disp-00048 disp-00056 disp-00064 disp-00072 disp-00080 disp-00088 disp-00096 disp-00104 disp_fc3.yaml +` + + Taylor your run.sh script to fit into your project and +other needs and submit all 111 calculations using submit.sh +script + +` +$ ./submit.sh +` + + Collecting results and post-processing with phono3py +--------------------------------------------------------------------------- + + Once all jobs are finished and vasprun.xml is created in +each disp-XXXXX directory the collection is done by + +` +$ phono3py --cf3 disp-{00001..00111}/vasprun.xml +` + + and +`disp_fc2.yaml, FORCES_FC2`, `FORCES_FC3`{.docutils +.literal} and disp_fc3.yaml should appear and put into the hdf +format by + +` +$ phono3py --dim="2 2 2" -c POSCAR +` + +resulting in `fc2.hdf5` and `fc3.hdf5`{.docutils +.literal} + +### Thermal conductivity + + The phonon lifetime calculations takes some time, +however is independent on grid points, so could be splitted: + +` +$ phono3py --fc3 --fc2 --dim="2 2 2" --mesh="9 9 9" --sigma 0.1 --wgp +` + +### Inspecting ir_grid_points.yaml + +` +$ grep grid_point ir_grid_points.yaml +num_reduced_ir_grid_points: 35 +ir_grid_points: # [address, weight] +- grid_point: 0 +- grid_point: 1 +- grid_point: 2 +- grid_point: 3 +- grid_point: 4 +- grid_point: 10 +- grid_point: 11 +- grid_point: 12 +- grid_point: 13 +- grid_point: 20 +- grid_point: 21 +- grid_point: 22 +- grid_point: 30 +- grid_point: 31 +- grid_point: 40 +- grid_point: 91 +- grid_point: 92 +- grid_point: 93 +- grid_point: 94 +- grid_point: 101 +- grid_point: 102 +- grid_point: 103 +- grid_point: 111 +- grid_point: 112 +- grid_point: 121 +- grid_point: 182 +- grid_point: 183 +- grid_point: 184 +- grid_point: 192 +- grid_point: 193 +- grid_point: 202 +- grid_point: 273 +- grid_point: 274 +- grid_point: 283 +- grid_point: 364 +` + +one finds which grid points needed to be calculated, for instance using +following + +` +$ phono3py --fc3 --fc2 --dim="2 2 2" --mesh="9 9 9" -c POSCAR --sigma 0.1 --br --write-gamma --gp="0 1 2 +` + + one calculates grid points 0, 1, 2. 
To automize one can +use for instance scripts to submit 5 points in series, see +[gofree-cond1.sh](phono3py-input/gofree-cond1.sh) + +` +$ qsub gofree-cond1.sh +` + + Finally the thermal conductivity result is produced by +grouping single conductivity per grid calculations using + +` +$ phono3py --fc3 --fc2 --dim="2 2 2" --mesh="9 9 9" --br --read_gamma +` + diff --git a/converted/docs.it4i.cz/salomon/software/compilers.md b/converted/docs.it4i.cz/salomon/software/compilers.md new file mode 100644 index 0000000000000000000000000000000000000000..125d99238603c065fc4d9b1f7845ea442c93ebf4 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/compilers.md @@ -0,0 +1,194 @@ +Compilers +========= + +Available compilers, including GNU, INTEL and UPC compilers + + + +There are several compilers for different programming languages +available on the cluster: + +- C/C++ +- Fortran 77/90/95/HPF +- Unified Parallel C +- Java + +The C/C++ and Fortran compilers are provided by: + +Opensource: + +- GNU GCC +- Clang/LLVM + +Commercial licenses: + +- Intel +- PGI + +Intel Compilers +--------------- + +For information about the usage of Intel Compilers and other Intel +products, please read the [Intel Parallel +studio](intel-suite.html) page. + +PGI Compilers +------------- + +The Portland Group Cluster Development Kit (PGI CDK) is available. + + $ module load PGI + $ pgcc -v + $ pgc++ -v + $ pgf77 -v + $ pgf90 -v + $ pgf95 -v + $ pghpf -v + +The PGI CDK also incudes tools for debugging and profiling. + +PGDBG OpenMP/MPI debugger and PGPROF OpenMP/MPI profiler are available + + $ module load PGI + $ module load Java + $ pgdbg & + $ pgprof & + +For more information, see the [PGI +page](http://www.pgroup.com/products/pgicdk.htm). + +GNU +--- + +For compatibility reasons there are still available the original (old +4.4.7-11) versions of GNU compilers as part of the OS. These are +accessible in the search path by default. + +It is strongly recommended to use the up to date version which comes +with the module GCC: + + $ module load GCC + $ gcc -v + $ g++ -v + $ gfortran -v + +With the module loaded two environment variables are predefined. One for +maximum optimizations on the cluster's architecture, and the other for +debugging purposes: + + $ echo $OPTFLAGS + -O3 -march=native + + $ echo $DEBUGFLAGS + -O0 -g + +For more information about the possibilities of the compilers, please +see the man pages. + +Unified Parallel C +------------------ + +UPC is supported by two compiler/runtime implementations: + +- GNU - SMP/multi-threading support only +- Berkley - multi-node support as well as SMP/multi-threading support + +### GNU UPC Compiler + +To use the GNU UPC compiler and run the compiled binaries use the module +gupc + + $ module add gupc + $ gupc -v + $ g++ -v + +Simple program to test the compiler + + $ cat count.upc + + /* hello.upc - a simple UPC example */ + #include <upc.h> + #include <stdio.h> + + int main() { +  if (MYTHREAD == 0) { +    printf("Welcome to GNU UPC!!!n"); +  } +  upc_barrier; +  printf(" - Hello from thread %in", MYTHREAD); +  return 0; + } + +To compile the example use + + $ gupc -o count.upc.x count.upc + +To run the example with 5 threads issue + + $ ./count.upc.x -fupc-threads-5 + +For more informations see the man pages. + +### Berkley UPC Compiler + +To use the Berkley UPC compiler and runtime environment to run the +binaries use the module bupc + + $ module add BerkeleyUPC/2.16.2-gompi-2015b + $ upcc -version + +As default UPC network the "smp" is used. 
This is very quick and easy +way for testing/debugging, but limited to one node only. + +For production runs, it is recommended to use the native Infiband +implementation of UPC network "ibv". For testing/debugging using +multiple nodes, the "mpi" UPC network is recommended. Please note, that +the selection of the network is done at the compile time** and not at +runtime (as expected)! + +Example UPC code: + + $ cat hello.upc + + /* hello.upc - a simple UPC example */ + #include <upc.h> + #include <stdio.h> + + int main() { +  if (MYTHREAD == 0) { +    printf("Welcome to Berkeley UPC!!!n"); +  } +  upc_barrier; +  printf(" - Hello from thread %in", MYTHREAD); +  return 0; + } + +To compile the example with the "ibv" UPC network use + + $ upcc -network=ibv -o hello.upc.x hello.upc + +To run the example with 5 threads issue + + $ upcrun -n 5 ./hello.upc.x + +To run the example on two compute nodes using all 48 cores, with 48 +threads, issue + + $ qsub -I -q qprod -A PROJECT_ID -l select=2:ncpus=24 + $ module add bupc + $ upcrun -n 48 ./hello.upc.x + + For more informations see the man pages. + +Java +---- + +For information how to use Java (runtime and/or compiler), please read +the [Java page](java.html). + +nVidia CUDA + +For information how to work with nVidia CUDA, please read the [nVidia +CUDA +page](../../anselm-cluster-documentation/software/nvidia-cuda.html). + diff --git a/converted/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.md b/converted/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.md new file mode 100644 index 0000000000000000000000000000000000000000..5948a933f5fc8c928e5c34e481359a1fa446601f --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.md @@ -0,0 +1,204 @@ +COMSOL Multiphysics® +==================== + + + +Introduction + +------------------------- + +[COMSOL](http://www.comsol.com) +is a powerful environment for modelling and solving various engineering +and scientific problems based on partial differential equations. COMSOL +is designed to solve coupled or multiphysics phenomena. For many +standard engineering problems COMSOL provides add-on products such as +electrical, mechanical, fluid flow, and chemical +applications. + +- >[Structural Mechanics + Module](http://www.comsol.com/structural-mechanics-module), + + +- >[Heat Transfer + Module](http://www.comsol.com/heat-transfer-module), + + +- >[CFD + Module](http://www.comsol.com/cfd-module), + + +- >[Acoustics + Module](http://www.comsol.com/acoustics-module), + + +- >and [many + others](http://www.comsol.com/products) + +COMSOL also allows an +interface support for +equation-based modelling of +partial differential +equations. + +Execution + +---------------------- + +On the clusters COMSOL is available in the latest stable +version. There are two variants of the release: + +- >**Non commercial** or so + called >**EDU + variant**>, which can be used for research + and educational purposes. + +- >**Commercial** or so called + >**COM variant**, + which can used also for commercial activities. + >**COM variant** + has only subset of features compared to the + >**EDU + variant**> available. + + More about + licensing will be posted here + soon. + + +To load the of COMSOL load the module + +` +$ module load COMSOL/51-EDU +` + +By default the **EDU +variant**> will be loaded. If user needs other +version or variant, load the particular version. 
To obtain the list of +available versions use + +` +$ module avail COMSOL +` + +If user needs to prepare COMSOL jobs in the interactive mode +it is recommend to use COMSOL on the compute nodes via PBS Pro +scheduler. In order run the COMSOL Desktop GUI on Windows is recommended +to use the [Virtual Network Computing +(VNC)](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html). + +` +$ xhost + +$ qsub -I -X -A PROJECT_ID -q qprod -l select=1:ppn=24 +$ module load COMSOL +$ comsol +` + +To run COMSOL in batch mode, without the COMSOL Desktop GUI +environment, user can utilized the default (comsol.pbs) job script and +execute it via the qsub command. + +` +#!/bin/bash +#PBS -l select=3:ppn=24 +#PBS -q qprod +#PBS -N JOB_NAME +#PBS -A PROJECT_ID + +cd /scratch/work/user/$USER/ || exit + +echo Time is `date` +echo Directory is `pwd` +echo '**PBS_NODEFILE***START*******' +cat $PBS_NODEFILE +echo '**PBS_NODEFILE***END*********' + +text_nodes < cat $PBS_NODEFILE + +module load COMSOL +# module load COMSOL/51-EDU + +ntask=$(wc -l $PBS_NODEFILE) + +comsol -nn ${ntask} batch -configuration /tmp –mpiarg –rmk –mpiarg pbs -tmpdir /scratch/$USER/ -inputfile name_input_f.mph -outputfile name_output_f.mph -batchlog name_log_f.log +` + +Working directory has to be created before sending the +(comsol.pbs) job script into the queue. Input file (name_input_f.mph) +has to be in working directory or full path to input file has to be +specified. The appropriate path to the temp directory of the job has to +be set by command option (-tmpdir). + +LiveLink™* *for MATLAB®^ +------------------------- + +COMSOL is the software package for the numerical solution of +the partial differential equations. LiveLink for MATLAB allows +connection to the +COMSOL>><span><span><span><span>**®**</span>^ +API (Application Programming Interface) with the benefits of the +programming language and computing environment of the MATLAB. + +LiveLink for MATLAB is available in both +**EDU** and +**COM** +**variant** of the +COMSOL release. On the clusters 1 commercial +(>**COM**) license +and the 5 educational +(>**EDU**) licenses +of LiveLink for MATLAB (please see the [ISV +Licenses](../isv_licenses.html)) are available. +Following example shows how to start COMSOL model from MATLAB via +LiveLink in the interactive mode. + +` +$ xhost + +$ qsub -I -X -A PROJECT_ID -q qexp -l select=1:ppn=24 +$ module load MATLAB +$ module load COMSOL +$ comsol server MATLAB +` + +At the first time to launch the LiveLink for MATLAB +(client-MATLAB/server-COMSOL connection) the login and password is +requested and this information is not requested again. + +To run LiveLink for MATLAB in batch mode with +(comsol_matlab.pbs) job script you can utilize/modify the following +script and execute it via the qsub command. 
+ +` +#!/bin/bash +#PBS -l select=3:ppn=24 +#PBS -q qprod +#PBS -N JOB_NAME +#PBS -A PROJECT_ID + +cd /scratch/work/user/$USER || exit + +echo Time is `date` +echo Directory is `pwd` +echo '**PBS_NODEFILE***START*******' +cat $PBS_NODEFILE +echo '**PBS_NODEFILE***END*********' + +text_nodes < cat $PBS_NODEFILE + +module load MATLAB +module load COMSOL/51-EDU + +ntask=$(wc -l $PBS_NODEFILE) + +comsol -nn ${ntask} server -configuration /tmp -mpiarg -rmk -mpiarg pbs -tmpdir /scratch/work/user/$USER/work & +cd /apps/cae/COMSOL/51/mli +matlab -nodesktop -nosplash -r "mphstart; addpath /scratch/work/user/$USER/work; test_job" +` + +This example shows how to run Livelink for MATLAB with following +configuration: 3 nodes and 16 cores per node. Working directory has to +be created before submitting (comsol_matlab.pbs) job script into the +queue. Input file (test_job.m) has to be in working directory or full +path to input file has to be specified. The Matlab command option (-r +”mphstart”) created a connection with a COMSOL server using the default +port number. + diff --git a/converted/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.md b/converted/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.md new file mode 100644 index 0000000000000000000000000000000000000000..ffafbd5d118af4ba88750cfc5ac284e0a536d501 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.md @@ -0,0 +1,40 @@ +Licensing and Available Versions +================================ + +Comsol licence can be used by: +------------------------------ + +- all persons in the carrying out of the CE IT4Innovations Project (In + addition to the primary licensee, which is VSB - Technical + University of Ostrava, users are CE IT4Innovations third parties - + CE IT4Innovations project partners, particularly the University of + Ostrava, the Brno University of Technology - Faculty of Informatics, + the Silesian University in Opava, Institute of Geonics AS CR.) +- all + persons who have a valid + license +- students + of the Technical University + +Comsol EDU Network Licence +-------------------------- + +The licence intended to be used for science and research, publications, +students’ projects, teaching (academic licence). + +Comsol COM Network Licence +-------------------------- + +The licence intended to be used for science and research, publications, +students’ projects, commercial research with no commercial use +restrictions. > E +nables the solution +of at least one job +by one user in one +program start. + +Available Versions +------------------ + +- ver. 51 + diff --git a/converted/docs.it4i.cz/salomon/software/debuggers.md b/converted/docs.it4i.cz/salomon/software/debuggers.md new file mode 100644 index 0000000000000000000000000000000000000000..2288b8716e0d235495a90e62f7f2f967e28f5a8c --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/debuggers.md @@ -0,0 +1,90 @@ +Debuggers and profilers summary +=============================== + + + +Introduction +------------ + +We provide state of the art programms and tools to develop, profile and +debug HPC codes at IT4Innovations. +On these pages, we provide an overview of the profiling and debugging +tools available on Anslem at IT4I. + +Intel debugger +-------------- + +Intel debugger is no longer available since Parallel Studio version 2015 + +The intel debugger version 13.0 is available, via module intel. 
The +debugger works for applications compiled with C and C++ compiler and the +ifort fortran 77/90/95 compiler. The debugger provides java GUI +environment. Use [X +display](../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html) +for running the GUI. + + $ module load intel + $ idb + +Read more at the [Intel +Debugger](intel-suite/intel-debugger.html) page. + +Allinea Forge (DDT/MAP) +----------------------- + +Allinea DDT, is a commercial debugger primarily for debugging parallel +MPI or OpenMP programs. It also has a support for GPU (CUDA) and Intel +Xeon Phi accelerators. DDT provides all the standard debugging features +(stack trace, breakpoints, watches, view variables, threads etc.) for +every thread running as part of your program, or for every process - +even if these processes are distributed across a cluster using an MPI +implementation. + + $ module load Forge + $ forge + +Read more at the [Allinea +DDT](debuggers/allinea-ddt.html) page. + +Allinea Performance Reports +--------------------------- + +Allinea Performance Reports characterize the performance of HPC +application runs. After executing your application through the tool, a +synthetic HTML report is generated automatically, containing information +about several metrics along with clear behavior statements and hints to +help you improve the efficiency of your runs. Our license is limited to +64 MPI processes. + + $ module load PerformanceReports/6.0 + $ perf-report mpirun -n 64 ./my_application argument01 argument02 + +Read more at the [Allinea Performance +Reports](debuggers/allinea-performance-reports.html) +page. + +RougeWave Totalview +------------------- + +TotalView is a source- and machine-level debugger for multi-process, +multi-threaded programs. Its wide range of tools provides ways to +analyze, organize, and test programs, making it easy to isolate and +identify problems in individual threads and processes in programs of +great complexity. + + $ module load TotalView/8.15.4-6-linux-x86-64 + $ totalview + +Read more at the [Totalview](debuggers/total-view.html) +page. + +Vampir trace analyzer +--------------------- + +Vampir is a GUI trace analyzer for traces in OTF format. + + $ module load Vampir/8.5.0 + $ vampir + +Read more at the [Vampir](debuggers/vampir.html) page. 
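+
+Whichever tool you choose, it works best on binaries built with
+debugging symbols and without optimization. As a minimal sketch (the
+modules and source file name are only illustrative; see the individual
+tool pages for the details):
+
+    $ module load intel impi
+    # -g adds debugging symbols, -O0 disables optimizations
+    $ mpicc -g -O0 -o test_debug test.c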
+ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/Snmekobrazovky20160211v14.27.45.png b/converted/docs.it4i.cz/salomon/software/debuggers/Snmekobrazovky20160211v14.27.45.png new file mode 100644 index 0000000000000000000000000000000000000000..8c56805d7a0a69f2c7b90d1c3c6ee1d8d84fc85a Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/debuggers/Snmekobrazovky20160211v14.27.45.png differ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/Snmekobrazovky20160708v12.33.35.png b/converted/docs.it4i.cz/salomon/software/debuggers/Snmekobrazovky20160708v12.33.35.png new file mode 100644 index 0000000000000000000000000000000000000000..d8ea15508f0714eeacfadff6d85fe8cafe5c406b Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/debuggers/Snmekobrazovky20160708v12.33.35.png differ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/TightVNC_login.png b/converted/docs.it4i.cz/salomon/software/debuggers/TightVNC_login.png new file mode 100644 index 0000000000000000000000000000000000000000..078dfc73a90b2b3ffc1648fa82ba4b0a109fbc29 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/debuggers/TightVNC_login.png differ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/aislinn.md b/converted/docs.it4i.cz/salomon/software/debuggers/aislinn.md new file mode 100644 index 0000000000000000000000000000000000000000..ca9d44c11ee2289d3d1dc69e9612a863fa32d191 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/debuggers/aislinn.md @@ -0,0 +1,150 @@ +Aislinn +======= + +- Aislinn is a dynamic verifier for MPI programs. For a fixed input it + covers all possible runs with respect to nondeterminism introduced + by MPI. It allows to detect bugs (for sure) that occurs very rare in + normal runs. +- Aislinn detects problems like invalid memory accesses, deadlocks, + misuse of MPI, and resource leaks. +- Aislinn is open-source software; you can use it without any + licensing limitations. +- Web page of the project: <http://verif.cs.vsb.cz/aislinn/> + +Note + +Aislinn is software developed at IT4Innovations and some parts are still +considered experimental. If you have any questions or experienced any +problems, please contact the author: <stanislav.bohm@vsb.cz>. + +### Usage + +Let us have the following program that contains a bug that is not +manifested in all runs: + +` +#include <mpi.h> +#include <stdlib.h> + +int main(int argc, char **argv) { + int rank; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (rank == 0) { + int *mem1 = (int*) malloc(sizeof(int) * 2); + int *mem2 = (int*) malloc(sizeof(int) * 3); + int data; + MPI_Recv(&data, 1, MPI_INT, MPI_ANY_SOURCE, 1, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + mem1[data] = 10; // <---------- Possible invalid memory write + MPI_Recv(&data, 1, MPI_INT, MPI_ANY_SOURCE, 1, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + mem2[data] = 10; + free(mem1); + free(mem2); + } + + if (rank == 1 || rank == 2) { + MPI_Send(&rank, 1, MPI_INT, 0, 1, MPI_COMM_WORLD); + } + + MPI_Finalize(); + return 0; +} +` + +The program does the following: process 0 receives two messages from +anyone and processes 1 and 2 send a message to process 0. If a message +from process 1 is received first, then the run does not expose the +error. If a message from process 2 is received first, then invalid +memory write occurs at line 16. 
+ +To verify this program by Aislinn, we first load Aislinn itself: + +` +$ module load aislinn +` + +Now we compile the program by Aislinn implementation of MPI. There are +`mpicc` for C programs and `mpicxx`{.docutils +.literal} for C++ programs. Only MPI parts of the verified application +has to be recompiled; non-MPI parts may remain untouched. Let us assume +that our program is in `test.cpp`. + +` +$ mpicc -g test.cpp -o test +` + +The `-g` flag is not necessary, but it puts more +debugging information into the program, hence Aislinn may provide more +detailed report. The command produces executable file `test`{.docutils +.literal}. + +Now we run the Aislinn itself. The argument `-p 3` +specifies that we want to verify our program for the case of three MPI +processes + +` +$ aislinn -p 3 ./test +==AN== INFO: Aislinn v0.3.0 +==AN== INFO: Found error 'Invalid write' +==AN== INFO: 1 error(s) found +==AN== INFO: Report written into 'report.html' +` + +Aislinn found an error and produced HTML report. To view it, we can use +any browser, e.g.: + + $ firefox report.html + +At the beginning of the report there are some basic summaries of the +verification. In the second part (depicted in the following picture), +the error is described. + + +It shows us: + + - Error occurs in process 0 in test.cpp on line 16. + - Stdout and stderr streams are empty. (The program does not + write anything). + - The last part shows MPI calls for each process that occurs in the + invalid run. The more detailed information about each call can be + obtained by mouse cursor. + +### Limitations + +Since the verification is a non-trivial process there are some of +limitations. + +- The verified process has to terminate in all runs, i.e. we cannot + answer the halting problem. +- The verification is a computationally and memory demanding process. + We put an effort to make it efficient and it is an important point + for further research. However covering all runs will be always more + demanding than techniques that examines only a single run. The good + practise is to start with small instances and when it is feasible, + make them bigger. The Aislinn is good to find bugs that are hard to + find because they occur very rarely (only in a rare scheduling). + Such bugs often do not need big instances. +- Aislinn expects that your program is a "standard MPI" program, i.e. + processes communicate only through MPI, the verified program does + not interacts with the system in some unusual ways (e.g. + opening sockets). + +There are also some limitations bounded to the current version and they +will be removed in the future: + +- All files containing MPI calls have to be recompiled by MPI + implementation provided by Aislinn. The files that does not contain + MPI calls, they do not have to recompiled. Aislinn MPI + implementation supports many commonly used calls from MPI-2 and + MPI-3 related to point-to-point communication, collective + communication, and communicator management. Unfortunately, MPI-IO + and one-side communication is not implemented yet. +- Each MPI can use only one thread (if you use OpenMP, set + `OMP_NUM_THREADS` to 1). +- There are some limitations for using files, but if the program just + reads inputs and writes results, it is ok. 
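+
+For non-interactive use, the verification can be wrapped in an ordinary
+batch job. The following is only a sketch: the queue, project ID and
+source file name are placeholders, and the compile and verify commands
+are the ones shown above:
+
+    #!/bin/bash
+    #PBS -q qexp
+    #PBS -A PROJECT_ID
+    #PBS -l select=1:ncpus=24
+
+    cd $PBS_O_WORKDIR
+
+    module load aislinn
+
+    # Aislinn expects a single thread per MPI process
+    export OMP_NUM_THREADS=1
+
+    # recompile with the Aislinn MPI implementation and verify 3 processes
+    mpicc -g test.cpp -o test
+    aislinn -p 3 ./test
+
+The resulting report.html can then be inspected in a browser, as
+described above.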
+ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.md b/converted/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.md new file mode 100644 index 0000000000000000000000000000000000000000..e84881fc28688701accedcbd6f91a6d057b83f08 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.md @@ -0,0 +1,220 @@ +Allinea Forge (DDT,MAP) +======================= + + + +Allinea Forge consist of two tools - debugger DDT and profiler MAP. + +Allinea DDT, is a commercial debugger primarily for debugging parallel +MPI or OpenMP programs. It also has a support for GPU (CUDA) and Intel +Xeon Phi accelerators. DDT provides all the standard debugging features +(stack trace, breakpoints, watches, view variables, threads etc.) for +every thread running as part of your program, or for every process - +even if these processes are distributed across a cluster using an MPI +implementation. + +Allinea MAP is a profiler for C/C++/Fortran HPC codes. It is designed +for profiling parallel code, which uses pthreads, OpenMP or MPI. + +License and Limitations for the clusters Users +---------------------------------------------- + +On the clusters users can debug OpenMP or MPI code that runs up to 64 +parallel processes. In case of debugging GPU or Xeon Phi accelerated +codes the limit is 8 accelerators. These limitation means that: + +- 1 user can debug up 64 processes, or +- 32 users can debug 2 processes, etc. + +In case of debugging on accelerators: + +- 1 user can debug on up to 8 accelerators, or +- 8 users can debug on single accelerator. + +Compiling Code to run with Forge +-------------------------------- + +### Modules + +Load all necessary modules to compile the code. For example: + + $ module load intel + $ module load impi ... or ... module load OpenMPI + +Load the Allinea DDT module: + + $ module load Forge + +Compile the code: + +` +$ mpicc -g -O0 -o test_debug test.c + +$ mpif90 -g -O0 -o test_debug test.f +` + +### Compiler flags + +Before debugging, you need to compile your code with theses flags: + +-g** : Generates extra debugging information usable by GDB. -g3** +includes even more debugging information. This option is available for +GNU and INTEL C/C++ and Fortran compilers. + +-O0** : Suppress all optimizations.** + + + +Direct starting a Job with Forge +-------------------------------- + +Be sure to log in with an [ X window +forwarding +enabled](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html). +This could mean using the -X in the ssh:  + + $ ssh -X username@clustername.it4i.cz + +Other options is to access login node using VNC. Please see the detailed +information on [how to +use graphic user interface on the +clusters](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html) +. + +From the login node an interactive session **with X windows forwarding** +(-X option) can be started by following command: + + $ qsub -I -X -A NONE-0-0 -q qexp -lselect=1:ncpus=24:mpiprocs=24,walltime=01:00:00 + +Then launch the debugger with the ddt command followed by the name of +the executable to debug: + + $ ddt test_debug + +Forge now has common GUI for both DDT and MAP. In interactive mode, you +can launch Forge using forge, ddt or map, +the latter two will just launch forge and swith to the respective +tab in the common GUI. 
+ +A submission window that appears have +a prefilled path to the executable to debug. You can select the number +of MPI processors and/or OpenMP threads on which to run and press run. +Command line arguments to a program can be entered to the +"Arguments " +box. + + +  + +To start the debugging directly without the submission window, user can +specify the debugging and execution parameters from the command line. +For example the number of MPI processes is set by option "-np 4". +Skipping the dialog is done by "-start" option. To see the list of the +"ddt" command line parameters, run "ddt --help".  + + ddt -start -np 4 ./hello_debug_impi + +All of the above text also applies for MAP, just replace ddt command +with map. + +Reverse connect +--------------- + +Forge now provides a new convenient mode of operation, called Reverse +connect. Instead of launching a job from the GUI, the process is +reserved - DDT/MAP is launched as a server in the job which then +connects to a running instance of your GUI. + +To use Reverse connect, use a jobscript that you would normally use to +launch your application, just prepend ddt/map --connect to your +application: + + map --connect mpirun -np 24 ./mpi-test + ddt --connect mpirun -np 24 ./mpi-test + +Launch Forge GUI on login node and submit the job using qsub. When the +job starts running, Forge will ask you to accept the connection: + + + +After accepting the request, you can start remote profiling/debugging. + +Xeon Phi +-------- + +Forge allows debugging and profiling of both offload and native mode +Xeon Phi programs. + +### Offload mode + +It is recommended to set the following environment values on the offload +host: + + export MYO_WATCHDOG_MONITOR=-1 # To make sure the host process isn't killed when we enter a debugging session + export AMPLXE_COI_DEBUG_SUPPORT=true # To make sure that debugging symbols are accessible on the host and the card + unset OFFLOAD_MAIN # To make sure allinea DDT can attach to offloaded codes + +Then use one of the above mentioned methods to launch Forge. (Reverse +connect also works.) + +### Native mode + +Native mode programs can be profiled/debugged using the remote launch +feature. First, you need to create a script that will setup the +environment on the Phi card. An example: + + #!/bin/bash + # adjust PATH and LD_LIBRARY_PATH according to the toolchain/libraries your app is using. + export PATH=/apps/all/impi/5.0.3.048-iccifort-2015.3.187/mic/bin:$PATH + export LD_LIBRARY_PATH=/apps/all/impi/5.0.3.048-iccifort-2015.3.187/mic/lib:/apps/all/ifort/2015.3.187/lib/mic:/apps/all/icc/2015.3.187/lib/mic:$LD_LIBRARY_PATH + export MIC_OMP_NUM_THREADS=60 + export MYO_WATCHDOG_MONITOR=-1 + export AMPLXE_COI_DEBUG_SUPPORT=true + unset OFFLOAD_MAIN + export I_MPI_MIC=1 + +Save the script in eg. ~/remote-mic.sh. +Now, start an interactive graphical session on a node with +accelerator: + + $ qsub â€IX â€q qexp â€l select=1:ncpus=24:accelerator=True + +Launch Forge : + + $ module load Forge + $ forge& + +Now click on the remote launch drop-down list, select "Configure..." and +Add a new remote connection with the following parameters: + +Connection name: mic0 + +Hostname: mic0 + +Remote Installation Directory: /apps/all/Forge/5.1-43967/ + +Remote script: ~/remote-mic.sh + +You can click Test Remote Launch to verify the configuration. After you +save the remote launch configuration and select it in the dropdown list, +you can use the Run button in the main windows to remotely launch your +program on mic0. 
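+
+For completeness, a Reverse connect session (see the Reverse connect
+section above) can also be started from a regular jobscript. This is
+only a sketch; the project ID, modules and executable name are
+placeholders:
+
+    #!/bin/bash
+    #PBS -q qprod
+    #PBS -A PROJECT_ID
+    #PBS -l select=2:ncpus=24:mpiprocs=24
+
+    cd $PBS_O_WORKDIR
+
+    module load intel impi Forge
+
+    # DDT runs as a server inside the job and connects back to the
+    # Forge GUI already running on the login node
+    ddt --connect mpirun -np 48 ./mpi-test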
+
+Documentation
+-------------
+
+Users can find the original User Guide after loading the Forge module:
+
+    $EBROOTFORGE/doc/userguide-forge.pdf
+
+[1] Discipline, Magic, Inspiration and Science: Best Practice
+Debugging with Allinea DDT, Workshop conducted at LLNL by Allinea on May
+10, 2013,
+[link](https://computing.llnl.gov/tutorials/allineaDDT/index.html)
+
 diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.md b/converted/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.md
new file mode 100644
index 0000000000000000000000000000000000000000..f894c2ccc76f920294042c1ba86adaf4e4e39929
--- /dev/null
+++ b/converted/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.md
@@ -0,0 +1,76 @@
+Allinea Performance Reports
+===========================
+
+quick application profiling
+
+Introduction
+------------
+
+Allinea Performance Reports characterize the performance of HPC
+application runs. After executing your application through the tool, a
+synthetic HTML report is generated automatically, containing information
+about several metrics along with clear behavior statements and hints to
+help you improve the efficiency of your runs.
+
+Allinea Performance Reports is most useful for profiling MPI
+programs.
+
+Our license is limited to 64 MPI processes.
+
+Modules
+-------
+
+Allinea Performance Reports version 6.0 is available:
+
+    $ module load PerformanceReports/6.0
+
+The module sets up the environment variables required for using Allinea
+Performance Reports.
+
+Usage
+-----
+
+Use the perf-report wrapper on your (MPI) program.
+
+Instead of [running your MPI program the usual
+way](../mpi-1.html), use the perf-report wrapper:
+
+    $ perf-report mpirun ./mympiprog.x
+
+The MPI program will run as usual. The perf-report wrapper creates two
+additional files, in *.txt and *.html format, containing the
+performance report. Note that demanding MPI
+codes should be run within [the queue
+system](../../resource-allocation-and-job-execution/job-submission-and-execution.html).
+
+Example
+-------
+
+In this example, we will profile the mympiprog.x MPI program using
+Allinea Performance Reports. Assume that the code is compiled with the
+Intel compilers and linked against the Intel MPI library:
+
+First, we allocate some nodes via the express queue:
+
+    $ qsub -q qexp -l select=2:ppn=24:mpiprocs=24:ompthreads=1 -I
+    qsub: waiting for job 262197.dm2 to start
+    qsub: job 262197.dm2 ready
+
+Then we load the modules and run the program the usual way:
+
+    $ module load intel impi PerformanceReports/6.0
+    $ mpirun ./mympiprog.x
+
+Now let's profile the code:
+
+    $ perf-report mpirun ./mympiprog.x
+
+The performance report files
+[mympiprog_32p*.txt](mympiprog_32p_2014-10-15_16-56.txt)
+and
+[mympiprog_32p*.html](mympiprog_32p_2014-10-15_16-56.html)
+were created. We can see that the code is very efficient on MPI and is
+CPU bound. 
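+
+The same profiling can also be done non-interactively. A minimal
+jobscript sketch (the project ID and binary name are placeholders) might
+look like this:
+
+    #!/bin/bash
+    #PBS -q qprod
+    #PBS -A PROJECT_ID
+    #PBS -l select=2:ncpus=24:mpiprocs=24:ompthreads=1
+
+    cd $PBS_O_WORKDIR
+
+    module load intel impi PerformanceReports/6.0
+
+    # the wrapper writes the *.txt and *.html performance reports
+    perf-report mpirun ./mympiprog.x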
+ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/ddt1.png b/converted/docs.it4i.cz/salomon/software/debuggers/ddt1.png new file mode 100644 index 0000000000000000000000000000000000000000..57a18f48908f0b8a4857bc14a820b450fcdd9652 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/debuggers/ddt1.png differ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.md b/converted/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.md new file mode 100644 index 0000000000000000000000000000000000000000..efb8cd82bcadf7f0d3acc92c19803b5c0713d346 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.md @@ -0,0 +1,137 @@ +Intel VTune Amplifier XE +======================== + + + +Introduction +------------ + +Intel*® *VTune™ >Amplifier, part of Intel Parallel studio, is a GUI +profiling tool designed for Intel processors. It offers a graphical +performance analysis of single core and multithreaded applications. A +highlight of the features: + +- Hotspot analysis +- Locks and waits analysis +- Low level specific counters, such as branch analysis and memory + bandwidth +- Power usage analysis - frequency and sleep states. + + + +Usage +----- + +To profile an application with VTune Amplifier, special kernel +modules need to be loaded. The modules are not loaded on the login +nodes, thus direct profiling on login nodes is not possible. By default, +the kernel modules ale not loaded on compute nodes neither. In order to +have the modules loaded, you need to specify vtune=version PBS resource +at job submit. The version is the same as for *environment module*. For +example to use +VTune/2016_update1: + + $ qsub -q qexp -A OPEN-0-0 -I -l select=1,vtune=2016_update1 + +After that, you can verify the modules sep*, pax and vtsspp are present +in the kernel : + + $ lsmod | grep -e sep -e pax -e vtsspp + vtsspp 362000 0 + sep3_15 546657 0 + pax 4312 0 + +To launch the GUI, first load the module: + + $ module add VTune/2016_update1 + + class="s1">and launch the GUI : + + $ amplxe-gui + +The GUI will open in new window. Click on "*New Project...*" to +create a new project. After clicking *OK*, a new window with project +properties will appear.  At "*Application:*", select the bath to your +binary you want to profile (the binary should be compiled with -g flag). +Some additional options such as command line arguments can be selected. +At "*Managed code profiling mode:*" select "*Native*" (unless you want +to profile managed mode .NET/Mono applications). After clicking *OK*, +your project is created. + +To run a new analysis, click "*New analysis...*". You will see a list of +possible analysis. Some of them will not be possible on the current CPU +(eg. Intel Atom analysis is not possible on Sandy bridge CPU), the GUI +will show an error box if you select the wrong analysis. For example, +select "*Advanced Hotspots*". Clicking on *Start *will start profiling +of the application. + +Remote Analysis +--------------- + +VTune Amplifier also allows a form of remote analysis. In this mode, +data for analysis is collected from the command line without GUI, and +the results are then loaded to GUI on another machine. This allows +profiling without interactive graphical jobs. To perform a remote +analysis, launch a GUI somewhere, open the new analysis window and then +click the button "*Command line*" in bottom right corner. It will show +the command line needed to perform the selected analysis. 
+ +The command line will look like this: + + /apps/all/VTune/2016_update1/vtune_amplifier_xe_2016.1.1.434111/bin64/amplxe-cl -collect advanced-hotspots -app-working-dir /home/sta545/tmp -- /home/sta545/tmp/sgemm + +Copy the line to clipboard and then you can paste it in your jobscript +or in command line. After the collection is run, open the GUI once +again, click the menu button in the upper right corner, and select +"*Open > Result...*". The GUI will load the results from the run. + +Xeon Phi +-------- + +It is possible to analyze both native and offloaded Xeon Phi +applications. + +### Native mode + +This mode is useful for native Xeon Phi applications launched directly +on the card. In *Analysis Target* window, select *Intel Xeon Phi +coprocessor (native), *choose path to the binary and MIC card to run on. + +### Offload mode + +This mode is useful for applications that are launched from the host and +use offload, OpenCL or mpirun. In *Analysis Target* window, +select *Intel Xeon Phi coprocessor (native), *choose path to the binary +and MIC card to run on. + + + +If the analysis is interrupted or aborted, further analysis on the card +might be impossible and you will get errors like "ERROR connecting to +MIC card". In this case please contact our support to reboot the MIC +card. + +You may also use remote analysis to collect data from the MIC and then +analyze it in the GUI later : + +Native launch: + + $ /apps/all/VTune/2016_update1/vtune_amplifier_xe_2016.1.1.434111/bin64/amplxe-cl -target-system mic-native:0 -collect advanced-hotspots -- /home/sta545/tmp/vect-add-mic + +Host launch: + + $ /apps/all/VTune/2016_update1/vtune_amplifier_xe_2016.1.1.434111/bin64/amplxe-cl -target-system mic-host-launch:0 -collect advanced-hotspots -- /home/sta545/tmp/sgemm + +You can obtain this command line by pressing the "Command line..." +button on Analysis Type screen. + +References +---------- + +1. ><https://www.rcac.purdue.edu/tutorials/phi/PerformanceTuningXeonPhi-Tullos.pdf> Performance + Tuning for Intel® Xeon Phi™ Coprocessors +2. ><https://software.intel.com/en-us/intel-vtune-amplifier-xe-support/documentation> >Intel® + VTune™ Amplifier Support +3. ><https://software.intel.com/en-us/amplifier_help_linux> Linux + user guide + diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/report.png b/converted/docs.it4i.cz/salomon/software/debuggers/report.png new file mode 100644 index 0000000000000000000000000000000000000000..d7614f04f06b8fb8b93e83225ad97e4210ea2c14 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/debuggers/report.png differ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/summary.md b/converted/docs.it4i.cz/salomon/software/debuggers/summary.md new file mode 100644 index 0000000000000000000000000000000000000000..a9ec88e500e2f43264c2ee09d8fba06c87bdc171 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/debuggers/summary.md @@ -0,0 +1,87 @@ +Debuggers and profilers summary +=============================== + + + +Introduction +------------ + +We provide state of the art programms and tools to develop, profile and +debug HPC codes at IT4Innovations. +On these pages, we provide an overview of the profiling and debugging +tools available on Anslem at IT4I. + +Intel debugger +-------------- + +Intel debugger is no longer available since Parallel Studio version 2015 + +The intel debugger version 13.0 is available, via module intel. 
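+
+As an addendum to the Remote Analysis section above, the command line
+obtained from the GUI can also be embedded in a jobscript. The sketch
+below is illustrative only; the project ID, binary and analysis type are
+placeholders, and the vtune PBS resource must match the module version:
+
+    #!/bin/bash
+    #PBS -q qprod
+    #PBS -A PROJECT_ID
+    #PBS -l select=1:ncpus=24,vtune=2016_update1
+
+    cd $PBS_O_WORKDIR
+
+    module add VTune/2016_update1
+
+    # collect data on the compute node; open the result in the GUI later
+    amplxe-cl -collect advanced-hotspots -app-working-dir $PBS_O_WORKDIR -- ./mybinary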
The +debugger works for applications compiled with C and C++ compiler and the +ifort fortran 77/90/95 compiler. The debugger provides java GUI +environment. Use [X +display](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html) +for running the GUI. + + $ module load intel + $ idb + +Read more at the [Intel +Debugger](../intel-suite/intel-debugger.html) page. + +Allinea Forge (DDT/MAP) +----------------------- + +Allinea DDT, is a commercial debugger primarily for debugging parallel +MPI or OpenMP programs. It also has a support for GPU (CUDA) and Intel +Xeon Phi accelerators. DDT provides all the standard debugging features +(stack trace, breakpoints, watches, view variables, threads etc.) for +every thread running as part of your program, or for every process - +even if these processes are distributed across a cluster using an MPI +implementation. + + $ module load Forge + $ forge + +Read more at the [Allinea DDT](allinea-ddt.html) page. + +Allinea Performance Reports +--------------------------- + +Allinea Performance Reports characterize the performance of HPC +application runs. After executing your application through the tool, a +synthetic HTML report is generated automatically, containing information +about several metrics along with clear behavior statements and hints to +help you improve the efficiency of your runs. Our license is limited to +64 MPI processes. + + $ module load PerformanceReports/6.0 + $ perf-report mpirun -n 64 ./my_application argument01 argument02 + +Read more at the [Allinea Performance +Reports](allinea-performance-reports.html) page. + +RougeWave Totalview +------------------- + +TotalView is a source- and machine-level debugger for multi-process, +multi-threaded programs. Its wide range of tools provides ways to +analyze, organize, and test programs, making it easy to isolate and +identify problems in individual threads and processes in programs of +great complexity. + + $ module load TotalView/8.15.4-6-linux-x86-64 + $ totalview + +Read more at the [Totalview](total-view.html) page. + +Vampir trace analyzer +--------------------- + +Vampir is a GUI trace analyzer for traces in OTF format. + + $ module load Vampir/8.5.0 + $ vampir + +Read more at the [Vampir](vampir.html) page. + diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/total-view.md b/converted/docs.it4i.cz/salomon/software/debuggers/total-view.md new file mode 100644 index 0000000000000000000000000000000000000000..9a7166c437444f5245d4d70baad456a4469e85c5 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/debuggers/total-view.md @@ -0,0 +1,157 @@ +Total View +========== + +TotalView is a GUI-based source code multi-process, multi-thread +debugger. + +License and Limitations for cluster Users +----------------------------------------- + +On the cluster users can debug OpenMP or MPI code that runs up to 64 +parallel processes. These limitation means that: + +   1 user can debug up 64 processes, or +   32 users can debug 2 processes, etc. + +Debugging of GPU accelerated codes is also supported. + +You can check the status of the licenses +[here](https://extranet.it4i.cz/rsweb/anselm/license/totalview). + +Compiling Code to run with TotalView +------------------------------------ + +### Modules + +Load all necessary modules to compile the code. For example: + + module load intel + + module load impi  ... or ... 
module load OpenMPI/X.X.X-icc + +Load the TotalView module: + + module load TotalView/8.15.4-6-linux-x86-64 + +Compile the code: + + mpicc -g -O0 -o test_debug test.c + + mpif90 -g -O0 -o test_debug test.f + +### Compiler flags + +Before debugging, you need to compile your code with theses flags: + +-g** : Generates extra debugging information usable by GDB. -g3** +includes even more debugging information. This option is available for +GNU and INTEL C/C++ and Fortran compilers. + +-O0** : Suppress all optimizations.** + +Starting a Job with TotalView +----------------------------- + +Be sure to log in with an X window forwarding enabled. This could mean +using the -X in the ssh: + + ssh -X username@salomon.it4i.cz + +Other options is to access login node using VNC. Please see the detailed +information on how to use graphic user interface on Anselm +[here](https://docs.it4i.cz/salomon/software/debuggers/resolveuid/11e53ad0d2fd4c5187537f4baeedff33#VNC). + +From the login node an interactive session with X windows forwarding (-X +option) can be started by following command: + + qsub -I -X -A NONE-0-0 -q qexp -lselect=1:ncpus=24:mpiprocs=24,walltime=01:00:00 + +Then launch the debugger with the totalview command followed by the name +of the executable to debug. + +### Debugging a serial code + +To debug a serial code use: + + totalview test_debug + +### Debugging a parallel code - option 1 + +To debug a parallel code compiled with >**OpenMPI** you need +to setup your TotalView environment: + +Please note:** To be able to run parallel debugging procedure from the +command line without stopping the debugger in the mpiexec source code +you have to add the following function to your **~/.tvdrc** file: + + proc mpi_auto_run_starter {loaded_id} { +    set starter_programs {mpirun mpiexec orterun} +    set executable_name [TV::symbol get $loaded_id full_pathname] +    set file_component [file tail $executable_name] + +    if {[lsearch -exact $starter_programs $file_component] != -1} { +        puts "*************************************" +        puts "Automatically starting $file_component" +        puts "*************************************" +        dgo +    } + } + + # Append this function to TotalView's image load callbacks so that + # TotalView run this program automatically. + + dlappend TV::image_load_callbacks mpi_auto_run_starter + +The source code of this function can be also found in + + /apps/all/OpenMPI/1.10.1-GNU-4.9.3-2.25/etc/openmpi-totalview.tcl + +You can also add only following line to you ~/.tvdrc file instead of +the entire function: + +source /apps/all/OpenMPI/1.10.1-GNU-4.9.3-2.25/etc/openmpi-totalview.tcl** + +You need to do this step only once. See also [OpenMPI FAQ +entry](https://www.open-mpi.org/faq/?category=running#run-with-tv) + +Now you can run the parallel debugger using: + + mpirun -tv -n 5 ./test_debug + +When following dialog appears click on "Yes" + + + +At this point the main TotalView GUI window will appear and you can +insert the breakpoints and start debugging: + + + +### Debugging a parallel code - option 2 + +Other option to start new parallel debugging session from a command line +is to let TotalView to execute mpirun by itself. In this case user has +to specify a MPI implementation used to compile the source code. 
+ +The following example shows how to start debugging session with Intel +MPI: + + module load intel/2015b-intel-2015b impi/5.0.3.048-iccifort-2015.3.187-GNU-5.1.0-2.25 TotalView/8.15.4-6-linux-x86-64 + + totalview -mpi "Intel MPI-Hydra" -np 8 ./hello_debug_impi + +After running previous command you will see the same window as shown in +the screenshot above. + +More information regarding the command line parameters of the TotalView +can be found TotalView Reference Guide, Chapter 7: TotalView Command +Syntax.  + +Documentation +------------- + +[1] The [TotalView +documentation](http://www.roguewave.com/support/product-documentation/totalview-family.aspx#totalview) +web page is a good resource for learning more about some of the advanced +TotalView features. + diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/totalview1.png b/converted/docs.it4i.cz/salomon/software/debuggers/totalview1.png new file mode 100644 index 0000000000000000000000000000000000000000..09678f174b4f5d05ccda08cc11de059ee53e7e09 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/debuggers/totalview1.png differ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/totalview2.png b/converted/docs.it4i.cz/salomon/software/debuggers/totalview2.png new file mode 100644 index 0000000000000000000000000000000000000000..9d26ffd97db190a6b8161dd6358fc759ad2cb2ca Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/debuggers/totalview2.png differ diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/valgrind.md b/converted/docs.it4i.cz/salomon/software/debuggers/valgrind.md new file mode 100644 index 0000000000000000000000000000000000000000..1cd426c1d28c0d13acb0208e7f071cd062f547d9 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/debuggers/valgrind.md @@ -0,0 +1,299 @@ +Valgrind +======== + +Valgrind is a tool for memory debugging and profiling. + +About Valgrind +-------------- + +Valgrind is an open-source tool, used mainly for debuggig memory-related +problems, such as memory leaks, use of uninitalized memory etc. in C/C++ +applications. The toolchain was however extended over time with more +functionality, such as debugging of threaded applications, cache +profiling, not limited only to C/C++. + +Valgind is an extremely useful tool for debugging memory errors such as +[off-by-one](http://en.wikipedia.org/wiki/Off-by-one_error). +Valgrind uses a virtual machine and dynamic recompilation of binary +code, because of that, you can expect that programs being debugged by +Valgrind run 5-100 times slower. + +The main tools available in Valgrind are : + +- **Memcheck**, the original, must used and default tool. Verifies + memory access in you program and can detect use of unitialized + memory, out of bounds memory access, memory leaks, double free, etc. +- **Massif**, a heap profiler. +- **Hellgrind** and **DRD** can detect race conditions in + multi-threaded applications. +- **Cachegrind**, a cache profiler. +- **Callgrind**, a callgraph analyzer. +- For a full list and detailed documentation, please refer to the + [official Valgrind + documentation](http://valgrind.org/docs/). + +Installed versions +------------------ + +There are two versions of Valgrind available on the cluster. + +- >Version 3.8.1, installed by operating system vendor + in /usr/bin/valgrind. + >This version is available by default, without the need + to load any module. This version however does not provide additional + MPI support. 
Also, it does not support AVX2 instructions, + **debugging of an AVX2-enabled executable with this version will + fail** +- >Version 3.11.0 built by ICC with support for Intel MPI, + available in + [module](../../environment-and-modules.html) + Valgrind/3.11.0-intel-2015b. After loading + the module, this version replaces the default valgrind. +- Version 3.11.0 built by GCC with support for Open MPI, module + Valgrind/3.11.0-foss-2015b + +Usage +----- + +Compile the application which you want to debug as usual. It is +advisable to add compilation flags -g (to +add debugging information to the binary so that you will see original +source code lines in the output) and -O0 +(to disable compiler optimizations). + +For example, lets look at this C code, which has two problems : + + #include <stdlib.h> + + void f(void) + { + int* x = malloc(10 * sizeof(int)); + x[10] = 0; // problem 1: heap block overrun + } // problem 2: memory leak -- x not freed + + int main(void) + { + f(); + return 0; + } + +Now, compile it with Intel compiler : + + $ module add intel + $ icc -g valgrind-example.c -o valgrind-example + +Now, lets run it with Valgrind. The syntax is : + + valgrind [valgrind options] <your program +binary> [your program options] + +If no Valgrind options are specified, Valgrind defaults to running +Memcheck tool. Please refer to the Valgrind documentation for a full +description of command line options. + + $ valgrind ./valgrind-example + ==12652== Memcheck, a memory error detector + ==12652== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. + ==12652== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==12652== Command: ./valgrind-example + ==12652== + ==12652== Invalid write of size 4 + ==12652== at 0x40053E: f (valgrind-example.c:6) + ==12652== by 0x40054E: main (valgrind-example.c:11) + ==12652== Address 0x5861068 is 0 bytes after a block of size 40 alloc'd + ==12652== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==12652== by 0x400528: f (valgrind-example.c:5) + ==12652== by 0x40054E: main (valgrind-example.c:11) + ==12652== + ==12652== + ==12652== HEAP SUMMARY: + ==12652== in use at exit: 40 bytes in 1 blocks + ==12652== total heap usage: 1 allocs, 0 frees, 40 bytes allocated + ==12652== + ==12652== LEAK SUMMARY: + ==12652== definitely lost: 40 bytes in 1 blocks + ==12652== indirectly lost: 0 bytes in 0 blocks + ==12652== possibly lost: 0 bytes in 0 blocks + ==12652== still reachable: 0 bytes in 0 blocks + ==12652== suppressed: 0 bytes in 0 blocks + ==12652== Rerun with --leak-check=full to see details of leaked memory + ==12652== + ==12652== For counts of detected and suppressed errors, rerun with: -v + ==12652== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 6 from 6) + +In the output we can see that Valgrind has detected both errors - the +off-by-one memory access at line 5 and a memory leak of 40 bytes. If we +want a detailed analysis of the memory leak, we need to run Valgrind +with --leak-check=full option : + + $ valgrind --leak-check=full ./valgrind-example + ==23856== Memcheck, a memory error detector + ==23856== Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al. 
+ ==23856== Using Valgrind-3.6.0 and LibVEX; rerun with -h for copyright info + ==23856== Command: ./valgrind-example + ==23856== + ==23856== Invalid write of size 4 + ==23856== at 0x40067E: f (valgrind-example.c:6) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== Address 0x66e7068 is 0 bytes after a block of size 40 alloc'd + ==23856== at 0x4C26FDE: malloc (vg_replace_malloc.c:236) + ==23856== by 0x400668: f (valgrind-example.c:5) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== + ==23856== + ==23856== HEAP SUMMARY: + ==23856== in use at exit: 40 bytes in 1 blocks + ==23856== total heap usage: 1 allocs, 0 frees, 40 bytes allocated + ==23856== + ==23856== 40 bytes in 1 blocks are definitely lost in loss record 1 of 1 + ==23856== at 0x4C26FDE: malloc (vg_replace_malloc.c:236) + ==23856== by 0x400668: f (valgrind-example.c:5) + ==23856== by 0x40068E: main (valgrind-example.c:11) + ==23856== + ==23856== LEAK SUMMARY: + ==23856== definitely lost: 40 bytes in 1 blocks + ==23856== indirectly lost: 0 bytes in 0 blocks + ==23856== possibly lost: 0 bytes in 0 blocks + ==23856== still reachable: 0 bytes in 0 blocks + ==23856== suppressed: 0 bytes in 0 blocks + ==23856== + ==23856== For counts of detected and suppressed errors, rerun with: -v + ==23856== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 6 from 6) + +Now we can see that the memory leak is due to the +malloc() at line 6. + +Usage with MPI +--------------------------- + +Although Valgrind is not primarily a parallel debugger, it can be used +to debug parallel applications as well. When launching your parallel +applications, prepend the valgrind command. For example : + + $ mpirun -np 4 valgrind myapplication + +The default version without MPI support will however report a large +number of false errors in the MPI library, such as : + + ==30166== Conditional jump or move depends on uninitialised value(s) + ==30166== at 0x4C287E8: strlen (mc_replace_strmem.c:282) + ==30166== by 0x55443BD: I_MPI_Processor_model_number (init_interface.c:427) + ==30166== by 0x55439E0: I_MPI_Processor_arch_code (init_interface.c:171) + ==30166== by 0x558D5AE: MPID_nem_impi_init_shm_configuration (mpid_nem_impi_extensions.c:1091) + ==30166== by 0x5598F4C: MPID_nem_init_ckpt (mpid_nem_init.c:566) + ==30166== by 0x5598B65: MPID_nem_init (mpid_nem_init.c:489) + ==30166== by 0x539BD75: MPIDI_CH3_Init (ch3_init.c:64) + ==30166== by 0x5578743: MPID_Init (mpid_init.c:193) + ==30166== by 0x554650A: MPIR_Init_thread (initthread.c:539) + ==30166== by 0x553369F: PMPI_Init (init.c:195) + ==30166== by 0x4008BD: main (valgrind-example-mpi.c:18) + +so it is better to use the MPI-enabled valgrind from module. The MPI +versions requires library : + +$EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so + +which must be included in the LD_PRELOAD +environment variable. + +Lets look at this MPI example : + + #include <stdlib.h> + #include <mpi.h> + + int main(int argc, char *argv[]) + { +      int *data = malloc(sizeof(int)*99); + +      MPI_Init(&argc, &argv); +     MPI_Bcast(data, 100, MPI_INT, 0, MPI_COMM_WORLD); +      MPI_Finalize(); + +        return 0; + } + +There are two errors - use of uninitialized memory and invalid length of +the buffer. 
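+
+For reference, a corrected version of the snippet might look like the
+sketch below (this fix is only an illustration and is not part of the
+original example): allocate room for all 100 integers that are
+broadcast, and let the root rank initialize the buffer before the
+broadcast. We keep the broken version for the Valgrind run that follows.
+
+    #include <stdlib.h>
+    #include <mpi.h>
+
+    int main(int argc, char *argv[])
+    {
+        int i, rank;
+        int *data = malloc(sizeof(int)*100);   /* room for all 100 elements */
+
+        MPI_Init(&argc, &argv);
+        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+        if (rank == 0)                         /* root fills the buffer before broadcasting */
+            for (i = 0; i < 100; i++)
+                data[i] = i;
+
+        MPI_Bcast(data, 100, MPI_INT, 0, MPI_COMM_WORLD);
+        MPI_Finalize();
+
+        free(data);
+        return 0;
+    }
+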
Lets debug it with valgrind : + + $ module add intel impi + $ mpiicc -g valgrind-example-mpi.c -o valgrind-example-mpi + $ module add Valgrind/3.11.0-intel-2015b + $ mpirun -np 2 -env LD_PRELOAD $EBROOTVALGRIND/lib/valgrind/libmpiwrap-amd64-linux.so valgrind ./valgrind-example-mpi + +Prints this output : (note that there is output printed for every +launched MPI process) + + ==31318== Memcheck, a memory error detector + ==31318== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. + ==31318== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==31318== Command: ./valgrind-example-mpi + ==31318== + ==31319== Memcheck, a memory error detector + ==31319== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. + ==31319== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info + ==31319== Command: ./valgrind-example-mpi + ==31319== + valgrind MPI wrappers 31319: Active for pid 31319 + valgrind MPI wrappers 31319: Try MPIWRAP_DEBUG=help for possible options + valgrind MPI wrappers 31318: Active for pid 31318 + valgrind MPI wrappers 31318: Try MPIWRAP_DEBUG=help for possible options + ==31319== Unaddressable byte(s) found during client check request + ==31319== at 0x4E35974: check_mem_is_addressable_untyped (libmpiwrap.c:960) + ==31319== by 0x4E5D0FE: PMPI_Bcast (libmpiwrap.c:908) + ==31319== by 0x400911: main (valgrind-example-mpi.c:20) + ==31319== Address 0x69291cc is 0 bytes after a block of size 396 alloc'd + ==31319== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31319== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31319== + ==31318== Uninitialised byte(s) found during client check request + ==31318== at 0x4E3591D: check_mem_is_defined_untyped (libmpiwrap.c:952) + ==31318== by 0x4E5D06D: PMPI_Bcast (libmpiwrap.c:908) + ==31318== by 0x400911: main (valgrind-example-mpi.c:20) + ==31318== Address 0x6929040 is 0 bytes inside a block of size 396 alloc'd + ==31318== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31318== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31318== + ==31318== Unaddressable byte(s) found during client check request + ==31318== at 0x4E3591D: check_mem_is_defined_untyped (libmpiwrap.c:952) + ==31318== by 0x4E5D06D: PMPI_Bcast (libmpiwrap.c:908) + ==31318== by 0x400911: main (valgrind-example-mpi.c:20) + ==31318== Address 0x69291cc is 0 bytes after a block of size 396 alloc'd + ==31318== at 0x4C27AAA: malloc (vg_replace_malloc.c:291) + ==31318== by 0x4007BC: main (valgrind-example-mpi.c:8) + ==31318== + ==31318== + ==31318== HEAP SUMMARY: + ==31318== in use at exit: 3,172 bytes in 67 blocks + ==31318== total heap usage: 191 allocs, 124 frees, 81,203 bytes allocated + ==31318== + ==31319== + ==31319== HEAP SUMMARY: + ==31319== in use at exit: 3,172 bytes in 67 blocks + ==31319== total heap usage: 175 allocs, 108 frees, 48,435 bytes allocated + ==31319== + ==31318== LEAK SUMMARY: + ==31318== definitely lost: 408 bytes in 3 blocks + ==31318== indirectly lost: 256 bytes in 1 blocks + ==31318== possibly lost: 0 bytes in 0 blocks + ==31318== still reachable: 2,508 bytes in 63 blocks + ==31318== suppressed: 0 bytes in 0 blocks + ==31318== Rerun with --leak-check=full to see details of leaked memory + ==31318== + ==31318== For counts of detected and suppressed errors, rerun with: -v + ==31318== Use --track-origins=yes to see where uninitialised values come from + ==31318== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 4 from 4) + ==31319== LEAK SUMMARY: + ==31319== definitely lost: 408 bytes in 3 blocks + 
==31319== indirectly lost: 256 bytes in 1 blocks
+ ==31319== possibly lost: 0 bytes in 0 blocks
+ ==31319== still reachable: 2,508 bytes in 63 blocks
+ ==31319== suppressed: 0 bytes in 0 blocks
+ ==31319== Rerun with --leak-check=full to see details of leaked memory
+ ==31319==
+ ==31319== For counts of detected and suppressed errors, rerun with: -v
+ ==31319== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4)
+
+We can see that Valgrind has reported use of uninitialised memory on the
+master process (which reads the array to be broadcast) and use of
+unaddressable memory on both processes.
+
diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/vampir.md b/converted/docs.it4i.cz/salomon/software/debuggers/vampir.md new file mode 100644 index 0000000000000000000000000000000000000000..9d213a4212613538afaf684b13c9a01e59f6eaad --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/debuggers/vampir.md @@ -0,0 +1,32 @@
+Vampir
+======
+
+Vampir is a commercial trace analysis and visualisation tool. It can
+work with traces in OTF and OTF2 formats. It does not collect traces
+itself; you need to use a trace collection tool (such as
+[Score-P](score-p.html)) first to produce the traces.
+
+Installed versions
+------------------
+
+Version 8.5.0 is currently installed as module
+Vampir/8.5.0:
+
+    $ module load Vampir/8.5.0
+    $ vampir &
+
+User manual
+-----------
+
+You can find the detailed user manual in PDF format in
+$EBROOTVAMPIR/doc/vampir-manual.pdf
+
+References
+----------
+
+1. <https://www.vampir.eu>
+
diff --git a/converted/docs.it4i.cz/salomon/software/debuggers/vtune-amplifier.png b/converted/docs.it4i.cz/salomon/software/debuggers/vtune-amplifier.png new file mode 100644 index 0000000000000000000000000000000000000000..75ee99d84b87649151f22edad65de021ec348f1c Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/debuggers/vtune-amplifier.png differ
diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/Snmekobrazovky20151204v15.35.12.png b/converted/docs.it4i.cz/salomon/software/intel-suite/Snmekobrazovky20151204v15.35.12.png new file mode 100644 index 0000000000000000000000000000000000000000..53c93b0687dabea5a6ae6a3e298d584d512133f4 Binary files /dev/null and b/converted/docs.it4i.cz/salomon/software/intel-suite/Snmekobrazovky20151204v15.35.12.png differ
diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.md new file mode 100644 index 0000000000000000000000000000000000000000..805e7d905e3995fbc824d02091d6cefc569f374d --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.md @@ -0,0 +1,54 @@
+Intel Advisor
+=============
+
+Intel Advisor is a tool that assists you in vectorization and threading
+of your code. You can use it to profile your application and identify
+loops that could benefit from vectorization and/or threading
+parallelism.
+
+Installed versions
+------------------
+
+The following versions are currently available on Salomon as modules:
+
+  |Version|Module|
+  |---|---|
+  |2016 Update 2|Advisor/2016_update2|
+
+Usage
+-----
+
+Your program should be compiled with the -g switch to include symbol
+names. You should compile with -O2 or higher to see code that is
+already vectorized by the compiler.
+
+Profiling is possible either directly from the GUI, or from the command
+line.
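+
+As an illustration, a survey analysis could also be collected directly
+from the command line roughly as follows (the project directory and
+binary names here are placeholders, not taken from the original
+documentation):
+
+    $ module load Advisor/2016_update2
+    $ advixe-cl -collect survey -project-dir ./advi_results -- ./myprog.x
+
+The collected result can then be opened in the GUI for inspection.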
+ +To profile from GUI, launch Advisor: + + $ advixe-gui + +Then select menu File -> New -> Project. Choose a directory to +save project data to. After clicking OK, Project properties window will +appear, where you can configure path to your binary, launch arguments, +working directory etc. After clicking OK, the project is ready. + +In the left pane, you can switch between Vectorization and Threading +workflows. Each has several possible steps which you can execute by +clicking Collect button. Alternatively, you can click on Command Line, +to see the command line required to run the analysis directly from +command line. + +References +---------- + +1. [Intel® Advisor 2015 Tutorial: Find Where to Add Parallelism - C++ + Sample](https://software.intel.com/en-us/advisorxe_2015_tut_lin_c) +2. [Product + page](https://software.intel.com/en-us/intel-advisor-xe) +3. [Documentation](https://software.intel.com/en-us/intel-advisor-2016-user-guide-linux) + + + diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.md new file mode 100644 index 0000000000000000000000000000000000000000..b846db42aa19e40378c81fa7a07bef2bde776b87 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.md @@ -0,0 +1,71 @@ +Intel Compilers +=============== + + + +The Intel compilers in multiple versions are available, via module +intel. The compilers include the icc C and C++ compiler and the ifort +fortran 77/90/95 compiler. + + $ module load intel + $ icc -v + $ ifort -v + +The intel compilers provide for vectorization of the code, via the AVX2 +instructions and support threading parallelization via OpenMP + +For maximum performance on the Salomon cluster compute nodes, compile +your programs using the AVX2 instructions, with reporting where the +vectorization was used. We recommend following compilation options for +high performance + + $ icc -ipo -O3 -xCORE-AVX2 -qopt-report1 -qopt-report-phase=vec myprog.c mysubroutines.c -o myprog.x + $ ifort -ipo -O3 -xCORE-AVX2 -qopt-report1 -qopt-report-phase=vec myprog.f mysubroutines.f -o myprog.x + +In this example, we compile the program enabling interprocedural +optimizations between source files (-ipo), aggresive loop optimizations +(-O3) and vectorization (-xCORE-AVX2) + +The compiler recognizes the omp, simd, vector and ivdep pragmas for +OpenMP parallelization and AVX2 vectorization. Enable the OpenMP +parallelization by the **-openmp** compiler switch. + + $ icc -ipo -O3 -xCORE-AVX2 -qopt-report1 -qopt-report-phase=vec -openmp myprog.c mysubroutines.c -o myprog.x + $ ifort -ipo -O3 -xCORE-AVX2 -qopt-report1 -qopt-report-phase=vec -openmp myprog.f mysubroutines.f -o myprog.x + +Read more +at <https://software.intel.com/en-us/intel-cplusplus-compiler-16.0-user-and-reference-guide> + +Sandy Bridge/Ivy Bridge/Haswell binary compatibility +---------------------------------------------------- + +Anselm nodes are currently equipped with Sandy Bridge CPUs, while +Salomon compute nodes are equipped with Haswell based architecture. The +UV1 SMP compute server has Ivy Bridge CPUs, which are equivalent to +Sandy Bridge (only smaller manufacturing technology). >The new +processors are backward compatible with the Sandy Bridge nodes, so all +programs that ran on the Sandy Bridge processors, should also run on the +new Haswell nodes. 
To get optimal performance out of the
+Haswell processors a program should make use of the special AVX2
+instructions for this processor. One can do this by recompiling the
+code with the compiler flags designated to invoke these instructions.
+For the Intel compiler suite, there are two ways of doing this:
+
+- Using the compiler flag (both for Fortran and C):
+  -xCORE-AVX2. This will create a
+  binary with AVX2 instructions, specifically
+  for the Haswell processors. Note that the
+  executable will not run on Sandy Bridge/Ivy
+  Bridge nodes.
+- Using the compiler flags (both for Fortran and C):
+  -xAVX -axCORE-AVX2. This
+  will generate multiple, feature-specific auto-dispatch
+  code paths for Intel® processors, if there is a
+  performance benefit. This binary will run on both Sandy
+  Bridge/Ivy Bridge and Haswell processors. At
+  runtime it will be decided which path to follow, depending on
+  which processor you are running on. In general this
+  will result in larger binaries.
+
diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.md new file mode 100644 index 0000000000000000000000000000000000000000..a806174fdc4f8b694b5db01a5582ed7e889b5831 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.md @@ -0,0 +1,100 @@
+Intel Debugger
+==============
+
+IDB is no longer available since Intel Parallel Studio 2015
+
+Debugging serial applications
+-----------------------------
+
+The Intel debugger version 13.0 is available via the module intel. The
+debugger works for applications compiled with the Intel C and C++
+compiler and the ifort Fortran 77/90/95 compiler. The debugger provides
+a Java GUI environment. Use [X
+display](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html)
+for running the GUI.
+
+    $ module load intel/2014.06
+    $ module load Java
+    $ idb
+
+The debugger may run in text mode. To debug in text mode, use
+
+    $ idbc
+
+To debug on the compute nodes, the module intel must be loaded.
+The GUI on compute nodes may be accessed in the same way as described in
+[the GUI
+section](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html)
+
+Example:
+
+    $ qsub -q qexp -l select=1:ncpus=24 -X -I
+    qsub: waiting for job 19654.srv11 to start
+    qsub: job 19654.srv11 ready
+
+    $ module load intel
+    $ module load Java
+    $ icc -O0 -g myprog.c -o myprog.x
+    $ idb ./myprog.x
+
+In this example, we allocate 1 full compute node, compile the program
+myprog.c with the debugging options -O0 -g and run the idb debugger
+interactively on the myprog.x executable. The GUI access is via X11 port
+forwarding provided by the PBS workload manager.
+
+Debugging parallel applications
+-------------------------------
+
+The Intel debugger is capable of debugging multithreaded and MPI parallel
+programs as well.
+
+### Small number of MPI ranks
+
+For debugging a small number of MPI ranks, you may execute and debug each
+rank in a separate xterm terminal (do not forget the [X
+display](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html)).
+Using Intel MPI, this may be done in the following way:
+
+    $ qsub -q qexp -l select=2:ncpus=24 -X -I
+    qsub: waiting for job 19654.srv11 to start
+    qsub: job 19655.srv11 ready
+
+    $ module load intel impi
+    $ mpirun -ppn 1 -hostfile $PBS_NODEFILE --enable-x xterm -e idbc ./mympiprog.x
+
+In this example, we allocate 2 full compute nodes, run xterm on each node
+and start the idb debugger in command line mode, debugging two ranks of
+the mympiprog.x application. The xterm will pop up for each rank, with
+the idb prompt ready. The example is not limited to Intel MPI.
+
+### Large number of MPI ranks
+
+Run the idb debugger using the mpirun MPI debug option (-idb). This will
+cause the debugger to bind to all ranks and provide aggregated outputs
+across the ranks, pausing execution automatically just after startup.
+You may then set breakpoints and step the execution manually. Using
+Intel MPI:
+
+    $ qsub -q qexp -l select=2:ncpus=24 -X -I
+    qsub: waiting for job 19654.srv11 to start
+    qsub: job 19655.srv11 ready
+
+    $ module load intel impi
+    $ mpirun -n 48 -idb ./mympiprog.x
+
+### Debugging multithreaded applications
+
+Run the idb debugger in GUI mode. The menu Parallel contains a number of
+tools for debugging multiple threads. One of the most useful tools is
+the **Serialize Execution** tool, which serializes execution of
+concurrent threads for easy orientation and identification of
+concurrency related bugs.
+
+Further information
+-------------------
+
+An exhaustive manual on idb features and usage is published on the Intel
+website,
+<https://software.intel.com/sites/products/documentation/doclib/iss/2013/compiler/cpp-lin/>
+
diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.md new file mode 100644 index 0000000000000000000000000000000000000000..cfbde48083ddaa8cc5cc60a481e0278375177302 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.md @@ -0,0 +1,63 @@
+Intel Inspector
+===============
+
+Intel Inspector is a dynamic memory and threading error checking tool
+for C/C++/Fortran applications. It can detect issues such as memory
+leaks, invalid memory references, uninitialized variables, race
+conditions, deadlocks etc.
+
+Installed versions
+------------------
+
+The following versions are currently available on Salomon as modules:
+
+  |Version|Module|
+  |---|---|
+  |2016 Update 1|Inspector/2016_update1|
+
+Usage
+-----
+
+Your program should be compiled with the -g switch to include symbol
+names. Optimizations can be turned on.
+
+Debugging is possible either directly from the GUI, or from the command
+line.
+
+### GUI mode
+
+To debug from the GUI, launch Inspector:
+
+    $ inspxe-gui &
+
+Then select menu File -> New -> Project. Choose a directory to
+save project data to. After clicking OK, the Project properties window
+will appear, where you can configure the path to your binary, launch
+arguments, working directory etc. After clicking OK, the project is
+ready.
+
+In the main pane, you can start a predefined analysis type or define
+your own. Click Start to start the analysis. Alternatively, you can
+click on Command Line to see the command required to run the
+analysis directly from the command line.
+
+### Batch mode
+
+Analysis can also be run from the command line in batch mode, using the
+inspxe-cl command.
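+
+For illustration, a memory-error analysis (analysis type mi1) of a
+binary could be launched like this; the binary name and result directory
+below are placeholders only, not part of the original documentation:
+
+    $ module load Inspector/2016_update1
+    $ inspxe-cl -collect mi1 -result-dir ./inspector_results -- ./myprog.x
+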
+To obtain the required parameters, either consult the documentation or +you can configure the analysis in the GUI and then click "Command Line" +button in the lower right corner to the respective command line. + +Results obtained from batch mode can be then viewed in the GUI by +selecting File -> Open -> Result... + +References +---------- + +1. [Product + page](https://software.intel.com/en-us/intel-inspector-xe) +2. [Documentation and Release + Notes](https://software.intel.com/en-us/intel-inspector-xe-support/documentation) +3. [Tutorials](https://software.intel.com/en-us/articles/inspectorxe-tutorials) + diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.md new file mode 100644 index 0000000000000000000000000000000000000000..5ec3bf70b64bf35c2fdf3382bc0c354afe340941 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.md @@ -0,0 +1,94 @@ +Intel IPP +========= + + + +Intel Integrated Performance Primitives +--------------------------------------- + +Intel Integrated Performance Primitives, version 9.0.1, compiled for +AVX2 vector instructions is available, via module ipp. The IPP is a very +rich library of highly optimized algorithmic building blocks for media +and data applications. This includes signal, image and frame processing +algorithms, such as FFT, FIR, Convolution, Optical Flow, Hough +transform, Sum, MinMax, as well as cryptographic functions, linear +algebra functions and many more. + +Check out IPP before implementing own math functions for data +processing, it is likely already there. + + $ module load ipp + +The module sets up environment variables, required for linking and +running ipp enabled applications. + +IPP example +----------- + + #include "ipp.h" + #include <stdio.h> + int main(int argc, char* argv[]) + { + const IppLibraryVersion *lib; + Ipp64u fm; + IppStatus status; + + status= ippInit(); //IPP initialization with the best optimization layer + if( status != ippStsNoErr ) { + printf("IppInit() Error:n"); + printf("%sn", ippGetStatusString(status) ); + return -1; + } + + //Get version info + lib = ippiGetLibVersion(); + printf("%s %sn", lib->Name, lib->Version); + + //Get CPU features enabled with selected library level + fm=ippGetEnabledCpuFeatures(); + printf("SSE :%cn",(fm>1)&1?'Y':'N'); + printf("SSE2 :%cn",(fm>2)&1?'Y':'N'); + printf("SSE3 :%cn",(fm>3)&1?'Y':'N'); + printf("SSSE3 :%cn",(fm>4)&1?'Y':'N'); + printf("SSE41 :%cn",(fm>6)&1?'Y':'N'); + printf("SSE42 :%cn",(fm>7)&1?'Y':'N'); + printf("AVX :%cn",(fm>8)&1 ?'Y':'N'); + printf("AVX2 :%cn", (fm>15)&1 ?'Y':'N' ); + printf("----------n"); + printf("OS Enabled AVX :%cn", (fm>9)&1 ?'Y':'N'); + printf("AES :%cn", (fm>10)&1?'Y':'N'); + printf("CLMUL :%cn", (fm>11)&1?'Y':'N'); + printf("RDRAND :%cn", (fm>13)&1?'Y':'N'); + printf("F16C :%cn", (fm>14)&1?'Y':'N'); + + return 0; + } + + Compile above example, using any compiler and the ipp module. + + $ module load intel + $ module load ipp + + $ icc testipp.c -o testipp.x -lippi -lipps -lippcore + +You will need the ipp module loaded to run the ipp enabled executable. 
+This may be avoided, by compiling library search paths into the +executable + + $ module load intel + $ module load ipp + + $ icc testipp.c -o testipp.x -Wl,-rpath=$LIBRARY_PATH -lippi -lipps -lippcore + +Code samples and documentation +------------------------------ + +Intel provides number of [Code Samples for +IPP](https://software.intel.com/en-us/articles/code-samples-for-intel-integrated-performance-primitives-library), +illustrating use of IPP. + +Read full documentation on IPP [on Intel +website,](http://software.intel.com/sites/products/search/search.php?q=&x=15&y=6&product=ipp&version=7.1&docos=lin) +in particular the [IPP Reference +manual.](http://software.intel.com/sites/products/documentation/doclib/ipp_sa/71/ipp_manual/index.htm) + diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.md new file mode 100644 index 0000000000000000000000000000000000000000..57711b34ae2797e4040aa04a62ce81c096be5d6e --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.md @@ -0,0 +1,200 @@ +Intel MKL +========= + + + +Intel Math Kernel Library +------------------------- + +Intel Math Kernel Library (Intel MKL) is a library of math kernel +subroutines, extensively threaded and optimized for maximum performance. +Intel MKL provides these basic math kernels: + +- + + + + BLAS (level 1, 2, and 3) and LAPACK linear algebra routines, + offering vector, vector-matrix, and matrix-matrix operations. +- + + + + The PARDISO direct sparse solver, an iterative sparse solver, + and supporting sparse BLAS (level 1, 2, and 3) routines for solving + sparse systems of equations. +- + + + + ScaLAPACK distributed processing linear algebra routines for + Linux* and Windows* operating systems, as well as the Basic Linear + Algebra Communications Subprograms (BLACS) and the Parallel Basic + Linear Algebra Subprograms (PBLAS). +- + + + + Fast Fourier transform (FFT) functions in one, two, or three + dimensions with support for mixed radices (not limited to sizes that + are powers of 2), as well as distributed versions of + these functions. +- + + + + Vector Math Library (VML) routines for optimized mathematical + operations on vectors. +- + + + + Vector Statistical Library (VSL) routines, which offer + high-performance vectorized random number generators (RNG) for + several probability distributions, convolution and correlation + routines, and summary statistics functions. +- + + + + Data Fitting Library, which provides capabilities for + spline-based approximation of functions, derivatives and integrals + of functions, and search. +- Extended Eigensolver, a shared memory version of an eigensolver + based on the Feast Eigenvalue Solver. + +For details see the [Intel MKL Reference +Manual](http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mklman/index.htm). + +Intel MKL version 11.2.3.187 is available on the cluster + + $ module load imkl + +The module sets up environment variables, required for linking and +running mkl enabled applications. The most important variables are the +$MKLROOT, $CPATH, $LD_LIBRARY_PATH and $MKL_EXAMPLES + +Intel MKL library may be linked using any compiler. +With intel compiler use -mkl option to link default threaded MKL. + +### Interfaces + +Intel MKL library provides number of interfaces. The fundamental once +are the LP64 and ILP64. 
The Intel MKL ILP64 libraries use the 64-bit +integer type (necessary for indexing large arrays, with more than +231^-1 elements), whereas the LP64 libraries index arrays with the +32-bit integer type. + + |Interface|Integer type| + ----- |---|---|------------------------------------- + |LP64|32-bit, int, integer(kind=4), MPI_INT| + ILP64 64-bit, long int, integer(kind=8), MPI_INT64 + +### Linking + +Linking Intel MKL libraries may be complex. Intel [mkl link line +advisor](http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor) +helps. See also [examples](intel-mkl.html#examples) below. + +You will need the mkl module loaded to run the mkl enabled executable. +This may be avoided, by compiling library search paths into the +executable. Include rpath on the compile line: + + $ icc .... -Wl,-rpath=$LIBRARY_PATH ... + +### Threading + +Advantage in using Intel MKL library is that it brings threaded +parallelization to applications that are otherwise not parallel. + +For this to work, the application must link the threaded MKL library +(default). Number and behaviour of MKL threads may be controlled via the +OpenMP environment variables, such as OMP_NUM_THREADS and +KMP_AFFINITY. MKL_NUM_THREADS takes precedence over OMP_NUM_THREADS + + $ export OMP_NUM_THREADS=24 + $ export KMP_AFFINITY=granularity=fine,compact,1,0 + +The application will run with 24 threads with affinity optimized for +fine grain parallelization. + +Examples +------------ + +Number of examples, demonstrating use of the Intel MKL library and its +linking is available on clusters, in the $MKL_EXAMPLES directory. In +the examples below, we demonstrate linking Intel MKL to Intel and GNU +compiled program for multi-threaded matrix multiplication. + +### Working with examples + + $ module load intel + $ module load imkl + $ cp -a $MKL_EXAMPLES/cblas /tmp/ + $ cd /tmp/cblas + + $ make sointel64 function=cblas_dgemm + +In this example, we compile, link and run the cblas_dgemm example, +demonstrating use of MKL example suite installed on clusters. + +### Example: MKL and Intel compiler + + $ module load intel + $ module load imkl + $ cp -a $MKL_EXAMPLES/cblas /tmp/ + $ cd /tmp/cblas + $ + $ icc -w source/cblas_dgemmx.c source/common_func.c -mkl -o cblas_dgemmx.x + $ ./cblas_dgemmx.x data/cblas_dgemmx.d + +In this example, we compile, link and run the cblas_dgemm example, +demonstrating use of MKL with icc -mkl option. Using the -mkl option is +equivalent to: + + $ icc -w source/cblas_dgemmx.c source/common_func.c -o cblas_dgemmx.x + -I$MKL_INC_DIR -L$MKL_LIB_DIR -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 + +In this example, we compile and link the cblas_dgemm example, using +LP64 interface to threaded MKL and Intel OMP threads implementation. + +### Example: Intel MKL and GNU compiler + + $ module load GCC + $ module load imkl + $ cp -a $MKL_EXAMPLES/cblas /tmp/ + $ cd /tmp/cblas + + $ gcc -w source/cblas_dgemmx.c source/common_func.c -o cblas_dgemmx.x + -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lm + + $ ./cblas_dgemmx.x data/cblas_dgemmx.d + +In this example, we compile, link and run the cblas_dgemm example, +using LP64 interface to threaded MKL and gnu OMP threads implementation. + +MKL and MIC accelerators +------------------------ + +The Intel MKL is capable to automatically offload the computations o the +MIC accelerator. See section [Intel Xeon +Phi](../intel-xeon-phi.html) for details. + +LAPACKE C Interface +------------------- + +MKL includes LAPACKE C Interface to LAPACK. 
For some reason, although +Intel is the author of LAPACKE, the LAPACKE header files are not present +in MKL. For this reason, we have prepared +LAPACKE module, which includes Intel's LAPACKE +headers from official LAPACK, which you can use to compile code using +LAPACKE interface against MKL. + +Further reading +--------------- + +Read more on [Intel +website](http://software.intel.com/en-us/intel-mkl), in +particular the [MKL users +guide](https://software.intel.com/en-us/intel-mkl/documentation/linux). + diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..d77b2d908e90cb53d38269d3e3094a3d6c43e3c8 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.md @@ -0,0 +1,95 @@ +Intel Parallel Studio +===================== + + + +The Salomon cluster provides following elements of the Intel Parallel +Studio XE + + Intel Parallel Studio XE + ------------------------------------------------- + Intel Compilers + Intel Debugger + Intel MKL Library + Intel Integrated Performance Primitives Library + Intel Threading Building Blocks Library + Intel Trace Analyzer and Collector + Intel Advisor + Intel Inspector + +Intel compilers +--------------- + +The Intel compilers version 131.3 are available, via module +iccifort/2013.5.192-GCC-4.8.3. The compilers include the icc C and C++ +compiler and the ifort fortran 77/90/95 compiler. + + $ module load intel + $ icc -v + $ ifort -v + +Read more at the [Intel Compilers](intel-compilers.html) +page. + +Intel debugger +-------------- + +IDB is no longer available since Parallel Studio 2015. + + The intel debugger version 13.0 is available, via module intel. The +debugger works for applications compiled with C and C++ compiler and the +ifort fortran 77/90/95 compiler. The debugger provides java GUI +environment. Use [X +display](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html) +for running the GUI. + + $ module load intel + $ idb + +Read more at the [Intel Debugger](intel-debugger.html) +page. + +Intel Math Kernel Library +------------------------- + +Intel Math Kernel Library (Intel MKL) is a library of math kernel +subroutines, extensively threaded and optimized for maximum performance. +Intel MKL unites and provides these basic components: BLAS, LAPACK, +ScaLapack, PARDISO, FFT, VML, VSL, Data fitting, Feast Eigensolver and +many more. + + $ module load imkl + +Read more at the [Intel MKL](intel-mkl.html) page. + +Intel Integrated Performance Primitives +--------------------------------------- + +Intel Integrated Performance Primitives, version 7.1.1, compiled for AVX +is available, via module ipp. The IPP is a library of highly optimized +algorithmic building blocks for media and data applications. This +includes signal, image and frame processing algorithms, such as FFT, +FIR, Convolution, Optical Flow, Hough transform, Sum, MinMax and many +more. + + $ module load ipp + +Read more at the [Intel +IPP](intel-integrated-performance-primitives.html) page. + +Intel Threading Building Blocks +------------------------------- + +Intel Threading Building Blocks (Intel TBB) is a library that supports +scalable parallel programming using standard ISO C++ code. It does not +require special languages or compilers. 
It is designed to promote +scalable data parallel programming. Additionally, it fully supports +nested parallelism, so you can build larger parallel components from +smaller parallel components. To use the library, you specify tasks, not +threads, and let the library map tasks onto threads in an efficient +manner. + + $ module load tbb + +Read more at the [Intel TBB](intel-tbb.html) page. + diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.md new file mode 100644 index 0000000000000000000000000000000000000000..8fea59c78cccd8be49c2d21e0e18f89ba0e1b2ea --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.md @@ -0,0 +1,54 @@ +Intel TBB +========= + + + +Intel Threading Building Blocks +------------------------------- + +Intel Threading Building Blocks (Intel TBB) is a library that supports +scalable parallel programming using standard ISO C++ code. It does not +require special languages or compilers. To use the library, you specify +tasks, not threads, and let the library map tasks onto threads in an +efficient manner. The tasks are executed by a runtime scheduler and may +be offloaded to [MIC +accelerator](../intel-xeon-phi.html). + +Intel TBB version 4.3.5.187 is available on the cluster. + + $ module load tbb + +The module sets up environment variables, required for linking and +running tbb enabled applications. + +Link the tbb library, using -ltbb + +Examples +-------- + +Number of examples, demonstrating use of TBB and its built-in scheduler +is available on Anselm, in the $TBB_EXAMPLES directory. + + $ module load intel + $ module load tbb + $ cp -a $TBB_EXAMPLES/common $TBB_EXAMPLES/parallel_reduce /tmp/ + $ cd /tmp/parallel_reduce/primes + $ icc -O2 -DNDEBUG -o primes.x main.cpp primes.cpp -ltbb + $ ./primes.x + +In this example, we compile, link and run the primes example, +demonstrating use of parallel task-based reduce in computation of prime +numbers. + +You will need the tbb module loaded to run the tbb enabled executable. +This may be avoided, by compiling library search paths into the +executable. + + $ icc -O2 -o primes.x main.cpp primes.cpp -Wl,-rpath=$LIBRARY_PATH -ltbb + +Further reading +--------------- + +Read more on Intel website, +<http://software.intel.com/sites/products/documentation/doclib/tbb_sa/help/index.htm> + diff --git a/converted/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.md b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.md new file mode 100644 index 0000000000000000000000000000000000000000..510aae8b64321e4fbdb8f769730d4fa92552b81b --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.md @@ -0,0 +1,54 @@ +Intel Trace Analyzer and Collector +================================== + +Intel Trace Analyzer and Collector (ITAC) is a tool to collect and +graphicaly analyze behaviour of MPI applications. It helps you to +analyze communication patterns of your application, identify hotspots, +perform correctnes checking (identify deadlocks, data corruption etc), +simulate how your application would run on a different interconnect. + +ITAC is a offline analysis tool - first you run your application to +collect a trace file, then you can open the trace in a GUI analyzer to +view it. 
+ +Installed version +----------------- + +Currently on Salomon is version 9.1.2.024 available as module +itac/9.1.2.024 + +Collecting traces +----------------- + +ITAC can collect traces from applications that are using Intel MPI. To +generate a trace, simply add -trace option to your mpirun command : + + $ module load itac/9.1.2.024 + $ mpirun -trace myapp + +The trace will be saved in file myapp.stf in the current directory. + +Viewing traces +-------------- + +To view and analyze the trace, open ITAC GUI in a [graphical +environment](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html) +: + + $ module load itac/9.1.2.024 + $ traceanalyzer + +The GUI will launch and you can open the produced *.stf file. + + + +Please refer to Intel documenation about usage of the GUI tool. + +References +---------- + +1. [Getting Started with Intel® Trace Analyzer and + Collector](https://software.intel.com/en-us/get-started-with-itac-for-linux) +2. [Intel® Trace Analyzer and Collector - + Documentation](http://Intel®%20Trace%20Analyzer%20and%20Collector%20-%20Documentation) + diff --git a/converted/docs.it4i.cz/salomon/software/intel-xeon-phi.md b/converted/docs.it4i.cz/salomon/software/intel-xeon-phi.md new file mode 100644 index 0000000000000000000000000000000000000000..55e70e7577678526a8a1fb197ac915527ff85895 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/intel-xeon-phi.md @@ -0,0 +1,1062 @@ +Intel Xeon Phi +============== + +A guide to Intel Xeon Phi usage + + + +Intel Xeon Phi accelerator can be programmed in several modes. The +default mode on the cluster is offload mode, but all modes described in +this document are supported. + +Intel Utilities for Xeon Phi +---------------------------- + +To get access to a compute node with Intel Xeon Phi accelerator, use the +PBS interactive session + + $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + +To set up the environment module "intel" has to be loaded, without +specifying the version, default version is loaded (at time of writing +this, it's 2015b) + + $ module load intel + +Information about the hardware can be obtained by running +the micinfo program on the host. + + $ /usr/bin/micinfo + +The output of the "micinfo" utility executed on one of the cluster node +is as follows. 
(note: to get PCIe related details the command has to be +run with root privileges) + + MicInfo Utility Log + Created Mon Aug 17 13:55:59 2015 + + System Info + HOST OS : Linux + OS Version : 2.6.32-504.16.2.el6.x86_64 + Driver Version : 3.4.1-1 + MPSS Version : 3.4.1 + Host Physical Memory : 131930 MB + + Device No: 0, Device Name: mic0 + + Version + Flash Version : 2.1.02.0390 + SMC Firmware Version : 1.16.5078 + SMC Boot Loader Version : 1.8.4326 + uOS Version : 2.6.38.8+mpss3.4.1 + Device Serial Number : ADKC44601414 + + Board + Vendor ID : 0x8086 + Device ID : 0x225c + Subsystem ID : 0x7d95 + Coprocessor Stepping ID : 2 + PCIe Width : x16 + PCIe Speed : 5 GT/s + PCIe Max payload size : 256 bytes + PCIe Max read req size : 512 bytes + Coprocessor Model : 0x01 + Coprocessor Model Ext : 0x00 + Coprocessor Type : 0x00 + Coprocessor Family : 0x0b + Coprocessor Family Ext : 0x00 + Coprocessor Stepping : C0 + Board SKU : C0PRQ-7120 P/A/X/D + ECC Mode : Enabled + SMC HW Revision : Product 300W Passive CS + + Cores + Total No of Active Cores : 61 + Voltage : 1007000 uV + Frequency : 1238095 kHz + + Thermal + Fan Speed Control : N/A + Fan RPM : N/A + Fan PWM : N/A + Die Temp : 60 C + + GDDR + GDDR Vendor : Samsung + GDDR Version : 0x6 + GDDR Density : 4096 Mb + GDDR Size : 15872 MB + GDDR Technology : GDDR5 + GDDR Speed : 5.500000 GT/s + GDDR Frequency : 2750000 kHz + GDDR Voltage : 1501000 uV + + Device No: 1, Device Name: mic1 + + Version + Flash Version : 2.1.02.0390 + SMC Firmware Version : 1.16.5078 + SMC Boot Loader Version : 1.8.4326 + uOS Version : 2.6.38.8+mpss3.4.1 + Device Serial Number : ADKC44500454 + + Board + Vendor ID : 0x8086 + Device ID : 0x225c + Subsystem ID : 0x7d95 + Coprocessor Stepping ID : 2 + PCIe Width : x16 + PCIe Speed : 5 GT/s + PCIe Max payload size : 256 bytes + PCIe Max read req size : 512 bytes + Coprocessor Model : 0x01 + Coprocessor Model Ext : 0x00 + Coprocessor Type : 0x00 + Coprocessor Family : 0x0b + Coprocessor Family Ext : 0x00 + Coprocessor Stepping : C0 + Board SKU : C0PRQ-7120 P/A/X/D + ECC Mode : Enabled + SMC HW Revision : Product 300W Passive CS + + Cores + Total No of Active Cores : 61 + Voltage : 998000 uV + Frequency : 1238095 kHz + + Thermal + Fan Speed Control : N/A + Fan RPM : N/A + Fan PWM : N/A + Die Temp : 59 C + + GDDR + GDDR Vendor : Samsung + GDDR Version : 0x6 + GDDR Density : 4096 Mb + GDDR Size : 15872 MB + GDDR Technology : GDDR5 + GDDR Speed : 5.500000 GT/s + GDDR Frequency : 2750000 kHz + GDDR Voltage : 1501000 uV + +Offload Mode +------------ + +To compile a code for Intel Xeon Phi a MPSS stack has to be installed on +the machine where compilation is executed. Currently the MPSS stack is +only installed on compute nodes equipped with accelerators. + + $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + $ module load intel + +For debugging purposes it is also recommended to set environment +variable "OFFLOAD_REPORT". Value can be set from 0 to 3, where higher +number means more debugging information. + + export OFFLOAD_REPORT=3 + +A very basic example of code that employs offload programming technique +is shown in the next listing. Please note that this code is sequential +and utilizes only single core of the accelerator. 
+ + $ vim source-offload.cpp + + #include <iostream> + + int main(int argc, char* argv[]) + { +    const int niter = 100000; +    double result = 0; + +  #pragma offload target(mic) +    for (int i = 0; i < niter; ++i) { +        const double t = (i + 0.5) / niter; +        result += 4.0 / (t * t + 1.0); +    } +    result /= niter; +    std::cout << "Pi ~ " << result << 'n'; + } + +To compile a code using Intel compiler run + + $ icc source-offload.cpp -o bin-offload + +To execute the code, run the following command on the host + + ./bin-offload + +### Parallelization in Offload Mode Using OpenMP + +One way of paralelization a code for Xeon Phi is using OpenMP +directives. The following example shows code for parallel vector +addition. + + $ vim ./vect-add + + #include <stdio.h> + + typedef int T; + + #define SIZE 1000 + + #pragma offload_attribute(push, target(mic)) + T in1[SIZE]; + T in2[SIZE]; + T res[SIZE]; + #pragma offload_attribute(pop) + + // MIC function to add two vectors + __attribute__((target(mic))) add_mic(T *a, T *b, T *c, int size) { +  int i = 0; +  #pragma omp parallel for +    for (i = 0; i < size; i++) +      c[i] = a[i] + b[i]; + } + + // CPU function to add two vectors + void add_cpu (T *a, T *b, T *c, int size) { +  int i; +  for (i = 0; i < size; i++) +    c[i] = a[i] + b[i]; + } + + // CPU function to generate a vector of random numbers + void random_T (T *a, int size) { +  int i; +  for (i = 0; i < size; i++) +    a[i] = rand() % 10000; // random number between 0 and 9999 + } + + // CPU function to compare two vectors + int compare(T *a, T *b, T size ){ +  int pass = 0; +  int i; +  for (i = 0; i < size; i++){ +    if (a[i] != b[i]) { +      printf("Value mismatch at location %d, values %d and %dn",i, a[i], b[i]); +      pass = 1; +    } +  } +  if (pass == 0) printf ("Test passedn"); else printf ("Test Failedn"); +  return pass; + } + + int main() + { +  int i; +  random_T(in1, SIZE); +  random_T(in2, SIZE); + +  #pragma offload target(mic) in(in1,in2) inout(res) +  { + +    // Parallel loop from main function +    #pragma omp parallel for +    for (i=0; i<SIZE; i++) +      res[i] = in1[i] + in2[i]; + +    // or parallel loop is called inside the function +    add_mic(in1, in2, res, SIZE); + +  } + +  //Check the results with CPU implementation +  T res_cpu[SIZE]; +  add_cpu(in1, in2, res_cpu, SIZE); +  compare(res, res_cpu, SIZE); + + } + +During the compilation Intel compiler shows which loops have been +vectorized in both host and accelerator. This can be enabled with +compiler option "-vec-report2". To compile and execute the code run + + $ icc vect-add.c -openmp_report2 -vec-report2 -o vect-add + + $ ./vect-add + +Some interesting compiler flags useful not only for code debugging are: + +Debugging + openmp_report[0|1|2] - controls the compiler based vectorization +diagnostic level + vec-report[0|1|2] - controls the OpenMP parallelizer diagnostic +level + +Performance ooptimization + xhost - FOR HOST ONLY - to generate AVX (Advanced Vector Extensions) +instructions. + +Automatic Offload using Intel MKL Library +----------------------------------------- + +Intel MKL includes an Automatic Offload (AO) feature that enables +computationally intensive MKL functions called in user code to benefit +from attached Intel Xeon Phi coprocessors automatically and +transparently. + +Behavioural of automatic offload mode is controlled by functions called +within the program or by environmental variables. 
Complete list of +controls is listed [ +here](http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_userguide_lnx/GUID-3DC4FC7D-A1E4-423D-9C0C-06AB265FFA86.htm). + +The Automatic Offload may be enabled by either an MKL function call +within the code: + + mkl_mic_enable(); + +or by setting environment variable + + $ export MKL_MIC_ENABLE=1 + +To get more information about automatic offload please refer to "[Using +Intel® MKL Automatic Offload on Intel ® Xeon Phi™ +Coprocessors](http://software.intel.com/sites/default/files/11MIC42_How_to_Use_MKL_Automatic_Offload_0.pdf)" +white paper or [ Intel MKL +documentation](https://software.intel.com/en-us/articles/intel-math-kernel-library-documentation). + +### Automatic offload example #1 + +Following example show how to automatically offload an SGEMM (single +precision - g dir="auto">eneral matrix multiply) function to +MIC coprocessor. + +At first get an interactive PBS session on a node with MIC accelerator +and load "intel" module that automatically loads "mkl" module as well. + + $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + $ module load intel + + The code can be copied to a file and compiled without any necessary +modification. + + $ vim sgemm-ao-short.c + +` +#include <stdio.h> +#include <stdlib.h> +#include <malloc.h> +#include <stdint.h> + +#include "mkl.h" + +int main(int argc, char **argv) +{ +       float *A, *B, *C; /* Matrices */ + +       MKL_INT N = 2560; /* Matrix dimensions */ +       MKL_INT LD = N; /* Leading dimension */ +       int matrix_bytes; /* Matrix size in bytes */ +       int matrix_elements; /* Matrix size in elements */ + +       float alpha = 1.0, beta = 1.0; /* Scaling factors */ +       char transa = 'N', transb = 'N'; /* Transposition options */ + +       int i, j; /* Counters */ + +       matrix_elements = N * N; +       matrix_bytes = sizeof(float) * matrix_elements; + +       /* Allocate the matrices */ +       A = malloc(matrix_bytes); B = malloc(matrix_bytes); C = malloc(matrix_bytes); + +       /* Initialize the matrices */ +       for (i = 0; i < matrix_elements; i++) { +               A[i] = 1.0; B[i] = 2.0; C[i] = 0.0; +       } + +       printf("Computing SGEMM on the hostn"); +       sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); + +       printf("Enabling Automatic Offloadn"); +       /* Alternatively, set environment variable MKL_MIC_ENABLE=1 */ +       mkl_mic_enable(); +       +       int ndevices = mkl_mic_get_device_count(); /* Number of MIC devices */ +       printf("Automatic Offload enabled: %d MIC devices presentn",  ndevices); + +       printf("Computing SGEMM with automatic workdivisionn"); +       sgemm(&transa, &transb, &N, &N, &N, &alpha, A, &N, B, &N, &beta, C, &N); + +       /* Free the matrix memory */ +       free(A); free(B); free(C); + +       printf("Donen"); + +   return 0; +} +` + +Please note: This example is simplified version of an example from MKL. +The expanded version can be found here: +$MKL_EXAMPLES/mic_ao/blasc/source/sgemm.c** + +To compile a code using Intel compiler use: + + $ icc -mkl sgemm-ao-short.c -o sgemm + +For debugging purposes enable the offload report to see more information +about automatic offloading. 
+ + $ export OFFLOAD_REPORT=2 + +The output of a code should look similar to following listing, where +lines starting with [MKL] are generated by offload reporting: + + [user@r31u03n799 ~]$ ./sgemm + Computing SGEMM on the host + Enabling Automatic Offload + Automatic Offload enabled: 2 MIC devices present + Computing SGEMM with automatic workdivision + [MKL] [MIC --] [AO Function]   SGEMM + [MKL] [MIC --] [AO SGEMM Workdivision]   0.44 0.28 0.28 + [MKL] [MIC 00] [AO SGEMM CPU Time]   0.252427 seconds + [MKL] [MIC 00] [AO SGEMM MIC Time]   0.091001 seconds + [MKL] [MIC 00] [AO SGEMM CPU->MIC Data]   34078720 bytes + [MKL] [MIC 00] [AO SGEMM MIC->CPU Data]   7864320 bytes + [MKL] [MIC 01] [AO SGEMM CPU Time]   0.252427 seconds + [MKL] [MIC 01] [AO SGEMM MIC Time]   0.094758 seconds + [MKL] [MIC 01] [AO SGEMM CPU->MIC Data]   34078720 bytes + [MKL] [MIC 01] [AO SGEMM MIC->CPU Data]   7864320 bytes + Done + +Behavioral of automatic offload mode is controlled by functions called +within the program or by environmental variables. Complete list of +controls is listed [ +here](http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_userguide_lnx/GUID-3DC4FC7D-A1E4-423D-9C0C-06AB265FFA86.htm). + +To get more information about automatic offload please refer to "[Using +Intel® MKL Automatic Offload on Intel ® Xeon Phi™ +Coprocessors](http://software.intel.com/sites/default/files/11MIC42_How_to_Use_MKL_Automatic_Offload_0.pdf)" +white paper or [ Intel MKL +documentation](https://software.intel.com/en-us/articles/intel-math-kernel-library-documentation). + +### Automatic offload example #2 + +In this example, we will demonstrate automatic offload control via an +environment vatiable MKL_MIC_ENABLE. The function DGEMM will be +offloaded. + +At first get an interactive PBS session on a node with MIC accelerator. + + $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + +Once in, we enable the offload and run the Octave software. In octave, +we generate two large random matrices and let them multiply together. + + $ export MKL_MIC_ENABLE=1 + $ export OFFLOAD_REPORT=2 + $ module load Octave/3.8.2-intel-2015b + + $ octave -q + octave:1> A=rand(10000); + octave:2> B=rand(10000); + octave:3> C=A*B; + [MKL] [MIC --] [AO Function]   DGEMM + [MKL] [MIC --] [AO DGEMM Workdivision]   0.14 0.43 0.43 + [MKL] [MIC 00] [AO DGEMM CPU Time]   3.814714 seconds + [MKL] [MIC 00] [AO DGEMM MIC Time]   2.781595 seconds + [MKL] [MIC 00] [AO DGEMM CPU->MIC Data]   1145600000 bytes + [MKL] [MIC 00] [AO DGEMM MIC->CPU Data]   1382400000 bytes + [MKL] [MIC 01] [AO DGEMM CPU Time]   3.814714 seconds + [MKL] [MIC 01] [AO DGEMM MIC Time]   2.843016 seconds + [MKL] [MIC 01] [AO DGEMM CPU->MIC Data]   1145600000 bytes + [MKL] [MIC 01] [AO DGEMM MIC->CPU Data]   1382400000 bytes + octave:4> exit + +On the example above we observe, that the DGEMM function workload was +split over CPU, MIC 0 and MIC 1, in the ratio 0.14 0.43 0.43. The matrix +multiplication was done on the CPU, accelerated by two Xeon Phi +accelerators. + +Native Mode +----------- + +In the native mode a program is executed directly on Intel Xeon Phi +without involvement of the host machine. Similarly to offload mode, the +code is compiled on the host computer with Intel compilers. + +To compile a code user has to be connected to a compute with MIC and +load Intel compilers module. 
To get an interactive session on a compute +node with an Intel Xeon Phi and load the module use following commands: + + $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + + $ module load intel + +Please note that particular version of the Intel module is specified. +This information is used later to specify the correct library paths. + +To produce a binary compatible with Intel Xeon Phi architecture user has +to specify "-mmic" compiler flag. Two compilation examples are shown +below. The first example shows how to compile OpenMP parallel code +"vect-add.c" for host only: + + $ icc -xhost -no-offload -fopenmp vect-add.c -o vect-add-host + +To run this code on host, use: + + $ ./vect-add-host + +The second example shows how to compile the same code for Intel Xeon +Phi: + + $ icc -mmic -fopenmp vect-add.c -o vect-add-mic + +### Execution of the Program in Native Mode on Intel Xeon Phi + +The user access to the Intel Xeon Phi is through the SSH. Since user +home directories are mounted using NFS on the accelerator, users do not +have to copy binary files or libraries between the host and accelerator. + + +Get the PATH of MIC enabled libraries for currently used Intel Compiler +(here was icc/2015.3.187-GNU-5.1.0-2.25 used) : + + $ echo $MIC_LD_LIBRARY_PATH + /apps/all/icc/2015.3.187-GNU-5.1.0-2.25/composer_xe_2015.3.187/compiler/lib/mic + +To connect to the accelerator run: + + $ ssh mic0 + +If the code is sequential, it can be executed directly: + + mic0 $ ~/path_to_binary/vect-add-seq-mic + +If the code is parallelized using OpenMP a set of additional libraries +is required for execution. To locate these libraries new path has to be +added to the LD_LIBRARY_PATH environment variable prior to the +execution: + + mic0 $ export LD_LIBRARY_PATH=/apps/all/icc/2015.3.187-GNU-5.1.0-2.25/composer_xe_2015.3.187/compiler/lib/mic:$LD_LIBRARY_PATH + +Please note that the path exported in the previous example contains path +to a specific compiler (here the version is 2015.3.187-GNU-5.1.0-2.25). +This version number has to match with the version number of the Intel +compiler module that was used to compile the code on the host computer. + +For your information the list of libraries and their location required +for execution of an OpenMP parallel code on Intel Xeon Phi is: + +/apps/all/icc/2015.3.187-GNU-5.1.0-2.25/composer_xe_2015.3.187/compiler/lib/mic + +libiomp5.so +libimf.so +libsvml.so +libirng.so +libintlc.so.5 + +Finally, to run the compiled code use: + + $ ~/path_to_binary/vect-add-mic + +OpenCL +------------------- + +OpenCL (Open Computing Language) is an open standard for +general-purpose parallel programming for diverse mix of multi-core CPUs, +GPU coprocessors, and other parallel processors. OpenCL provides a +flexible execution model and uniform programming environment for +software developers to write portable code for systems running on both +the CPU and graphics processors or accelerators like the Intel® Xeon +Phi. + +On Anselm OpenCL is installed only on compute nodes with MIC +accelerator, therefore OpenCL code can be compiled only on these nodes. + + module load opencl-sdk opencl-rt + +Always load "opencl-sdk" (providing devel files like headers) and +"opencl-rt" (providing dynamic library libOpenCL.so) modules to compile +and link OpenCL code. Load "opencl-rt" for running your compiled code. 
+ +There are two basic examples of OpenCL code in the following +directory: + + /apps/intel/opencl-examples/ + +First example "CapsBasic" detects OpenCL compatible hardware, here +CPU and MIC, and prints basic information about the capabilities of +it. + + /apps/intel/opencl-examples/CapsBasic/capsbasic + +To compile and run the example copy it to your home directory, get +a PBS interactive session on of the nodes with MIC and run make for +compilation. Make files are very basic and shows how the OpenCL code can +be compiled on Anselm. + + $ cp /apps/intel/opencl-examples/CapsBasic/* . + $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + $ make + +The compilation command for this example is: + + $ g++ capsbasic.cpp -lOpenCL -o capsbasic -I/apps/intel/opencl/include/ + +After executing the complied binary file, following output should +be displayed. + + ./capsbasic + + Number of available platforms: 1 + Platform names: +    [0] Intel(R) OpenCL [Selected] + Number of devices available for each type: +    CL_DEVICE_TYPE_CPU: 1 +    CL_DEVICE_TYPE_GPU: 0 +    CL_DEVICE_TYPE_ACCELERATOR: 1 + + ** Detailed information for each device *** + + CL_DEVICE_TYPE_CPU[0] +    CL_DEVICE_NAME:       Intel(R) Xeon(R) CPU E5-2470 0 @ 2.30GHz +    CL_DEVICE_AVAILABLE: 1 + + ... + + CL_DEVICE_TYPE_ACCELERATOR[0] +    CL_DEVICE_NAME: Intel(R) Many Integrated Core Acceleration Card +    CL_DEVICE_AVAILABLE: 1 + + ... + +More information about this example can be found on Intel website: +<http://software.intel.com/en-us/vcsource/samples/caps-basic/> + +The second example that can be found in +"/apps/intel/opencl-examples" >directory is General Matrix +Multiply. You can follow the the same procedure to download the example +to your directory and compile it. + + $ cp -r /apps/intel/opencl-examples/* . + $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + $ cd GEMM + $ make + +The compilation command for this example is: + + $ g++ cmdoptions.cpp gemm.cpp ../common/basic.cpp ../common/cmdparser.cpp ../common/oclobject.cpp -I../common -lOpenCL -o gemm -I/apps/intel/opencl/include/ + +To see the performance of Intel Xeon Phi performing the DGEMM run +the example as follows: + + ./gemm -d 1 + Platforms (1): + [0] Intel(R) OpenCL [Selected] + Devices (2): + [0] Intel(R) Xeon(R) CPU E5-2470 0 @ 2.30GHz + [1] Intel(R) Many Integrated Core Acceleration Card [Selected] + Build program options: "-DT=float -DTILE_SIZE_M=1 -DTILE_GROUP_M=16 -DTILE_SIZE_N=128 -DTILE_GROUP_N=1 -DTILE_SIZE_K=8" + Running gemm_nn kernel with matrix size: 3968x3968 + Memory row stride to ensure necessary alignment: 15872 bytes + Size of memory region for one matrix: 62980096 bytes + Using alpha = 0.57599 and beta = 0.872412 + ... + Host time: 0.292953 sec. + Host perf: 426.635 GFLOPS + Host time: 0.293334 sec. + Host perf: 426.081 GFLOPS + ... + +Please note: GNU compiler is used to compile the OpenCL codes for +Intel MIC. You do not need to load Intel compiler module. 
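+
+For illustration, the following is a minimal sketch of an OpenCL host
+program that only enumerates the available platforms and devices, in the
+spirit of the CapsBasic example above. It is not the CapsBasic source;
+the file name caps_query.c and the compile line in the comment are
+assumptions based on the examples in this section.
+
+`
+/*
+ * caps_query.c - minimal OpenCL platform/device listing (sketch).
+ * Assumed compile line, following the examples above:
+ *   g++ caps_query.c -lOpenCL -o caps_query -I/apps/intel/opencl/include/
+ */
+#include <stdio.h>
+#include <CL/cl.h>
+
+int main(void)
+{
+    cl_platform_id platforms[8];
+    cl_uint nplat = 0;
+    cl_uint p, d;
+
+    /* query up to 8 OpenCL platforms */
+    clGetPlatformIDs(8, platforms, &nplat);
+    printf("Number of available platforms: %u\n", nplat);
+
+    for (p = 0; p < nplat && p < 8; p++) {
+        char pname[256];
+        cl_device_id devices[8];
+        cl_uint ndev = 0;
+
+        clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME,
+                          sizeof(pname), pname, NULL);
+        printf("Platform [%u]: %s\n", p, pname);
+
+        /* list all devices (CPU and accelerator) of this platform */
+        clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 8, devices, &ndev);
+        for (d = 0; d < ndev && d < 8; d++) {
+            char dname[256];
+            clGetDeviceInfo(devices[d], CL_DEVICE_NAME,
+                            sizeof(dname), dname, NULL);
+            printf("    Device [%u]: %s\n", d, dname);
+        }
+    }
+    return 0;
+}
+`
+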
+ +MPI +---------------- + +### Environment setup and compilation + +To achieve best MPI performance always use following setup for Intel MPI +on Xeon Phi accelerated nodes: + + $ export I_MPI_FABRICS=shm:dapl + $ export I_MPI_DAPL_PROVIDER_LIST=ofa-v2-mlx4_0-1u,ofa-v2-scif0,ofa-v2-mcm-1 + +This ensures, that MPI inside node will use SHMEM communication, between +HOST and Phi the IB SCIF will be used and between different nodes or +Phi's on diferent nodes a CCL-Direct proxy will be used. + +Please note: Other FABRICS like tcp,ofa may be used (even combined with +shm) but there's severe loss of performance (by order of magnitude). +Usage of single DAPL PROVIDER (e. g. +I_MPI_DAPL_PROVIDER=ofa-v2-mlx4_0-1u) will cause failure of +Host<->Phi and/or Phi<->Phi communication. +Usage of the I_MPI_DAPL_PROVIDER_LIST on non-accelerated node will +cause failure of any MPI communication, since those nodes don't have +SCIF device and there's no CCL-Direct proxy runnig. + +Again an MPI code for Intel Xeon Phi has to be compiled on a compute +node with accelerator and MPSS software stack installed. To get to a +compute node with accelerator use: + + $ qsub -I -q qprod -l select=1:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + +The only supported implementation of MPI standard for Intel Xeon Phi is +Intel MPI. To setup a fully functional development environment a +combination of Intel compiler and Intel MPI has to be used. On a host +load following modules before compilation: + + $ module load intel impi + +To compile an MPI code for host use: + + $ mpiicc -xhost -o mpi-test mpi-test.c + +To compile the same code for Intel Xeon Phi architecture use: + + $ mpiicc -mmic -o mpi-test-mic mpi-test.c + +Or, if you are using Fortran : + + $ mpiifort -mmic -o mpi-test-mic mpi-test.f90 + +An example of basic MPI version of "hello-world" example in C language, +that can be executed on both host and Xeon Phi is (can be directly copy +and pasted to a .c file) + +` +#include <stdio.h> +#include <mpi.h> + +int main (argc, argv) +    int argc; +    char *argv[]; +{ + int rank, size; + + int len; + char node[MPI_MAX_PROCESSOR_NAME]; + + MPI_Init (&argc, &argv);     /* starts MPI */ + MPI_Comm_rank (MPI_COMM_WORLD, &rank);       /* get current process id */ + MPI_Comm_size (MPI_COMM_WORLD, &size);       /* get number of processes */ + + MPI_Get_processor_name(node,&len); + + printf( "Hello world from process %d of %d on host %s n", rank, size, node ); + MPI_Finalize(); + return 0; +} +` + +### MPI programming models + +Intel MPI for the Xeon Phi coprocessors offers different MPI +programming models: + +Host-only model** - all MPI ranks reside on the host. The coprocessors +can be used by using offload pragmas. (Using MPI calls inside offloaded +code is not supported.)** + +Coprocessor-only model** - all MPI ranks reside only on the +coprocessors. + +Symmetric model** - the MPI ranks reside on both the host and the +coprocessor. Most general MPI case. + +###Host-only model + +In this case all environment variables are set by modules, +so to execute the compiled MPI program on a single node, use: + + $ mpirun -np 4 ./mpi-test + +The output should be similar to: + + Hello world from process 1 of 4 on host r38u31n1000 + Hello world from process 3 of 4 on host r38u31n1000 + Hello world from process 2 of 4 on host r38u31n1000 + Hello world from process 0 of 4 on host r38u31n1000 + +### Coprocessor-only model + +There are two ways how to execute an MPI code on a single +coprocessor: 1.) 
lunch the program using "**mpirun**" from the +coprocessor; or 2.) lunch the task using "**mpiexec.hydra**" from a +host. + +Execution on coprocessor** + +Similarly to execution of OpenMP programs in native mode, since the +environmental module are not supported on MIC, user has to setup paths +to Intel MPI libraries and binaries manually. One time setup can be done +by creating a "**.profile**" file in user's home directory. This file +sets up the environment on the MIC automatically once user access to the +accelerator through the SSH. + +At first get the LD_LIBRARY_PATH for currenty used Intel Compiler and +Intel MPI: + + $ echo $MIC_LD_LIBRARY_PATH + /apps/all/imkl/11.2.3.187-iimpi-7.3.5-GNU-5.1.0-2.25/mkl/lib/mic:/apps/all/imkl/11.2.3.187-iimpi-7.3.5-GNU-5.1.0-2.25/lib/mic:/apps/all/icc/2015.3.187-GNU-5.1.0-2.25/composer_xe_2015.3.187/compiler/lib/mic/ + +Use it in your ~/.profile: + + $ vim ~/.profile + + PS1='[u@h W]$ ' + export PATH=/usr/bin:/usr/sbin:/bin:/sbin + + #IMPI + export PATH=/apps/all/impi/5.0.3.048-iccifort-2015.3.187-GNU-5.1.0-2.25/mic/bin/:$PATH + + #OpenMP (ICC, IFORT), IMKL and IMPI + export LD_LIBRARY_PATH=/apps/all/imkl/11.2.3.187-iimpi-7.3.5-GNU-5.1.0-2.25/mkl/lib/mic:/apps/all/imkl/11.2.3.187-iimpi-7.3.5-GNU-5.1.0-2.25/lib/mic:/apps/all/icc/2015.3.187-GNU-5.1.0-2.25/composer_xe_2015.3.187/compiler/lib/mic:$LD_LIBRARY_PATH + +Please note: + - this file sets up both environmental variable for both MPI and OpenMP +libraries. + - this file sets up the paths to a particular version of Intel MPI +library and particular version of an Intel compiler. These versions have +to match with loaded modules. + +To access a MIC accelerator located on a node that user is currently +connected to, use: + + $ ssh mic0 + +or in case you need specify a MIC accelerator on a particular node, use: + + $ ssh r38u31n1000-mic0 + +To run the MPI code in parallel on multiple core of the accelerator, +use: + + $ mpirun -np 4 ./mpi-test-mic + +The output should be similar to: + + Hello world from process 1 of 4 on host r38u31n1000-mic0 + Hello world from process 2 of 4 on host r38u31n1000-mic0 + Hello world from process 3 of 4 on host r38u31n1000-mic0 + Hello world from process 0 of 4 on host r38u31n1000-mic0 + + ** + +**Execution on host** + +If the MPI program is launched from host instead of the coprocessor, the +environmental variables are not set using the ".profile" file. Therefore +user has to specify library paths from the command line when calling +"mpiexec". + +First step is to tell mpiexec that the MPI should be executed on a local +accelerator by setting up the environmental variable "I_MPI_MIC" + + $ export I_MPI_MIC=1 + +Now the MPI program can be executed as: + + $ mpirun -genv LD_LIBRARY_PATH $MIC_LD_LIBRARY_PATH -host mic0 -n 4 ~/mpi-test-mic + +or using mpirun + + $ mpirun -genv LD_LIBRARY_PATH $MIC_LD_LIBRARY_PATH -host mic0 -n 4 ~/mpi-test-mic + +Please note: + - the full path to the binary has to specified (here: +"**>~/mpi-test-mic**") + - the LD_LIBRARY_PATH has to match with Intel MPI module used to +compile the MPI code + +The output should be again similar to: + + Hello world from process 1 of 4 on host r38u31n1000-mic0 + Hello world from process 2 of 4 on host r38u31n1000-mic0 + Hello world from process 3 of 4 on host r38u31n1000-mic0 + Hello world from process 0 of 4 on host r38u31n1000-mic0 + +Please note that the "mpiexec.hydra" requires a file +"**>pmi_proxy**" from Intel MPI library to be copied to the +MIC filesystem. 
If the file is missing please contact the system +administrators. A simple test to see if the file is present is to +execute: + +   $ ssh mic0 ls /bin/pmi_proxy +  /bin/pmi_proxy + + ** + +**Execution on host - MPI processes distributed over multiple +accelerators on multiple nodes** + +To get access to multiple nodes with MIC accelerator, user has to +use PBS to allocate the resources. To start interactive session, that +allocates 2 compute nodes = 2 MIC accelerators run qsub command with +following parameters: + + $ qsub -I -q qprod -l select=2:ncpus=24:accelerator=True:naccelerators=2:accelerator_model=phi7120 -A NONE-0-0 + + $ module load intel impi + +This command connects user through ssh to one of the nodes +immediately. To see the other nodes that have been allocated use: + + $ cat $PBS_NODEFILE + +For example: + + r38u31n1000.bullx + r38u32n1001.bullx + +This output means that the PBS allocated nodes r38u31n1000 and +r38u32n1001, which means that user has direct access to +"**r38u31n1000-mic0**" and "**>r38u32n1001-mic0**" +accelerators. + +Please note: At this point user can connect to any of the +allocated nodes or any of the allocated MIC accelerators using ssh: +- to connect to the second node : ** $ +ssh >r38u32n1001** +- to connect to the accelerator on the first node from the first +node: **$ ssh +r38u31n1000-mic0** or ** +$ ssh mic0** +-** to connect to the accelerator on the second node from the first +node: **$ ssh +r38u32n1001-mic0** + +At this point we expect that correct modules are loaded and binary +is compiled. For parallel execution the mpiexec.hydra is used. +Again the first step is to tell mpiexec that the MPI can be executed on +MIC accelerators by setting up the environmental variable "I_MPI_MIC", +don't forget to have correct FABRIC and PROVIDER defined. + + $ export I_MPI_MIC=1 + $ export I_MPI_FABRICS=shm:dapl + $ export I_MPI_DAPL_PROVIDER_LIST=ofa-v2-mlx4_0-1u,ofa-v2-scif0,ofa-v2-mcm-1 + +The launch the MPI program use: + + $ mpirun -genv LD_LIBRARY_PATH $MIC_LD_LIBRARY_PATH + -host r38u31n1000-mic0 -n 4 ~/mpi-test-mic + : -host r38u32n1001-mic0 -n 6 ~/mpi-test-mic + +or using mpirun: + + $ mpirun -genv LD_LIBRARY_PATH + -host r38u31n1000-mic0 -n 4 ~/mpi-test-mic + : -host r38u32n1001-mic0 -n 6 ~/mpi-test-mic + +In this case four MPI processes are executed on accelerator +r38u31n1000-mic and six processes are executed on accelerator +r38u32n1001-mic0. The sample output (sorted after execution) is: + + Hello world from process 0 of 10 on host r38u31n1000-mic0 + Hello world from process 1 of 10 on host r38u31n1000-mic0 + Hello world from process 2 of 10 on host r38u31n1000-mic0 + Hello world from process 3 of 10 on host r38u31n1000-mic0 + Hello world from process 4 of 10 on host r38u32n1001-mic0 + Hello world from process 5 of 10 on host r38u32n1001-mic0 + Hello world from process 6 of 10 on host r38u32n1001-mic0 + Hello world from process 7 of 10 on host r38u32n1001-mic0 + Hello world from process 8 of 10 on host r38u32n1001-mic0 + Hello world from process 9 of 10 on host r38u32n1001-mic0 + +The same way MPI program can be executed on multiple hosts: + + $ mpirun -genv LD_LIBRARY_PATH $MIC_LD_LIBRARY_PATH + -host r38u31n1000 -n 4 ~/mpi-test + : -host r38u32n1001 -n 6 ~/mpi-test + +###Symmetric model + +In a symmetric mode MPI programs are executed on both host +computer(s) and MIC accelerator(s). 
Since MIC has a different +architecture and requires different binary file produced by the Intel +compiler two different files has to be compiled before MPI program is +executed. + +In the previous section we have compiled two binary files, one for +hosts "**mpi-test**" and one for MIC accelerators "**mpi-test-mic**". +These two binaries can be executed at once using mpiexec.hydra: + + $ mpirun + -genv $MIC_LD_LIBRARY_PATH + -host r38u32n1001 -n 2 ~/mpi-test + : -host r38u32n1001-mic0 -n 2 ~/mpi-test-mic + +In this example the first two parameters (line 2 and 3) sets up required +environment variables for execution. The third line specifies binary +that is executed on host (here r38u32n1001) and the last line specifies +the binary that is execute on the accelerator (here r38u32n1001-mic0). + +The output of the program is: + + Hello world from process 0 of 4 on host r38u32n1001 + Hello world from process 1 of 4 on host r38u32n1001 + Hello world from process 2 of 4 on host r38u32n1001-mic0 + Hello world from process 3 of 4 on host r38u32n1001-mic0 + +The execution procedure can be simplified by using the mpirun +command with the machine file a a parameter. Machine file contains list +of all nodes and accelerators that should used to execute MPI processes. + +An example of a machine file that uses 2 >hosts (r38u32n1001 +and r38u33n1002) and 2 accelerators **(r38u32n1001-mic0** and +r38u33n1002-mic0**) to run 2 MPI processes +on each of them: + + $ cat hosts_file_mix + r38u32n1001:2 + r38u32n1001-mic0:2 + r38u33n1002:2 + r38u33n1002-mic0:2 + +In addition if a naming convention is set in a way that the name +of the binary for host is **"bin_name"** and the name of the binary +for the accelerator is **"bin_name-mic"** then by setting up the +environment variable **I_MPI_MIC_POSTFIX** to **"-mic"** user do not +have to specify the names of booth binaries. In this case mpirun needs +just the name of the host binary file (i.e. "mpi-test") and uses the +suffix to get a name of the binary for accelerator (i..e. +"mpi-test-mic"). + + $ export I_MPI_MIC_POSTFIX=-mic + + >To run the MPI code using mpirun and the machine file +"hosts_file_mix" use: + + $ mpirun + -genv LD_LIBRARY_PATH $MIC_LD_LIBRARY_PATH + -machinefile hosts_file_mix + ~/mpi-test + +A possible output of the MPI "hello-world" example executed on two +hosts and two accelerators is: + + Hello world from process 0 of 8 on host r38u31n1000 + Hello world from process 1 of 8 on host r38u31n1000 + Hello world from process 2 of 8 on host r38u31n1000-mic0 + Hello world from process 3 of 8 on host r38u31n1000-mic0 + Hello world from process 4 of 8 on host r38u32n1001 + Hello world from process 5 of 8 on host r38u32n1001 + Hello world from process 6 of 8 on host r38u32n1001-mic0 + Hello world from process 7 of 8 on host r38u32n1001-mic0 + +Using the PBS automatically generated node-files + +PBS also generates a set of node-files that can be used instead of +manually creating a new one every time. Three node-files are genereated: + +**Host only node-file:** + - /lscratch/${PBS_JOBID}/nodefile-cn +MIC only node-file: + - /lscratch/${PBS_JOBID}/nodefile-mic +Host and MIC node-file: + - /lscratch/${PBS_JOBID}/nodefile-mix + +Please note each host or accelerator is listed only per files. User has +to specify how many jobs should be executed per node using "-n" +parameter of the mpirun command. 
+ +Optimization +------------ + +For more details about optimization techniques please read Intel +document [Optimization and Performance Tuning for Intel® Xeon Phi™ +Coprocessors](http://software.intel.com/en-us/articles/optimization-and-performance-tuning-for-intel-xeon-phi-coprocessors-part-1-optimization "http://software.intel.com/en-us/articles/optimization-and-performance-tuning-for-intel-xeon-phi-coprocessors-part-1-optimization") + diff --git a/converted/docs.it4i.cz/salomon/software/java.md b/converted/docs.it4i.cz/salomon/software/java.md new file mode 100644 index 0000000000000000000000000000000000000000..ced88c9ef38aacf8874e167640e971e2ace9274b --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/java.md @@ -0,0 +1,33 @@ +Java +==== + +Java on the cluster + + + +Java is available on the cluster. Activate java by loading the Java +module + + $ module load Java + +Note that the Java module must be loaded on the compute nodes as well, +in order to run java on compute nodes. + +Check for java version and path + + $ java -version + $ which java + +With the module loaded, not only the runtime environment (JRE), but also +the development environment (JDK) with the compiler is available. + + $ javac -version + $ which javac + +Java applications may use MPI for interprocess communication, in +conjunction with OpenMPI. Read more +on <http://www.open-mpi.org/faq/?category=java>. +This functionality is currently not supported on Anselm cluster. In case +you require the java interface to MPI, please contact [cluster +support](https://support.it4i.cz/rt/). + diff --git a/converted/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.md b/converted/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.md new file mode 100644 index 0000000000000000000000000000000000000000..856a3e95e6d41dbbe6eb756762fa53fa50ed2164 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.md @@ -0,0 +1,241 @@ +Running OpenMPI +=============== + + + +OpenMPI program execution +------------------------- + +The OpenMPI programs may be executed only via the PBS Workload manager, +by entering an appropriate queue. On the cluster, the **OpenMPI 1.8.6** +is OpenMPI based MPI implementation. + +### Basic usage + +Use the mpiexec to run the OpenMPI code. + +Example: + + $ qsub -q qexp -l select=4:ncpus=24 -I + qsub: waiting for job 15210.isrv5 to start + qsub: job 15210.isrv5 ready + + $ pwd + /home/username + + $ module load OpenMPI + $ mpiexec -pernode ./helloworld_mpi.x + Hello world! from rank 0 of 4 on host r1i0n17 + Hello world! from rank 1 of 4 on host r1i0n5 + Hello world! from rank 2 of 4 on host r1i0n6 + Hello world! from rank 3 of 4 on host r1i0n7 + +Please be aware, that in this example, the directive **-pernode** is +used to run only **one task per node**, which is normally an unwanted +behaviour (unless you want to run hybrid code with just one MPI and 24 +OpenMP tasks per node). In normal MPI programs **omit the -pernode +directive** to run up to 24 MPI tasks per each node. + +In this example, we allocate 4 nodes via the express queue +interactively. We set up the openmpi environment and interactively run +the helloworld_mpi.x program. +Note that the executable +helloworld_mpi.x must be available within the +same path on all nodes. This is automatically fulfilled on the /home and +/scratch filesystem. 
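+
+For reference, a minimal helloworld_mpi.c that produces output in the
+format shown above might look like the sketch below; any MPI "hello
+world" program compiled with the loaded OpenMPI module (for example
+with mpicc helloworld_mpi.c -o helloworld_mpi.x) will behave the same
+way.
+
+`
+#include <stdio.h>
+#include <mpi.h>
+
+int main(int argc, char **argv)
+{
+    int rank, size, len;
+    char node[MPI_MAX_PROCESSOR_NAME];
+
+    MPI_Init(&argc, &argv);                /* start MPI */
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);  /* rank of this process */
+    MPI_Comm_size(MPI_COMM_WORLD, &size);  /* total number of processes */
+    MPI_Get_processor_name(node, &len);    /* host name of this rank */
+
+    printf("Hello world! from rank %d of %d on host %s\n", rank, size, node);
+
+    MPI_Finalize();
+    return 0;
+}
+`
+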
+ +You need to preload the executable, if running on the local ramdisk /tmp +filesystem + + $ pwd + /tmp/pbs.15210.isrv5 + + $ mpiexec -pernode --preload-binary ./helloworld_mpi.x + Hello world! from rank 0 of 4 on host r1i0n17 + Hello world! from rank 1 of 4 on host r1i0n5 + Hello world! from rank 2 of 4 on host r1i0n6 + Hello world! from rank 3 of 4 on host r1i0n7 + +In this example, we assume the executable +helloworld_mpi.x is present on compute node +r1i0n17 on ramdisk. We call the mpiexec whith the **--preload-binary** +argument (valid for openmpi). The mpiexec will copy the executable from +r1i0n17 to the /tmp/pbs.15210.isrv5 +directory on r1i0n5, r1i0n6 and r1i0n7 and execute the program. + +MPI process mapping may be controlled by PBS parameters. + +The mpiprocs and ompthreads parameters allow for selection of number of +running MPI processes per node as well as number of OpenMP threads per +MPI process. + +### One MPI process per node + +Follow this example to run one MPI process per node, 24 threads per +process. + + $ qsub -q qexp -l select=4:ncpus=24:mpiprocs=1:ompthreads=24 -I + + $ module load OpenMPI + + $ mpiexec --bind-to-none ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 1 MPI processes per node and 24 threads per socket, +on 4 nodes. + +### Two MPI processes per node + +Follow this example to run two MPI processes per node, 8 threads per +process. Note the options to mpiexec. + + $ qsub -q qexp -l select=4:ncpus=24:mpiprocs=2:ompthreads=12 -I + + $ module load OpenMPI + + $ mpiexec -bysocket -bind-to-socket ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 2 MPI processes per node and 12 threads per socket, +each process and its threads bound to a separate processor socket of the +node, on 4 nodes + +### 24 MPI processes per node + +Follow this example to run 24 MPI processes per node, 1 thread per +process. Note the options to mpiexec. + + $ qsub -q qexp -l select=4:ncpus=24:mpiprocs=24:ompthreads=1 -I + + $ module load OpenMPI + + $ mpiexec -bycore -bind-to-core ./helloworld_mpi.x + +In this example, we demonstrate recommended way to run an MPI +application, using 24 MPI processes per node, single threaded. Each +process is bound to separate processor core, on 4 nodes. + +### OpenMP thread affinity + +Important! Bind every OpenMP thread to a core! + +In the previous two examples with one or two MPI processes per node, the +operating system might still migrate OpenMP threads between cores. You +might want to avoid this by setting these environment variable for GCC +OpenMP: + + $ export GOMP_CPU_AFFINITY="0-23" + +or this one for Intel OpenMP: + + $ export KMP_AFFINITY=granularity=fine,compact,1,0 + +As of OpenMP 4.0 (supported by GCC 4.9 and later and Intel 14.0 and +later) the following variables may be used for Intel or GCC: + + $ export OMP_PROC_BIND=true + $ export OMP_PLACES=cores + +OpenMPI Process Mapping and Binding +------------------------------------------------ + +The mpiexec allows for precise selection of how the MPI processes will +be mapped to the computational nodes and how these processes will bind +to particular processor sockets and cores. + +MPI process mapping may be specified by a hostfile or rankfile input to +the mpiexec program. Altough all implementations of MPI provide means +for process mapping and binding, following examples are valid for the +openmpi only. 
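+
+Besides the mpiexec options below, the placement can also be checked
+from inside the application itself. The following is a small sketch
+(assuming Linux and the glibc sched_getcpu() call, which is not part of
+MPI) that prints the host and the CPU core each rank is currently
+running on; running it together with the hostfile or rankfile examples
+below gives a quick cross-check of the binding reported by
+--report-bindings.
+
+`
+#define _GNU_SOURCE            /* for sched_getcpu() */
+#include <stdio.h>
+#include <sched.h>
+#include <mpi.h>
+
+int main(int argc, char **argv)
+{
+    int rank, size, len, cpu;
+    char node[MPI_MAX_PROCESSOR_NAME];
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Get_processor_name(node, &len);
+
+    /* core this rank is executing on right now; without binding it may
+       change between calls */
+    cpu = sched_getcpu();
+
+    printf("Rank %d of %d runs on host %s, core %d\n", rank, size, node, cpu);
+
+    MPI_Finalize();
+    return 0;
+}
+`
+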
+ +### Hostfile + +Example hostfile + + r1i0n17.smc.salomon.it4i.cz + r1i0n5.smc.salomon.it4i.cz + r1i0n6.smc.salomon.it4i.cz + r1i0n7.smc.salomon.it4i.cz + +Use the hostfile to control process placement + + $ mpiexec -hostfile hostfile ./helloworld_mpi.x + Hello world! from rank 0 of 4 on host r1i0n17 + Hello world! from rank 1 of 4 on host r1i0n5 + Hello world! from rank 2 of 4 on host r1i0n6 + Hello world! from rank 3 of 4 on host r1i0n7 + +In this example, we see that ranks have been mapped on nodes according +to the order in which nodes show in the hostfile + +### Rankfile + +Exact control of MPI process placement and resource binding is provided +by specifying a rankfile + +Appropriate binding may boost performance of your application. + +Example rankfile + + rank 0=r1i0n7.smc.salomon.it4i.cz slot=1:0,1 + rank 1=r1i0n6.smc.salomon.it4i.cz slot=0:* + rank 2=r1i0n5.smc.salomon.it4i.cz slot=1:1-2 + rank 3=r1i0n17.smc.salomon slot=0:1,1:0-2 + rank 4=r1i0n6.smc.salomon.it4i.cz slot=0:*,1:* + +This rankfile assumes 5 ranks will be running on 4 nodes and provides +exact mapping and binding of the processes to the processor sockets and +cores + +Explanation: +rank 0 will be bounded to r1i0n7, socket1 core0 and core1 +rank 1 will be bounded to r1i0n6, socket0, all cores +rank 2 will be bounded to r1i0n5, socket1, core1 and core2 +rank 3 will be bounded to r1i0n17, socket0 core1, socket1 core0, core1, +core2 +rank 4 will be bounded to r1i0n6, all cores on both sockets + + $ mpiexec -n 5 -rf rankfile --report-bindings ./helloworld_mpi.x + [r1i0n17:11180] MCW rank 3 bound to socket 0[core 1] socket 1[core 0-2]: [. B . . . . . . . . . .][B B B . . . . . . . . .] (slot list 0:1,1:0-2) + [r1i0n7:09928] MCW rank 0 bound to socket 1[core 0-1]: [. . . . . . . . . . . .][B B . . . . . . . . . .] (slot list 1:0,1) + [r1i0n6:10395] MCW rank 1 bound to socket 0[core 0-7]: [B B B B B B B B B B B B][. . . . . . . . . . . .] (slot list 0:*) + [r1i0n5:10406] MCW rank 2 bound to socket 1[core 1-2]: [. . . . . . . . . . . .][. B B . . . . . . . . .] (slot list 1:1-2) + [r1i0n6:10406] MCW rank 4 bound to socket 0[core 0-7] socket 1[core 0-7]: [B B B B B B B B B B B B][B B B B B B B B B B B B] (slot list 0:*,1:*) + Hello world! from rank 3 of 5 on host r1i0n17 + Hello world! from rank 1 of 5 on host r1i0n6 + Hello world! from rank 0 of 5 on host r1i0n7 + Hello world! from rank 4 of 5 on host r1i0n6 + Hello world! from rank 2 of 5 on host r1i0n5 + +In this example we run 5 MPI processes (5 ranks) on four nodes. The +rankfile defines how the processes will be mapped on the nodes, sockets +and cores. The **--report-bindings** option was used to print out the +actual process location and bindings. Note that ranks 1 and 4 run on the +same node and their core binding overlaps. + +It is users responsibility to provide correct number of ranks, sockets +and cores. + +### Bindings verification + +In all cases, binding and threading may be verified by executing for +example: + + $ mpiexec -bysocket -bind-to-socket --report-bindings echo + $ mpiexec -bysocket -bind-to-socket numactl --show + $ mpiexec -bysocket -bind-to-socket echo $OMP_NUM_THREADS + +Changes in OpenMPI 1.8 +---------------------- + +Some options have changed in OpenMPI version 1.8. 
+
+  |version 1.6.5 |version 1.8.1 |
+  | --- | --- |
+  |--bind-to-none |--bind-to none |
+  |--bind-to-core |--bind-to core |
+  |--bind-to-socket |--bind-to socket |
+  |-bysocket |--map-by socket |
+  |-bycore |--map-by core |
+  |-pernode |--map-by ppr:1:node |
+
diff --git a/converted/docs.it4i.cz/salomon/software/mpi-1/mpi.md b/converted/docs.it4i.cz/salomon/software/mpi-1/mpi.md
new file mode 100644
index 0000000000000000000000000000000000000000..13c3f15090e589d6a7d3983f0a98a9c220ff0f92
--- /dev/null
+++ b/converted/docs.it4i.cz/salomon/software/mpi-1/mpi.md
@@ -0,0 +1,185 @@
+MPI
+===
+
+Setting up MPI Environment
+--------------------------
+
+The Salomon cluster provides several implementations of the MPI library:
+
+  |MPI Library|Thread support|
+  |---|---|
+  |**Intel MPI 4.1**|Full thread support up to MPI_THREAD_MULTIPLE|
+  |**Intel MPI 5.0**|Full thread support up to MPI_THREAD_MULTIPLE|
+  |**OpenMPI 1.8.6**|Full thread support up to MPI_THREAD_MULTIPLE, MPI-3.0 support|
+  |**SGI MPT 2.12**| |
+
+MPI libraries are activated via the environment modules.
+
+Look up the modulefiles/mpi section in module avail
+
+    $ module avail
+    ------------------------------ /apps/modules/mpi -------------------------------
+    impi/4.1.1.036-iccifort-2013.5.192
+    impi/4.1.1.036-iccifort-2013.5.192-GCC-4.8.3
+    impi/5.0.3.048-iccifort-2015.3.187
+    impi/5.0.3.048-iccifort-2015.3.187-GNU-5.1.0-2.25
+    MPT/2.12
+    OpenMPI/1.8.6-GNU-5.1.0-2.25
+
+There are default compilers associated with any particular MPI
+implementation. The defaults may be changed; the MPI libraries may be
+used in conjunction with any compiler.
+The defaults are selected via the modules in the following way:
+
+  |Module|MPI|Compiler suite|
+  |---|---|---|
+  |impi-5.0.3.048-iccifort-2015.3.187|Intel MPI 5.0.3| |
+  |OpenMPI-1.8.6-GNU-5.1.0-2.25|OpenMPI 1.8.6| |
+
+Examples:
+
+    $ module load gompi/2015b
+
+In this example, we activate the latest OpenMPI with the latest GNU
+compilers (OpenMPI 1.8.6 and GCC 5.1). Please see more information about
+toolchains in the [Environment and
+Modules](../../environment-and-modules.html) section.
+
+To use OpenMPI with the Intel compiler suite, use
+
+    $ module load iompi/2015.03
+
+In this example, OpenMPI 1.8.6 built with the Intel compilers is
+activated; this is the "iompi" toolchain.
+
+Compiling MPI Programs
+----------------------
+
+After setting up your MPI environment, compile your program using one of
+the MPI wrappers
+
+    $ mpicc -v
+    $ mpif77 -v
+    $ mpif90 -v
+
+When using Intel MPI, use the following MPI wrappers:
+
+    $ mpicc
+    $ mpiifort
+
+The mpif90 and mpif77 wrappers provided by Intel MPI are designed for
+gcc and gfortran. You might be able to compile MPI code with them even
+with Intel compilers, but you might run into problems (for example,
+native MIC compilation with -mmic does not work with mpif90).
+ +Example program: + + // helloworld_mpi.c + #include <stdio.h> + + #include<mpi.h> + + int main(int argc, char **argv) { + + int len; + int rank, size; + char node[MPI_MAX_PROCESSOR_NAME]; + + // Initiate MPI + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD,&rank); + MPI_Comm_size(MPI_COMM_WORLD,&size); + + // Get hostame and print + MPI_Get_processor_name(node,&len); + printf("Hello world! from rank %d of %d on host %sn",rank,size,node); + + // Finalize and exit + MPI_Finalize(); + + return 0; + } + +Compile the above example with + + $ mpicc helloworld_mpi.c -o helloworld_mpi.x + +Running MPI Programs +-------------------- + +The MPI program executable must be compatible with the loaded MPI +module. +Always compile and execute using the very same MPI module. + +It is strongly discouraged to mix mpi implementations. Linking an +application with one MPI implementation and running mpirun/mpiexec form +other implementation may result in unexpected errors. + +The MPI program executable must be available within the same path on all +nodes. This is automatically fulfilled on the /home and /scratch +filesystem. You need to preload the executable, if running on the local +scratch /lscratch filesystem. + +### Ways to run MPI programs + +Optimal way to run an MPI program depends on its memory requirements, +memory access pattern and communication pattern. + +Consider these ways to run an MPI program: +1. One MPI process per node, 24 threads per process +2. Two MPI processes per node, 12 threads per process +3. 24 MPI processes per node, 1 thread per process. + +One MPI** process per node, using 24 threads, is most useful for +memory demanding applications, that make good use of processor cache +memory and are not memory bound. This is also a preferred way for +communication intensive applications as one process per node enjoys full +bandwidth access to the network interface. + +Two MPI** processes per node, using 12 threads each, bound to +processor socket is most useful for memory bandwidth bound applications +such as BLAS1 or FFT, with scalable memory demand. However, note that +the two processes will share access to the network interface. The 12 +threads and socket binding should ensure maximum memory access bandwidth +and minimize communication, migration and numa effect overheads. + +Important! Bind every OpenMP thread to a core! + +In the previous two cases with one or two MPI processes per node, the +operating system might still migrate OpenMP threads between cores. You +want to avoid this by setting the KMP_AFFINITY or GOMP_CPU_AFFINITY +environment variables. + +**24 MPI** processes per node, using 1 thread each bound to processor +core is most suitable for highly scalable applications with low +communication demand. + +### Running OpenMPI + +The [**OpenMPI 1.8.6**](http://www.open-mpi.org/) is +based on OpenMPI. Read more on [how to run +OpenMPI](Running_OpenMPI.html) based MPI. + + + +The Intel MPI may run on the[Intel Xeon +Ph](../intel-xeon-phi.html)i accelerators as well. Read +more on [how to run Intel MPI on +accelerators](../intel-xeon-phi.html). 
+ diff --git a/converted/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.md b/converted/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.md new file mode 100644 index 0000000000000000000000000000000000000000..00577440a001d32dfa7ed93554daa66b1c3136d4 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.md @@ -0,0 +1,105 @@ +MPI4Py (MPI for Python) +======================= + +OpenMPI interface to Python + + + +Introduction +------------ + +MPI for Python provides bindings of the Message Passing Interface (MPI) +standard for the Python programming language, allowing any Python +program to exploit multiple processors. + +This package is constructed on top of the MPI-1/2 specifications and +provides an object oriented interface which closely follows MPI-2 C++ +bindings. It supports point-to-point (sends, receives) and collective +(broadcasts, scatters, gathers) communications of any picklable Python +object, as well as optimized communications of Python object exposing +the single-segment buffer interface (NumPy arrays, builtin +bytes/string/array objects). + +On Anselm MPI4Py is available in standard Python modules. + +Modules +------- + +MPI4Py is build for OpenMPI. Before you start with MPI4Py you need to +load Python and OpenMPI modules. You can use toolchain, that loads +Python and OpenMPI at once. + + $ module load Python/2.7.9-foss-2015g + +Execution +--------- + +You need to import MPI to your python program. Include the following +line to the python script: + + from mpi4py import MPI + +The MPI4Py enabled python programs [execute as any other +OpenMPI](Running_OpenMPI.html) code.The simpliest way is +to run + + $ mpiexec python <script>.py + +For example + + $ mpiexec python hello_world.py + +Examples +-------- + +### Hello world! + + from mpi4py import MPI + + comm = MPI.COMM_WORLD + + print "Hello! I'm rank %d from %d running in total..." % (comm.rank, comm.size) + + comm.Barrier()  # wait for everybody to synchronize + +###Collective Communication with NumPy arrays + + from __future__ import division + from mpi4py import MPI + import numpy as np + + comm = MPI.COMM_WORLD + + print("-"*78) + print(" Running on %d cores" % comm.size) + print("-"*78) + + comm.Barrier() + + # Prepare a vector of N=5 elements to be broadcasted... + N = 5 + if comm.rank == 0: +   A = np.arange(N, dtype=np.float64)   # rank 0 has proper data + else: +   A = np.empty(N, dtype=np.float64)   # all other just an empty array + + # Broadcast A from rank 0 to everybody + comm.Bcast( [A, MPI.DOUBLE] ) + + # Everybody should now have the same... + print "[%02d] %s" % (comm.rank, A) + +Execute the above code as: + + $ qsub -q qexp -l select=4:ncpus=24:mpiprocs=24:ompthreads=1 -I + + $ module load Python/2.7.9-foss-2015g + + $ mpiexec --map-by core --bind-to core python hello_world.py + +In this example, we run MPI4Py enabled code on 4 nodes, 24 cores per +node (total of 96 processes), each python process is bound to a +different core. +More examples and documentation can be found on [MPI for Python +webpage](https://pythonhosted.org/mpi4py/usrman/index.html). 
+ diff --git a/converted/docs.it4i.cz/salomon/software/numerical-languages/introduction.md b/converted/docs.it4i.cz/salomon/software/numerical-languages/introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..6416c41fda748019486f6b2dffcdc48c87c0346a --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/numerical-languages/introduction.md @@ -0,0 +1,48 @@ +Numerical languages +=================== + +Interpreted languages for numerical computations and analysis + + + +Introduction +------------ + +This section contains a collection of high-level interpreted languages, +primarily intended for numerical computations. + +Matlab +------ + +MATLAB®^ is a high-level language and interactive environment for +numerical computation, visualization, and programming. + + $ module load MATLAB + $ matlab + +Read more at the [Matlab +page](matlab.html). + +Octave +------ + +GNU Octave is a high-level interpreted language, primarily intended for +numerical computations. The Octave language is quite similar to Matlab +so that most programs are easily portable. + + $ module load Octave + $ octave + +Read more at the [Octave page](octave.html). + +R +- + +The R is an interpreted language and environment for statistical +computing and graphics. + + $ module load R + $ R + +Read more at the [R page](r.html). + diff --git a/converted/docs.it4i.cz/salomon/software/numerical-languages/matlab.md b/converted/docs.it4i.cz/salomon/software/numerical-languages/matlab.md new file mode 100644 index 0000000000000000000000000000000000000000..c713cd7c96b544ea722d8e74537cb78e3b1b1edd --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/numerical-languages/matlab.md @@ -0,0 +1,345 @@ +Matlab +====== + + + +Introduction +------------ + +Matlab is available in versions R2015a and R2015b. There are always two +variants of the release: + +- Non commercial or so called EDU variant, which can be used for + common research and educational purposes. +- Commercial or so called COM variant, which can used also for + commercial activities. The licenses for commercial variant are much + more expensive, so usually the commercial variant has only subset of + features compared to the EDU available. + + + +To load the latest version of Matlab load the module + + $ module load MATLAB + +By default the EDU variant is marked as default. If you need other +version or variant, load the particular version. To obtain the list of +available versions use + + $ module avail MATLAB + +If you need to use the Matlab GUI to prepare your Matlab programs, you +can use Matlab directly on the login nodes. But for all computations use +Matlab on the compute nodes via PBS Pro scheduler. + +If you require the Matlab GUI, please follow the general informations +about [running graphical +applications](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html). + +Matlab GUI is quite slow using the X forwarding built in the PBS (qsub +-X), so using X11 display redirection either via SSH or directly by +xauth (please see the "GUI Applications on Compute Nodes over VNC" part +[here](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html)) +is recommended. + +To run Matlab with GUI, use + + $ matlab + +To run Matlab in text mode, without the Matlab Desktop GUI environment, +use + + $ matlab -nodesktop -nosplash + +plots, images, etc... will be still available. 
+ +Running parallel Matlab using Distributed Computing Toolbox / Engine +------------------------------------------------------------------------ + +Distributed toolbox is available only for the EDU variant + +The MPIEXEC mode available in previous versions is no longer available +in MATLAB 2015. Also, the programming interface has changed. Refer +to [Release +Notes](http://www.mathworks.com/help/distcomp/release-notes.html#buanp9e-1). + +Delete previously used file mpiLibConf.m, we have observed crashes when +using Intel MPI. + +To use Distributed Computing, you first need to setup a parallel +profile. We have provided the profile for you, you can either import it +in MATLAB command line: + + > parallel.importProfile('/apps/all/MATLAB/2015b-EDU/SalomonPBSPro.settings') + + ans = + + SalomonPBSPro + +Or in the GUI, go to tab HOME -> Parallel -> Manage Cluster +Profiles..., click Import and navigate to : + +/apps/all/MATLAB/2015b-EDU/SalomonPBSPro.settings + +With the new mode, MATLAB itself launches the workers via PBS, so you +can either use interactive mode or a batch mode on one node, but the +actual parallel processing will be done in a separate job started by +MATLAB itself. Alternatively, you can use "local" mode to run parallel +code on just a single node. + +### Parallel Matlab interactive session + +Following example shows how to start interactive session with support +for Matlab GUI. For more information about GUI based applications on +Anselm see [this +page](../../../get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system/x-window-and-vnc.html). + + $ xhost + + $ qsub -I -v DISPLAY=$(uname -n):$(echo $DISPLAY | cut -d ':' -f 2) -A NONE-0-0 -q qexp -l select=1 -l walltime=00:30:00 + -l feature__matlab__MATLAB=1 + +This qsub command example shows how to run Matlab on a single node. + +The second part of the command shows how to request all necessary +licenses. In this case 1 Matlab-EDU license and 48 Distributed Computing +Engines licenses. + +Once the access to compute nodes is granted by PBS, user can load +following modules and start Matlab: + + r1i0n17$ module load MATLAB/2015a-EDU + r1i0n17$ matlab & + +### Parallel Matlab batch job in Local mode + +To run matlab in batch mode, write an matlab script, then write a bash +jobscript and execute via the qsub command. By default, matlab will +execute one matlab worker instance per allocated core. + + #!/bin/bash + #PBS -A PROJECT ID + #PBS -q qprod + #PBS -l select=1:ncpus=24:mpiprocs=24:ompthreads=1 + + # change to shared scratch directory + SCR=/scratch/work/user/$USER/$PBS_JOBID + mkdir -p $SCR ; cd $SCR || exit + + # copy input file to scratch + cp $PBS_O_WORKDIR/matlabcode.m . + + # load modules + module load MATLAB/2015a-EDU + + # execute the calculation + matlab -nodisplay -r matlabcode > output.out + + # copy output file to home + cp output.out $PBS_O_WORKDIR/. + +This script may be submitted directly to the PBS workload manager via +the qsub command. The inputs and matlab script are in matlabcode.m +file, outputs in output.out file. Note the missing .m extension in the +matlab -r matlabcodefile call, **the .m must not be included**. Note +that the **shared /scratch must be used**. Further, it is **important to +include quit** statement at the end of the matlabcode.m script. 
+ +Submit the jobscript using qsub + + $ qsub ./jobscript + +### Parallel Matlab Local mode program example + +The last part of the configuration is done directly in the user Matlab +script before Distributed Computing Toolbox is started. + + cluster = parcluster('local') + +This script creates scheduler object "cluster" of type "local" that +starts workers locally. + +Please note: Every Matlab script that needs to initialize/use matlabpool +has to contain these three lines prior to calling parpool(sched, ...) +function. + +The last step is to start matlabpool with "cluster" object and correct +number of workers. We have 24 cores per node, so we start 24 workers. + + parpool(cluster,24); + + + ... parallel code ... + + + parpool close + +The complete example showing how to use Distributed Computing Toolbox in +local mode is shown here. + + cluster = parcluster('local'); + cluster + + parpool(cluster,24); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') +    +    T = W*x; % Calculation performed on labs, in parallel. +             % T and W are both codistributed arrays here. + end + T; + whos        % T and W are both distributed arrays here. + + parpool close + quit + +You can copy and paste the example in a .m file and execute. Note that +the parpool size should correspond to **total number of cores** +available on allocated nodes. + +### Parallel Matlab Batch job using PBS mode (workers spawned in a separate job) + +This mode uses PBS scheduler to launch the parallel pool. It uses the +SalomonPBSPro profile that needs to be imported to Cluster Manager, as +mentioned before. This methodod uses MATLAB's PBS Scheduler interface - +it spawns the workers in a separate job submitted by MATLAB using qsub. + +This is an example of m-script using PBS mode: + + cluster = parcluster('SalomonPBSPro'); + set(cluster, 'SubmitArguments', '-A OPEN-0-0'); + set(cluster, 'ResourceTemplate', '-q qprod -l select=10:ncpus=24'); + set(cluster, 'NumWorkers', 240); + + pool = parpool(cluster,240); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') + + T = W*x; % Calculation performed on labs, in parallel. + % T and W are both codistributed arrays here. + end + whos % T and W are both distributed arrays here. + + % shut down parallel pool + delete(pool) + +Note that we first construct a cluster object using the imported +profile, then set some important options, namely : SubmitArguments, +where you need to specify accounting id, and ResourceTemplate, where you +need to specify number of nodes to run the job. + +You can start this script using batch mode the same way as in Local mode +example. + +### Parallel Matlab Batch with direct launch (workers spawned within the existing job) + +This method is a "hack" invented by us to emulate the mpiexec +functionality found in previous MATLAB versions. We leverage the MATLAB +Generic Scheduler interface, but instead of submitting the workers to +PBS, we launch the workers directly within the running job, thus we +avoid the issues with master script and workers running in separate jobs +(issues with license not available, waiting for the worker's job to +spawn etc.) + +Please note that this method is experimental. 
+ +For this method, you need to use SalomonDirect profile, import it +using [the same way as +SalomonPBSPro](matlab.html#running-parallel-matlab-using-distributed-computing-toolbox---engine) + +This is an example of m-script using direct mode: + + parallel.importProfile('/apps/all/MATLAB/2015b-EDU/SalomonDirect.settings') + cluster = parcluster('SalomonDirect'); + set(cluster, 'NumWorkers', 48); + + pool = parpool(cluster, 48); + + n=2000; + + W = rand(n,n); + W = distributed(W); + x = (1:n)'; + x = distributed(x); + spmd + [~, name] = system('hostname') + + T = W*x; % Calculation performed on labs, in parallel. + % T and W are both codistributed arrays here. + end + whos % T and W are both distributed arrays here. + + % shut down parallel pool + delete(pool) + +### Non-interactive Session and Licenses + +If you want to run batch jobs with Matlab, be sure to request +appropriate license features with the PBS Pro scheduler, at least the " +-l __feature__matlab__MATLAB=1" for EDU variant of Matlab. More +information about how to check the license features states and how to +request them with PBS Pro, please [look +here](../../../anselm-cluster-documentation/software/isv_licenses.html). + +The licensing feature of PBS is currently disabled. + +In case of non-interactive session please read the [following +information](../../../anselm-cluster-documentation/software/isv_licenses.html) +on how to modify the qsub command to test for available licenses prior +getting the resource allocation. + +### Matlab Distributed Computing Engines start up time + +Starting Matlab workers is an expensive process that requires certain +amount of time. For your information please see the following table: + + |compute nodes|number of workers|start-up time[s]| + |---|---|---| + |16|384|831| + |8|192|807| + |4|96|483| + |2|48|16| + +MATLAB on UV2000 +----------------- + +UV2000 machine available in queue "qfat" can be used for MATLAB +computations. This is a SMP NUMA machine with large amount of RAM, which +can be beneficial for certain types of MATLAB jobs. CPU cores are +allocated in chunks of 8 for this machine. + +You can use MATLAB on UV2000 in two parallel modes : + +### Threaded mode + +Since this is a SMP machine, you can completely avoid using Parallel +Toolbox and use only MATLAB's threading. MATLAB will automatically +detect the number of cores you have allocated and will set +maxNumCompThreads accordingly and certain +operations, such as fft, , eig, svd, +etc. will be automatically run in threads. The advantage of this mode is +that you don't need to modify your existing sequential codes. + +### Local cluster mode + +You can also use Parallel Toolbox on UV2000. Use l[ocal cluster +mode](matlab.html#parallel-matlab-batch-job-in-local-mode), +"SalomonPBSPro" profile will not work. + + + + + diff --git a/converted/docs.it4i.cz/salomon/software/numerical-languages/octave.md b/converted/docs.it4i.cz/salomon/software/numerical-languages/octave.md new file mode 100644 index 0000000000000000000000000000000000000000..6bb5f2d8480cd01e620cf6d387803b0e7eca1c46 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/numerical-languages/octave.md @@ -0,0 +1,79 @@ +Octave +====== + + + +GNU Octave is a high-level interpreted language, primarily intended for +numerical computations. It provides capabilities for the numerical +solution of linear and nonlinear problems, and for performing other +numerical experiments. It also provides extensive graphics capabilities +for data visualization and manipulation. 
Octave is normally used through its interactive command line interface, but it can also be used to write non-interactive programs. The Octave language is quite similar to Matlab, so that most programs are easily portable. Read more on <http://www.gnu.org/software/octave/>

Two versions of Octave are available on the cluster via the module system:

 |Status|Version|Module|
 |---|---|---|
 |**Stable**|Octave 3.8.2|Octave|

    $ module load Octave

The Octave on the cluster is linked to the highly optimized MKL mathematical library. This provides threaded parallelization to many Octave kernels, notably the linear algebra subroutines. Octave runs these heavy calculation kernels without any penalty. By default, Octave would parallelize to 24 threads. You may control the number of threads by setting the OMP_NUM_THREADS environment variable.

To run Octave interactively, log in with the ssh -X parameter for X11 forwarding. Run octave:

    $ octave

To run Octave in batch mode, write an Octave script, then write a bash jobscript and execute via the qsub command. By default, Octave will use 24 threads when running MKL kernels.

    #!/bin/bash

    # change to local scratch directory
    mkdir -p /scratch/work/user/$USER/$PBS_JOBID
    cd /scratch/work/user/$USER/$PBS_JOBID || exit

    # copy input file to scratch
    cp $PBS_O_WORKDIR/octcode.m .

    # load octave module
    module load Octave

    # execute the calculation
    octave -q --eval octcode > output.out

    # copy output file to home
    cp output.out $PBS_O_WORKDIR/.

    #exit
    exit

This script may be submitted directly to the PBS workload manager via the qsub command. The inputs are in the octcode.m file, the outputs in the output.out file. See the single node jobscript example in the [Job execution section](../../resource-allocation-and-job-execution.html).

The Octave C compiler mkoctfile calls GNU gcc 4.8.1 for compiling native C code. This is very useful for running native C subroutines in the Octave environment.

    $ mkoctfile -v

Octave may use MPI for interprocess communication. This functionality is currently not supported on the cluster. In case you require the Octave interface to MPI, please contact our [cluster support](https://support.it4i.cz/rt/).

diff --git a/converted/docs.it4i.cz/salomon/software/numerical-languages/r.md b/converted/docs.it4i.cz/salomon/software/numerical-languages/r.md
new file mode 100644
index 0000000000000000000000000000000000000000..85e34b69199ad4b1bdb78d498e7c98aa89845b9b
--- /dev/null
+++ b/converted/docs.it4i.cz/salomon/software/numerical-languages/r.md
@@ -0,0 +1,460 @@
R
=

Introduction
------------

R is a language and environment for statistical computing and graphics. R provides a wide variety of statistical (linear and nonlinear modelling, classical statistical tests, time-series analysis, classification, clustering, ...) and graphical techniques, and is highly extensible.

One of R's strengths is the ease with which well-designed publication-quality plots can be produced, including mathematical symbols and formulae where needed. Great care has been taken over the defaults for the minor design choices in graphics, but the user retains full control.

Another convenience is the ease with which C code or third-party libraries may be integrated within R.

Extensive support for parallel computing is available within R.
+ +Read more on <http://www.r-project.org/>, +<http://cran.r-project.org/doc/manuals/r-release/R-lang.html> + +Modules +------- + +**The R version 3.1.1 is available on the cluster, along with GUI +interface Rstudio** + + |Application|Version|module| + ------- |---|---|---- --------------------- + |**R**|R 3.1.1|R/3.1.1-intel-2015b| + |**Rstudio**|Rstudio 0.97|Rstudio| + + $ module load R + +Execution +--------- + +The R on Anselm is linked to highly optimized MKL mathematical +library. This provides threaded parallelization to many R kernels, +notably the linear algebra subroutines. The R runs these heavy +calculation kernels without any penalty. By default, the R would +parallelize to 24 threads. You may control the threads by setting the +OMP_NUM_THREADS environment variable. + +### Interactive execution + +To run R interactively, using Rstudio GUI, log in with ssh -X parameter +for X11 forwarding. Run rstudio: + + $ module load Rstudio + $ rstudio + +### Batch execution + +To run R in batch mode, write an R script, then write a bash jobscript +and execute via the qsub command. By default, R will use 24 threads when +running MKL kernels. + +Example jobscript: + + #!/bin/bash + + # change to local scratch directory + cd /lscratch/$PBS_JOBID || exit + + # copy input file to scratch + cp $PBS_O_WORKDIR/rscript.R . + + # load R module + module load R + + # execute the calculation + R CMD BATCH rscript.R routput.out + + # copy output file to home + cp routput.out $PBS_O_WORKDIR/. + + #exit + exit + +This script may be submitted directly to the PBS workload manager via +the qsub command. The inputs are in rscript.R file, outputs in +routput.out file. See the single node jobscript example in the [Job +execution +section](../../resource-allocation-and-job-execution/job-submission-and-execution.html). + +Parallel R +---------- + +Parallel execution of R may be achieved in many ways. One approach is +the implied parallelization due to linked libraries or specially enabled +functions, as [described +above](r.html#interactive-execution). In the following +sections, we focus on explicit parallelization, where parallel +constructs are directly stated within the R script. + +Package parallel +-------------------- + +The package parallel provides support for parallel computation, +including by forking (taken from package multicore), by sockets (taken +from package snow) and random-number generation. + +The package is activated this way: + + $ R + > library(parallel) + +More information and examples may be obtained directly by reading the +documentation available in R + + > ?parallel + > library(help = "parallel") + > vignette("parallel") + +Download the package +[parallell](package-parallel-vignette) vignette. + +The forking is the most simple to use. Forking family of functions +provide parallelized, drop in replacement for the serial apply() family +of functions. + +Forking via package parallel provides functionality similar to OpenMP +construct +#omp parallel for + +Only cores of single node can be utilized this way! 
+ +Forking example: + + library(parallel) + + #integrand function + f <- function(i,h) { + x <- h*(i-0.5) + return (4/(1 + x*x)) + } + + #initialize + size <- detectCores() + + while (TRUE) + { + #read number of intervals + cat("Enter the number of intervals: (0 quits) ") + fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) + + if(n<=0) break + + #run the calculation + n <- max(n,size) + h <- 1.0/n + + i <- seq(1,n); + pi3 <- h*sum(simplify2array(mclapply(i,f,h,mc.cores=size))); + + #print results + cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) + } + +The above example is the classic parallel example for calculating the +number Ď€. Note the **detectCores()** and **mclapply()** functions. +Execute the example as: + + $ R --slave --no-save --no-restore -f pi3p.R + +Every evaluation of the integrad function runs in parallel on different +process. + +Package Rmpi +------------ + +package Rmpi provides an interface (wrapper) to MPI APIs. + +It also provides interactive R slave environment. On the cluster, Rmpi +provides interface to the +[OpenMPI](../mpi-1/Running_OpenMPI.html). + +Read more on Rmpi at <http://cran.r-project.org/web/packages/Rmpi/>, +reference manual is available at +<http://cran.r-project.org/web/packages/Rmpi/Rmpi.pdf> + +When using package Rmpi, both openmpi and R modules must be loaded + + $ module load OpenMPI + $ module load R + +Rmpi may be used in three basic ways. The static approach is identical +to executing any other MPI programm. In addition, there is Rslaves +dynamic MPI approach and the mpi.apply approach. In the following +section, we will use the number Ď€ integration example, to illustrate all +these concepts. + +### static Rmpi + +Static Rmpi programs are executed via mpiexec, as any other MPI +programs. Number of processes is static - given at the launch time. + +Static Rmpi example: + + library(Rmpi) + + #integrand function + f <- function(i,h) { + x <- h*(i-0.5) + return (4/(1 + x*x)) + } + + #initialize + invisible(mpi.comm.dup(0,1)) + rank <- mpi.comm.rank() + size <- mpi.comm.size() + n<-0 + + while (TRUE) + { + #read number of intervals + if (rank==0) { + cat("Enter the number of intervals: (0 quits) ") + fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) + } + + #broadcat the intervals + n <- mpi.bcast(as.integer(n),type=1) + + if(n<=0) break + + #run the calculation + n <- max(n,size) + h <- 1.0/n + + i <- seq(rank+1,n,size); + mypi <- h*sum(sapply(i,f,h)); + + pi3 <- mpi.reduce(mypi) + + #print results + if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) + } + + mpi.quit() + +The above is the static MPI example for calculating the number Ď€. Note +the **library(Rmpi)** and **mpi.comm.dup()** function calls. +Execute the example as: + + $ mpirun R --slave --no-save --no-restore -f pi3.R + +### dynamic Rmpi + +Dynamic Rmpi programs are executed by calling the R directly. OpenMPI +module must be still loaded. The R slave processes will be spawned by a +function call within the Rmpi program. 
+ +Dynamic Rmpi example: + + #integrand function + f <- function(i,h) { + x <- h*(i-0.5) + return (4/(1 + x*x)) + } + + #the worker function + workerpi <- function() + { + #initialize + rank <- mpi.comm.rank() + size <- mpi.comm.size() + n<-0 + + while (TRUE) + { + #read number of intervals + if (rank==0) { + cat("Enter the number of intervals: (0 quits) ") + fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) + } + + #broadcat the intervals + n <- mpi.bcast(as.integer(n),type=1) + + if(n<=0) break + + #run the calculation + n <- max(n,size) + h <- 1.0/n + + i <- seq(rank+1,n,size); + mypi <- h*sum(sapply(i,f,h)); + + pi3 <- mpi.reduce(mypi) + + #print results + if (rank==0) cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) + } + } + + #main + library(Rmpi) + + cat("Enter the number of slaves: ") + fp<-file("stdin"); ns<-scan(fp,nmax=1); close(fp) + + mpi.spawn.Rslaves(nslaves=ns) + mpi.bcast.Robj2slave(f) + mpi.bcast.Robj2slave(workerpi) + + mpi.bcast.cmd(workerpi()) + workerpi() + + mpi.quit() + +The above example is the dynamic MPI example for calculating the number +Ď€. Both master and slave processes carry out the calculation. Note the +mpi.spawn.Rslaves(), mpi.bcast.Robj2slave()** and the +mpi.bcast.cmd()** function calls. +Execute the example as: + + $ mpirun -np 1 R --slave --no-save --no-restore -f pi3Rslaves.R + +Note that this method uses MPI_Comm_spawn (Dynamic process feature of +MPI-2) to start the slave processes - the master process needs to be +launched with MPI. In general, Dynamic processes are not well supported +among MPI implementations, some issues might arise. Also, environment +variables are not propagated to spawned processes, so they will not see +paths from modules. +### mpi.apply Rmpi + +mpi.apply is a specific way of executing Dynamic Rmpi programs. + +mpi.apply() family of functions provide MPI parallelized, drop in +replacement for the serial apply() family of functions. + +Execution is identical to other dynamic Rmpi programs. + +mpi.apply Rmpi example: + + #integrand function + f <- function(i,h) { + x <- h*(i-0.5) + return (4/(1 + x*x)) + } + + #the worker function + workerpi <- function(rank,size,n) + { + #run the calculation + n <- max(n,size) + h <- 1.0/n + + i <- seq(rank,n,size); + mypi <- h*sum(sapply(i,f,h)); + + return(mypi) + } + + #main + library(Rmpi) + + cat("Enter the number of slaves: ") + fp<-file("stdin"); ns<-scan(fp,nmax=1); close(fp) + + mpi.spawn.Rslaves(nslaves=ns) + mpi.bcast.Robj2slave(f) + mpi.bcast.Robj2slave(workerpi) + + while (TRUE) + { + #read number of intervals + cat("Enter the number of intervals: (0 quits) ") + fp<-file("stdin"); n<-scan(fp,nmax=1); close(fp) + if(n<=0) break + + #run workerpi + i=seq(1,2*ns) + pi3=sum(mpi.parSapply(i,workerpi,2*ns,n)) + + #print results + cat(sprintf("Value of PI %16.14f, diff= %16.14fn",pi3,pi3-pi)) + } + + mpi.quit() + +The above is the mpi.apply MPI example for calculating the number Ď€. +Only the slave processes carry out the calculation. Note the +mpi.parSapply(), ** function call. The package +parallel +[example](r.html#package-parallel)[above](r.html#package-parallel){.anchor +may be trivially adapted (for much better performance) to this structure +using the mclapply() in place of mpi.parSapply(). + +Execute the example as: + + $ mpirun -np 1 R --slave --no-save --no-restore -f pi3parSapply.R + +Combining parallel and Rmpi +--------------------------- + +Currently, the two packages can not be combined for hybrid calculations. 
+ +Parallel execution +------------------ + +The R parallel jobs are executed via the PBS queue system exactly as any +other parallel jobs. User must create an appropriate jobscript and +submit via the **qsub** + +Example jobscript for [static Rmpi](r.html#static-rmpi) +parallel R execution, running 1 process per core: + + #!/bin/bash + #PBS -q qprod + #PBS -N Rjob + #PBS -l select=100:ncpus=24:mpiprocs=24:ompthreads=1 + + # change to scratch directory + SCRDIR=/scratch/work/user/$USER/myjob + cd $SCRDIR || exit + + # copy input file to scratch + cp $PBS_O_WORKDIR/rscript.R . + + # load R and openmpi module + module load R + module load OpenMPI + + # execute the calculation + mpiexec -bycore -bind-to-core R --slave --no-save --no-restore -f rscript.R + + # copy output file to home + cp routput.out $PBS_O_WORKDIR/. + + #exit + exit + +For more information about jobscripts and MPI execution refer to the +[Job +submission](../../resource-allocation-and-job-execution/job-submission-and-execution.html) +and general [MPI](../mpi-1.html) sections. + +Xeon Phi Offload +---------------- + +By leveraging MKL, R can accelerate certain computations, most notably +linear algebra operations on the Xeon Phi accelerator by using Automated +Offload. To use MKL Automated Offload, you need to first set this +environment variable before R execution : + + $ export MKL_MIC_ENABLE=1 + +[Read more about automatic +offload](../intel-xeon-phi.html) + diff --git a/converted/docs.it4i.cz/salomon/software/operating-system.md b/converted/docs.it4i.cz/salomon/software/operating-system.md new file mode 100644 index 0000000000000000000000000000000000000000..9cadb41ffdceb80ea618038b186e10be74f1752b --- /dev/null +++ b/converted/docs.it4i.cz/salomon/software/operating-system.md @@ -0,0 +1,13 @@ +Operating System +================ + +The operating system, deployed on Salomon cluster + + + +The operating system on Salomon is Linux - CentOS 6.6. + +The CentOS Linux distribution is a stable, predictable, manageable +and reproducible platform derived from the sources of Red Hat Enterprise +Linux (RHEL). + diff --git a/converted/docs.it4i.cz/salomon/storage/cesnet-data-storage.md b/converted/docs.it4i.cz/salomon/storage/cesnet-data-storage.md new file mode 100644 index 0000000000000000000000000000000000000000..22b880420d3c4fcba0b5574ebe1865f4fdb4fa24 --- /dev/null +++ b/converted/docs.it4i.cz/salomon/storage/cesnet-data-storage.md @@ -0,0 +1,128 @@ +CESNET Data Storage +=================== + + + +Introduction +------------ + +Do not use shared filesystems at IT4Innovations as a backup for large +amount of data or long-term archiving purposes. + +The IT4Innovations does not provide storage capacity for data archiving. +Academic staff and students of research institutions in the Czech +Republic can use [CESNET Storage +service](https://du.cesnet.cz/). + +The CESNET Storage service can be used for research purposes, mainly by +academic staff and students of research institutions in the Czech +Republic. + +User of data storage CESNET (DU) association can become organizations or +an individual person who is either in the current employment +relationship (employees) or the current study relationship (students) to +a legal entity (organization) that meets the “Principles for access to +CESNET Large infrastructure (Access Policy)”. + +User may only use data storage CESNET for data transfer and storage +which are associated with activities in science, research, development, +the spread of education, culture and prosperity. 
In detail see +“Acceptable Use Policy CESNET Large Infrastructure (Acceptable Use +Policy, AUP)”. + +The service is documented at +<https://du.cesnet.cz/wiki/doku.php/en/start>. For special requirements +please contact directly CESNET Storage Department via e-mail +[du-support(at)cesnet.cz](mailto:du-support@cesnet.cz). + +The procedure to obtain the CESNET access is quick and trouble-free. + +(source +[https://du.cesnet.cz/](https://du.cesnet.cz/wiki/doku.php/en/start "CESNET Data Storage")) + +CESNET storage access +--------------------- + +### Understanding Cesnet storage + +It is very important to understand the Cesnet storage before uploading +data. Please read +<https://du.cesnet.cz/en/navody/home-migrace-plzen/start> first. + +Once registered for CESNET Storage, you may [access the +storage](https://du.cesnet.cz/en/navody/faq/start) in +number of ways. We recommend the SSHFS and RSYNC methods. + +### SSHFS Access + +SSHFS: The storage will be mounted like a local hard drive + +The SSHFS provides a very convenient way to access the CESNET Storage. +The storage will be mounted onto a local directory, exposing the vast +CESNET Storage as if it was a local removable harddrive. Files can be +than copied in and out in a usual fashion. + +First, create the mountpoint + + $ mkdir cesnet + +Mount the storage. Note that you can choose among the ssh.du1.cesnet.cz +(Plzen), ssh.du2.cesnet.cz (Jihlava), ssh.du3.cesnet.cz (Brno) +Mount tier1_home **(only 5120M !)**: + + $ sshfs username@ssh.du1.cesnet.cz:. cesnet/ + +For easy future access from Anselm, install your public key + + $ cp .ssh/id_rsa.pub cesnet/.ssh/authorized_keys + +Mount tier1_cache_tape for the Storage VO: + + $ sshfs username@ssh.du1.cesnet.cz:/cache_tape/VO_storage/home/username cesnet/ + +View the archive, copy the files and directories in and out + + $ ls cesnet/ + $ cp -a mydir cesnet/. + $ cp cesnet/myfile . + +Once done, please remember to unmount the storage + + $ fusermount -u cesnet + +### Rsync access + +Rsync provides delta transfer for best performance, can resume +interrupted transfers + +Rsync is a fast and extraordinarily versatile file copying tool. It is +famous for its delta-transfer algorithm, which reduces the amount of +data sent over the network by sending only the differences between the +source files and the existing files in the destination. Rsync is widely +used for backups and mirroring and as an improved copy command for +everyday use. + +Rsync finds files that need to be transferred using a "quick check" +algorithm (by default) that looks for files that have changed in size or +in last-modified time. Any changes in the other preserved attributes +(as requested by options) are made on the destination file directly when +the quick check indicates that the file's data does not need to be +updated. + +More about Rsync at +<https://du.cesnet.cz/en/navody/rsync/start#pro_bezne_uzivatele> + +Transfer large files to/from Cesnet storage, assuming membership in the +Storage VO + + $ rsync --progress datafile username@ssh.du1.cesnet.cz:VO_storage-cache_tape/. + $ rsync --progress username@ssh.du1.cesnet.cz:VO_storage-cache_tape/datafile . + +Transfer large directories to/from Cesnet storage, assuming membership +in the Storage VO + + $ rsync --progress -av datafolder username@ssh.du1.cesnet.cz:VO_storage-cache_tape/. + $ rsync --progress -av username@ssh.du1.cesnet.cz:VO_storage-cache_tape/datafolder . + +Transfer rates of about 28MB/s can be expected. 
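
Should a transfer be interrupted, Rsync can pick up where it left off. A minimal sketch, assuming the same Storage VO path as above and a hypothetical archive named largedata.tar; the --partial option keeps the partially transferred file, so re-running the identical command resumes the transfer instead of starting over:

    $ rsync --partial --progress largedata.tar username@ssh.du1.cesnet.cz:VO_storage-cache_tape/.

    # after an interruption, simply run the same command again to resume
    $ rsync --partial --progress largedata.tar username@ssh.du1.cesnet.cz:VO_storage-cache_tape/.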
+ diff --git a/converted/docs.it4i.cz/salomon/storage/storage.md b/converted/docs.it4i.cz/salomon/storage/storage.md new file mode 100644 index 0000000000000000000000000000000000000000..b170dd2ff3635cd9064fccc64fb5331b2e83bfdc --- /dev/null +++ b/converted/docs.it4i.cz/salomon/storage/storage.md @@ -0,0 +1,513 @@ +Storage +======= + + + +Introduction +------------ + +There are two main shared file systems on Salomon cluster, the [HOME](storage.html#home)and [SCRATCH](storage.html#shared-filesystems). + +All login and compute nodes may access same data on shared filesystems. +Compute nodes are also equipped with local (non-shared) scratch, ramdisk +and tmp filesystems. + +Policy (in a nutshell) +---------------------- + +Use [ for your most valuable data +and programs. +Use [WORK](storage.html#work) for your large project +files +Use [TEMP](storage.html#temp) for large scratch data. + +Do not use for [archiving](storage.html#archiving)! + +Archiving +------------- + +Please don't use shared filesystems as a backup for large amount of data +or long-term archiving mean. The academic staff and students of research +institutions in the Czech Republic can use [CESNET storage +service](../../anselm-cluster-documentation/storage-1/cesnet-data-storage.html), +which is available via SSHFS. + +Shared Filesystems +---------------------- + +Salomon computer provides two main shared filesystems, the [ +HOME +filesystem](storage.html#home-filesystem) and the +[SCRATCH filesystem](storage.html#scratch-filesystem). The +SCRATCH filesystem is partitioned to [WORK and TEMP +workspaces](storage.html#shared-workspaces). The HOME +filesystem is realized as a tiered NFS disk storage. The SCRATCH +filesystem is realized as a parallel Lustre filesystem. Both shared file +systems are accessible via the Infiniband network. Extended ACLs are +provided on both HOME/SCRATCH filesystems for the purpose of sharing +data with other users using fine-grained control. + +###HOME filesystem + +The HOME filesystem is realized as a Tiered filesystem, exported via +NFS. The first tier has capacity 100TB, second tier has capacity 400TB. +The filesystem is available on all login and computational nodes. The +Home filesystem hosts the [HOME +workspace](storage.html#home). + +###SCRATCH filesystem + +The architecture of Lustre on Salomon is composed of two metadata +servers (MDS) and six data/object storage servers (OSS). Accessible +capacity is 1.69 PB, shared among all users. The SCRATCH filesystem +hosts the [WORK and TEMP +workspaces](storage.html#shared-workspaces). + + Configuration of the SCRATCH Lustre storage + + +- SCRATCH Lustre object storage + + + - Disk array SFA12KX + - 540 4TB SAS 7.2krpm disks + - 54 OSTs of 10 disks in RAID6 (8+2) + - 15 hot-spare disks + - 4x 400GB SSD cache + + + +- SCRATCH Lustre metadata storage + + + - Disk array EF3015 + - 12 600GB SAS 15krpm disks + + + +### Understanding the Lustre Filesystems + +(source <http://www.nas.nasa.gov>) + +A user file on the Lustre filesystem can be divided into multiple chunks +(stripes) and stored across a subset of the object storage targets +(OSTs) (disks). The stripes are distributed among the OSTs in a +round-robin fashion to ensure load balancing. + +When a client (a compute +node from your job) needs to create +or access a file, the client queries the metadata server ( +MDS) and the metadata target ( +MDT) for the layout and location of the +[file's +stripes](http://www.nas.nasa.gov/hecc/support/kb/Lustre_Basics_224.html#striping). 
+Once the file is opened and the client obtains the striping information, +the MDS is no longer involved in the +file I/O process. The client interacts directly with the object storage +servers (OSSes) and OSTs to perform I/O operations such as locking, disk +allocation, storage, and retrieval. + +If multiple clients try to read and write the same part of a file at the +same time, the Lustre distributed lock manager enforces coherency so +that all clients see consistent results. + +There is default stripe configuration for Salomon Lustre filesystems. +However, users can set the following stripe parameters for their own +directories or files to get optimum I/O performance: + +1. stripe_size: the size of the chunk in bytes; specify with k, m, or + g to use units of KB, MB, or GB, respectively; the size must be an + even multiple of 65,536 bytes; default is 1MB for all Salomon Lustre + filesystems +2. stripe_count the number of OSTs to stripe across; default is 1 for + Salomon Lustre filesystems one can specify -1 to use all OSTs in + the filesystem. +3. stripe_offset The index of the + OST where the first stripe is to be + placed; default is -1 which results in random selection; using a + non-default value is NOT recommended. + + + +Setting stripe size and stripe count correctly for your needs may +significantly impact the I/O performance you experience. + +Use the lfs getstripe for getting the stripe parameters. Use the lfs +setstripe command for setting the stripe parameters to get optimal I/O +performance The correct stripe setting depends on your needs and file +access patterns. + +` +$ lfs getstripe dir|filename +$ lfs setstripe -s stripe_size -c stripe_count -o stripe_offset dir|filename +` + +Example: + +` +$ lfs getstripe /scratch/work/user/username +/scratch/work/user/username +stripe_count: 1 stripe_size: 1048576 stripe_offset: -1 + +$ lfs setstripe -c -1 /scratch/work/user/username/ +$ lfs getstripe /scratch/work/user/username/ +/scratch/work/user/username/ +stripe_count: -1 stripe_size: 1048576 stripe_offset: -1 +` + +In this example, we view current stripe setting of the +/scratch/username/ directory. The stripe count is changed to all OSTs, +and verified. All files written to this directory will be striped over +all (54) OSTs + +Use lfs check OSTs to see the number and status of active OSTs for each +filesystem on Salomon. Learn more by reading the man page + +` +$ lfs check osts +$ man lfs +` + +### Hints on Lustre Stripping + +Increase the stripe_count for parallel I/O to the same file. + +When multiple processes are writing blocks of data to the same file in +parallel, the I/O performance for large files will improve when the +stripe_count is set to a larger value. The stripe count sets the number +of OSTs the file will be written to. By default, the stripe count is set +to 1. While this default setting provides for efficient access of +metadata (for example to support the ls -l command), large files should +use stripe counts of greater than 1. This will increase the aggregate +I/O bandwidth by using multiple OSTs in parallel instead of just one. A +rule of thumb is to use a stripe count approximately equal to the number +of gigabytes in the file. + +Another good practice is to make the stripe count be an integral factor +of the number of processes performing the write in parallel, so that you +achieve load balance among the OSTs. For example, set the stripe count +to 16 instead of 15 when you have 64 processes performing the writes. 
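
For example, a minimal sketch of preparing a (hypothetical) output directory for such a 64-process parallel write, using the lfs commands introduced above:

    $ mkdir /scratch/work/user/username/parallel_output
    $ lfs setstripe -c 16 /scratch/work/user/username/parallel_output
    $ lfs getstripe /scratch/work/user/username/parallel_output

    # new files created in this directory will be striped over 16 OSTs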
+ +Using a large stripe size can improve performance when accessing very +large files + +Large stripe size allows each client to have exclusive access to its own +part of a file. However, it can be counterproductive in some cases if it +does not match your I/O pattern. The choice of stripe size has no effect +on a single-stripe file. + +Read more on +<http://wiki.lustre.org/manual/LustreManual20_HTML/ManagingStripingFreeSpace.html> + +Disk usage and quota commands +------------------------------------------ + +User quotas on the Lustre file systems (SCRATCH) can be checked +and reviewed using following command: + +` +$ lfs quota dir +` + +Example for Lustre SCRATCH directory: + +` +$ lfs quota /scratch +Disk quotas for user user001 (uid 1234): + Filesystem kbytes quota limit grace files quota limit grace +  /scratch    8    0 100000000000    -    3    0    0    - +Disk quotas for group user001 (gid 1234): + Filesystem kbytes quota limit grace files quota limit grace + /scratch    8    0    0    -    3    0    0    - +` + +In this example, we view current quota size limit of 100TB and 8KB +currently used by user001. + +HOME directory is mounted via NFS, so a different command must be used +to obtain quota information: + +  $ quota + +Example output: + + $ quota + Disk quotas for user vop999 (uid 1025): + Filesystem blocks quota limit grace files quota limit grace + home-nfs-ib.salomon.it4i.cz:/home + 28 0 250000000 10 0 500000 + +To have a better understanding of where the space is exactly used, you +can use following command to find out. + +` +$ du -hs dir +` + +Example for your HOME directory: + +` +$ cd /home +$ du -hs * .[a-zA-z0-9]* | grep -E "[0-9]*G|[0-9]*M" | sort -hr +258M cuda-samples +15M .cache +13M .mozilla +5,5M .eclipse +2,7M .idb_13.0_linux_intel64_app +` + +This will list all directories which are having MegaBytes or GigaBytes +of consumed space in your actual (in this example HOME) directory. List +is sorted in descending order from largest to smallest +files/directories. + +To have a better understanding of previous commands, you can read +manpages. + +` +$ man lfs +` + +` +$ man du +` + +Extended Access Control List (ACL) +---------------------------------- + +Extended ACLs provide another security mechanism beside the standard +POSIX ACLs which are defined by three entries (for +owner/group/others). Extended ACLs have more than the three basic +entries. In addition, they also contain a mask entry and may contain any +number of named user and named group entries. + +ACLs on a Lustre file system work exactly like ACLs on any Linux file +system. They are manipulated with the standard tools in the standard +manner. Below, we create a directory and allow a specific user access. + +` +[vop999@login1.salomon ~]$ umask 027 +[vop999@login1.salomon ~]$ mkdir test +[vop999@login1.salomon ~]$ ls -ld test +drwxr-x--- 2 vop999 vop999 4096 Nov 5 14:17 test +[vop999@login1.salomon ~]$ getfacl test +# file: test +# owner: vop999 +# group: vop999 +user::rwx +group::r-x +other::--- + +[vop999@login1.salomon ~]$ setfacl -m user:johnsm:rwx test +[vop999@login1.salomon ~]$ ls -ld test +drwxrwx---+ 2 vop999 vop999 4096 Nov 5 14:17 test +[vop999@login1.salomon ~]$ getfacl test +# file: test +# owner: vop999 +# group: vop999 +user::rwx +user:johnsm:rwx +group::r-x +mask::rwx +other::--- +` + +Default ACL mechanism can be used to replace setuid/setgid permissions +on directories. 
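
As a minimal sketch, building on the test directory and the johnsm entry from the example above, a default ACL might be added with the -d flag discussed in the next paragraph:

    [vop999@login1.salomon ~]$ setfacl -d -m user:johnsm:rwx test
    [vop999@login1.salomon ~]$ getfacl test

    # getfacl now also lists default:user:johnsm:rwx, which newly
    # created files and subdirectories inside test will inherit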
Setting a default ACL on a directory (the -d flag to setfacl) will cause the ACL permissions to be inherited by any newly created file or subdirectory within the directory. Refer to this page for more information on Linux ACL:

[http://www.vanemery.com/Linux/ACL/POSIX_ACL_on_Linux.html](http://www.vanemery.com/Linux/ACL/POSIX_ACL_on_Linux.html)

Shared Workspaces
---------------------

### HOME

Users' home directories /home/username reside on the HOME filesystem. Accessible capacity is 0.5 PB, shared among all users. Individual users are restricted by filesystem usage quotas, set to 250 GB per user. Should 250 GB prove insufficient for a particular user, please contact [support](https://support.it4i.cz/rt); the quota may be lifted upon request.

The HOME filesystem is intended for preparation, evaluation, processing and storage of data generated by active Projects.

The HOME should not be used to archive data of past Projects or other unrelated data.

The files on HOME will not be deleted until the end of the [user's lifecycle](../../get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.html).

The workspace is backed up, such that it can be restored in case of a catastrophic failure resulting in significant data loss. This backup, however, is not intended to restore old versions of user data or to restore (accidentally) deleted files.

 |HOME workspace||
 |---|---|
 |Accesspoint|/home/username|
 |Capacity|0.5 PB|
 |Throughput|6 GB/s|
 |User quota|250 GB|
 |Protocol|NFS, 2-Tier|

### WORK

The WORK workspace resides on the SCRATCH filesystem. Users may create subdirectories and files in the directories **/scratch/work/user/username** and **/scratch/work/project/projectid**. The /scratch/work/user/username directory is private to the user, much like the home directory. The /scratch/work/project/projectid directory is accessible to all users involved in the project projectid.

The WORK workspace is intended to store users' project data as well as for high performance access to input and output files. All project data should be removed once the project is finished. The data on the WORK workspace are not backed up.

Files on the WORK filesystem are **persistent** (not automatically deleted) throughout the duration of the project.

The WORK workspace is hosted on the SCRATCH filesystem. The SCRATCH is realized as a Lustre parallel filesystem and is available from all login and computational nodes. Default stripe size is 1MB, stripe count is 1. There are 54 OSTs dedicated for the SCRATCH filesystem.

Setting stripe size and stripe count correctly for your needs may significantly impact the I/O performance you experience.

 |WORK workspace||
 |---|---|
 |Accesspoints|/scratch/work/user/username, /scratch/work/project/projectid|
 |Capacity|1.69 PB|
 |Throughput|30 GB/s|
 |User quota|100 TB|
 |Default stripe size|1 MB|
 |Default stripe count|1|
 |Number of OSTs|54|
 |Protocol|Lustre|

### TEMP

The TEMP workspace resides on the SCRATCH filesystem. The TEMP workspace accesspoint is /scratch/temp. Users may freely create subdirectories and files on the workspace. Accessible capacity is 1.69 PB, shared among all users on TEMP and WORK. Individual users are restricted by filesystem usage quotas, set to 100 TB per user. The purpose of this quota is to prevent runaway programs from filling the entire filesystem and denying service to other users. Should 100 TB prove insufficient for a particular user, please contact [support](https://support.it4i.cz/rt); the quota may be lifted upon request.
The TEMP workspace is intended for temporary scratch data generated during the calculation as well as for high performance access to input and output files. All I/O-intensive jobs must use the TEMP workspace as their working directory.

Users are advised to save the necessary data from the TEMP workspace to HOME or WORK after the calculations and clean up the scratch files.

Files on the TEMP filesystem that are **not accessed for more than 90 days** will be automatically **deleted**.

The TEMP workspace is hosted on the SCRATCH filesystem. The SCRATCH is realized as a Lustre parallel filesystem and is available from all login and computational nodes. Default stripe size is 1MB, stripe count is 1. There are 54 OSTs dedicated for the SCRATCH filesystem.

Setting stripe size and stripe count correctly for your needs may significantly impact the I/O performance you experience.

 |TEMP workspace||
 |---|---|
 |Accesspoint|/scratch/temp|
 |Capacity|1.69 PB|
 |Throughput|30 GB/s|
 |User quota|100 TB|
 |Default stripe size|1 MB|
 |Default stripe count|1|
 |Number of OSTs|54|
 |Protocol|Lustre|

RAM disk
--------

Every computational node is equipped with a filesystem realized in memory, the so-called RAM disk.

Use the RAM disk in case you need very fast access to a limited amount of data during your calculation. Be careful: use of the RAM disk filesystem is at the expense of operational memory.

The local RAM disk is mounted as /ramdisk and is accessible to the user in the /ramdisk/$PBS_JOBID directory.

The local RAM disk filesystem is intended for temporary scratch data generated during the calculation as well as for high performance access to input and output files. The size of the RAM disk filesystem is limited, and its use comes at the expense of operational memory. It is therefore not recommended to allocate a large amount of memory and store a large amount of data on the RAM disk filesystem at the same time.

The local RAM disk directory /ramdisk/$PBS_JOBID will be deleted immediately after the calculation ends. Users should take care to save the output data from within the jobscript.
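
A minimal jobscript sketch illustrating this workflow; the input and output file names are placeholders only:

    #!/bin/bash

    # work in the local RAM disk of the allocated node
    cd /ramdisk/$PBS_JOBID || exit

    # copy (hypothetical) input data from the submission directory
    cp $PBS_O_WORKDIR/input.dat .

    # ... run the calculation here ...

    # save the results to the submission directory before the RAM disk is purged
    cp output.dat $PBS_O_WORKDIR/.

    exit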
+ +RAM disk +Mountpoint + /ramdisk +Accesspoint + /ramdisk/$PBS_JOBID +Capacity +120 GB +Throughput +over 1.5 GB/s write, over 5 GB/s read, single thread +over 10 GB/s write, over 50 GB/s read, 16 threads + +User quota +none + + +Summary + +---------- + + -------------------------------------------------- + |Mountpoint|Usage|Protocol|Net|Capacity|Throughput|Limitations|Access| + ---------------------------------------- |---|---|---------------------- ------------- -------------- ------------ ------------- ---------- |**Version**|**Module**|------ + | /home|home directory|NFS, 2-Tier|0.5 PB|6 GB/s|Quota 250GB|Compute and login nodes|backed up| + + |/scratch/work|large project files|Lustre|1.69 PB|30 GB/s|Quota|Compute and login nodes|none| + + + |/scratch/temp|job temporary data|Lustre|1.69 PB|30 GB/s|Quota 100TB|Compute and login nodes|files older 90 days removed| + + |/ramdisk|job temporary data, node local|local|120GB|90 GB/s|none|Compute nodes|purged after job ends| + -------------------------------------------------- + + + diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.epub new file mode 100644 index 0000000000000000000000000000000000000000..8b63180b7dda3790ea02cf19f197eb70b2658965 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.epub new file mode 100644 index 0000000000000000000000000000000000000000..832980c06f321aee85422037f5d30fb228621103 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.epub new file mode 100644 index 0000000000000000000000000000000000000000..af183980f405d73dceaea579ba24e5b8f1e700dc Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.epub new file mode 100644 index 0000000000000000000000000000000000000000..d8478076df94cc99ed0bf7ab4d2ead3dfe5c1d56 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.epub new file mode 100644 index 0000000000000000000000000000000000000000..1f28849ef0be912fcaf052c65c8886cb17183528 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.epub new file mode 100644 index 0000000000000000000000000000000000000000..3fc17ddaf56871290a08e2b5b603e920d1a0e77a Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.epub differ diff --git 
a/epub/docs.it4i.cz/anselm-cluster-documentation/introduction.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..0bdf6ae50eacd4d9a8399132b0f88b487a8ac289 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/introduction.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/network.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/network.epub new file mode 100644 index 0000000000000000000000000000000000000000..c916fd350535cfb801bccf118ad2d33ca6522d6e Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/network.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/prace.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/prace.epub new file mode 100644 index 0000000000000000000000000000000000000000..0d946d94910ffccf58ca1f5f3eb59509e70a33fb Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/prace.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.epub new file mode 100644 index 0000000000000000000000000000000000000000..a2a9812ce58cd825638b7695f3d32947f4aa4b5e Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.epub new file mode 100644 index 0000000000000000000000000000000000000000..5ee41e831818fc9fbfd9457458d720dad40732d4 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..6da129366ac91bf30be0d48842dfb536d15917f1 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.epub new file mode 100644 index 0000000000000000000000000000000000000000..ec73d837c7ca678dc570d32d77ae2d42a2b3ebb9 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.epub new file mode 100644 index 0000000000000000000000000000000000000000..1cb95dd5cf4ddd5572250337a238957ba6652279 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.epub 
b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.epub new file mode 100644 index 0000000000000000000000000000000000000000..823ec7fe429758c7e0f9010eccec9802c858282e Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys.epub new file mode 100644 index 0000000000000000000000000000000000000000..d28ba53488e06cc4c50b4866e9dfba8a4dbeb4e6 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.epub new file mode 100644 index 0000000000000000000000000000000000000000..de1684992c176474ef6b09d238a21362afd1e6b0 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.epub new file mode 100644 index 0000000000000000000000000000000000000000..c0aeb47d3e4216a6043612a46c0e19d704e32fa9 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.epub new file mode 100644 index 0000000000000000000000000000000000000000..d3df7a2cec3b8af9cf667b2282889c0e948af376 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.epub new file mode 100644 index 0000000000000000000000000000000000000000..5e246786f6fabb4d3543953da89bb51ba57eb403 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.epub new file mode 100644 index 0000000000000000000000000000000000000000..07d20c6f8c73df5c42ec6c1f89426ff48a029cd2 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.epub new file mode 100644 index 0000000000000000000000000000000000000000..7205de48cef06e5ea77ffc28b23afe991f6fe321 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.epub new file mode 100644 index 0000000000000000000000000000000000000000..0465325422bf70d1a749da317dd4e9d8b5828bf7 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.epub differ diff 
--git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/compilers.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/compilers.epub new file mode 100644 index 0000000000000000000000000000000000000000..e169c7b09819b4edeb68bc3c79614d12dd161581 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/compilers.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.epub new file mode 100644 index 0000000000000000000000000000000000000000..749581444acff209f694ae07474ea8265440ee21 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.epub new file mode 100644 index 0000000000000000000000000000000000000000..6dca6fc672ea96f680d5aa62beb288815485fb4f Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.epub new file mode 100644 index 0000000000000000000000000000000000000000..ee34c1f4e95c30239a7ef37b6cdbd5ed25c874a1 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.epub new file mode 100644 index 0000000000000000000000000000000000000000..6f0e103abccb1d84124836a48fac430ef72a733c Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.epub new file mode 100644 index 0000000000000000000000000000000000000000..b2b51fd768a1701ffe3ea0a2d6e6c02d99c57481 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.epub new file mode 100644 index 0000000000000000000000000000000000000000..ced33fcc529704b91101face762c3fff5e01fd69 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.epub new file mode 100644 index 0000000000000000000000000000000000000000..05fdefd654540e25476c10f06a751e184804c9c3 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.epub new file mode 100644 index 
0000000000000000000000000000000000000000..e7006a77874635db63f68719aea147e09ca8390d Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.epub new file mode 100644 index 0000000000000000000000000000000000000000..b2a5ce6d23577dfafc8697fd695c25560d65a53e Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.epub new file mode 100644 index 0000000000000000000000000000000000000000..8ed972f7da21c3cf194de63d1cacea45b27033fb Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.epub new file mode 100644 index 0000000000000000000000000000000000000000..ac5a515e710c2d3014e19c26211f16d2248f2114 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.epub new file mode 100644 index 0000000000000000000000000000000000000000..5b14aee2a15ae1470ca4903513d444d1238ffed0 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.epub new file mode 100644 index 0000000000000000000000000000000000000000..236b8251bb500931ff874dbc5318060cb17782b4 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.epub new file mode 100644 index 0000000000000000000000000000000000000000..b83e7c046b08c1f6dbf03adfb73e14fd207a417e Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.epub new file mode 100644 index 0000000000000000000000000000000000000000..0888d4c91f7bac8a046fb1323ae241461933b835 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.epub new file mode 100644 index 0000000000000000000000000000000000000000..85c1c816a70f8099804dfc8f0f76ec8b836e8b8d Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.epub new file mode 100644 index 
0000000000000000000000000000000000000000..b81feb405d84a89a0cb9bf374abb18beceb5804e Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.epub new file mode 100644 index 0000000000000000000000000000000000000000..a2c81bb57be0c54b6b101acc6a4e0b40da402c37 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.epub new file mode 100644 index 0000000000000000000000000000000000000000..20e05761aa9f399840b4ba1d1b7d9c5d8bbf7de0 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.epub new file mode 100644 index 0000000000000000000000000000000000000000..60af04f181d512a8e74fd032d2039b2983c3443d Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..3a29efbb490816bd0744884b8338c841b22c4d8e Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.epub new file mode 100644 index 0000000000000000000000000000000000000000..8ce273df2d2473accf9e1259dc5100fd709ae3fb Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.epub new file mode 100644 index 0000000000000000000000000000000000000000..669c2a22934ea0699655c06b7af96100cefc9bfd Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.epub new file mode 100644 index 0000000000000000000000000000000000000000..81df7694ef14a03f16fcb75494b20979b4b9a237 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/java.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/java.epub new file mode 100644 index 0000000000000000000000000000000000000000..f56f46c8e368acc40808d6d3b0eabf6ffe9da6c6 Binary files /dev/null and 
b/epub/docs.it4i.cz/anselm-cluster-documentation/software/java.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.epub new file mode 100644 index 0000000000000000000000000000000000000000..9a19b8733e15c9f59c9033fca8a97f6b6c9d49c5 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.epub new file mode 100644 index 0000000000000000000000000000000000000000..dcac7cddad70c4097d6f28e0cde7d39ed89f6add Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.epub new file mode 100644 index 0000000000000000000000000000000000000000..020bbe98cccfac1456495225086e30d8a3723ae8 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.epub new file mode 100644 index 0000000000000000000000000000000000000000..7ae032d93f6dbcd87dac86da6ca9ef3a6a4d8b4f Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.epub new file mode 100644 index 0000000000000000000000000000000000000000..e4fea3e2d1e57629f935d1b55b7a13b849294c28 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.epub new file mode 100644 index 0000000000000000000000000000000000000000..8c4a8a6608cd238de933d0544fd31765ece6e407 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..a00c703552195492fb9c297f95a657195be5ccca Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.epub new file mode 100644 index 0000000000000000000000000000000000000000..61a06c9afc8b194c61dc8ba12af71a41fb6c432e Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.epub 
b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.epub new file mode 100644 index 0000000000000000000000000000000000000000..9bf44eb957276bfabda5b3ce2dcf388f238d5a6b Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.epub new file mode 100644 index 0000000000000000000000000000000000000000..90d401ba2a2ab5d914d0422d9cc89324b52d764a Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.epub new file mode 100644 index 0000000000000000000000000000000000000000..1f5157c5e2f58ac2f58c090f839f0d8f779c1b44 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.epub new file mode 100644 index 0000000000000000000000000000000000000000..840966c72aa47bf7cd2ddef8f8bf2f93dd32030b Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.epub new file mode 100644 index 0000000000000000000000000000000000000000..9e1823b326e6a2accf2a1023286f32d4bbf52633 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.epub new file mode 100644 index 0000000000000000000000000000000000000000..625a5f83c354f6471e415b952e54039bbddb5aaf Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.epub new file mode 100644 index 0000000000000000000000000000000000000000..f2c95a70095357c765141ee8549a61bff4a97eba Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.epub new file mode 100644 index 0000000000000000000000000000000000000000..69bed06f13ff7556e7a5ede3db6c2527ae2e59bf Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.epub 
b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.epub new file mode 100644 index 0000000000000000000000000000000000000000..27646dbcc6dd62ed327a56a6aa439b4c32e3d0d5 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.epub new file mode 100644 index 0000000000000000000000000000000000000000..e2eb7eeffb24c866a1ff47b757191bf15e125c5a Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.epub new file mode 100644 index 0000000000000000000000000000000000000000..eb408cfe719b3563bc0156c4081832c590ef8b01 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.epub new file mode 100644 index 0000000000000000000000000000000000000000..036d39ce6db639467583de1ac1550630308ceaab Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.epub new file mode 100644 index 0000000000000000000000000000000000000000..9cd5e7b52ff1ff576d2d8dce6e1b7a3f8a6d6c9b Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.epub new file mode 100644 index 0000000000000000000000000000000000000000..98355898508675850c0f4c04478e8417b93ca559 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.epub new file mode 100644 index 0000000000000000000000000000000000000000..665681b54dda131c0c52587d7aee85ed41f4209d Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/software/paraview.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/software/paraview.epub new file mode 100644 index 0000000000000000000000000000000000000000..9c09bbf1597db408a8127941229e778c9c68c0b1 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/software/paraview.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.epub new file mode 100644 index 0000000000000000000000000000000000000000..97b8170c629aeb2a253c328398a282403b30f467 Binary files /dev/null and 
b/epub/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.epub differ diff --git a/epub/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.epub b/epub/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.epub new file mode 100644 index 0000000000000000000000000000000000000000..6e3b9d7e884174812453fd1f1d182ed55262ce33 Binary files /dev/null and b/epub/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.epub new file mode 100644 index 0000000000000000000000000000000000000000..61c4e524370c658d9d7f3d9afa888d53fdea752a Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.epub new file mode 100644 index 0000000000000000000000000000000000000000..ac3b1e50ac784019cd713ec7228765c4329a6f97 Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.epub new file mode 100644 index 0000000000000000000000000000000000000000..4f6cfb4c70eabcc3b5a95fb71b1c66bbdb37f34f Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.epub new file mode 100644 index 0000000000000000000000000000000000000000..a1d88895424c3a6daafe4a3611247b05cb0a4d7d Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..e020b0a5af7431bcebbbc81b3d0ac4849ae00307 Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..f12c427088104a7ee215dbdb675bb2c0c8879920 Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.epub differ diff --git 
a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.epub new file mode 100644 index 0000000000000000000000000000000000000000..cf7b37d4edfae57497ba1df23c5042191474ae11 Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.epub new file mode 100644 index 0000000000000000000000000000000000000000..ade0005dd705b2a80b491be51206d00ef9d20fdb Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.epub new file mode 100644 index 0000000000000000000000000000000000000000..be4445f81b276c3f68e34e0df4cf0ffc7db23ec8 Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.epub new file mode 100644 index 0000000000000000000000000000000000000000..9f876df1503e309436f351b029af5e6ecf96730a Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.epub new file mode 100644 index 0000000000000000000000000000000000000000..74e010e347a1d8f71e4bebb3f303a88970c51259 Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.epub new file mode 100644 index 0000000000000000000000000000000000000000..c4316d601907252888682c06bbb33d9bc348d95b Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.epub differ diff --git a/epub/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.epub new file mode 100644 index 0000000000000000000000000000000000000000..a804fa5148126606c7c65b5dffff41782cbc76f8 Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.epub differ diff --git 
a/epub/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.epub b/epub/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.epub new file mode 100644 index 0000000000000000000000000000000000000000..ded45ae32fc0798892de46b0613de3c7186418ae Binary files /dev/null and b/epub/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.epub differ diff --git a/epub/docs.it4i.cz/index.epub b/epub/docs.it4i.cz/index.epub new file mode 100644 index 0000000000000000000000000000000000000000..46f7b068b70834f791ad14701b4a8a518ba04468 Binary files /dev/null and b/epub/docs.it4i.cz/index.epub differ diff --git a/epub/docs.it4i.cz/salomon/accessing-the-cluster.epub b/epub/docs.it4i.cz/salomon/accessing-the-cluster.epub new file mode 100644 index 0000000000000000000000000000000000000000..4e4f07e65f3ba9433d97af3db901d252eba548ea Binary files /dev/null and b/epub/docs.it4i.cz/salomon/accessing-the-cluster.epub differ diff --git a/epub/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.epub b/epub/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.epub new file mode 100644 index 0000000000000000000000000000000000000000..1d7ba641b042e51f3ff88d67eede7c8a6139d51c Binary files /dev/null and b/epub/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.epub differ diff --git a/epub/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.epub b/epub/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.epub new file mode 100644 index 0000000000000000000000000000000000000000..3e653512f3e60fbceda8d210d5993b33751fe3b7 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.epub differ diff --git a/epub/docs.it4i.cz/salomon/environment-and-modules.epub b/epub/docs.it4i.cz/salomon/environment-and-modules.epub new file mode 100644 index 0000000000000000000000000000000000000000..36e457c3c582e611f4778c9c2af05597ee8c00e3 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/environment-and-modules.epub differ diff --git a/epub/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.epub b/epub/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.epub new file mode 100644 index 0000000000000000000000000000000000000000..26e55aab00ea0019a544f99f312ca1c84c4b81c4 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.epub differ diff --git a/epub/docs.it4i.cz/salomon/introduction.epub b/epub/docs.it4i.cz/salomon/introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..48fe86db987def653a7e073200c13aa9738fe315 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/introduction.epub differ diff --git a/epub/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.epub b/epub/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.epub new file mode 100644 index 0000000000000000000000000000000000000000..773822b213caa37e5d8e3af6a7533cfc94561c30 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.epub differ diff --git a/epub/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.epub b/epub/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.epub new file mode 100644 index 0000000000000000000000000000000000000000..a6e09f692430993123f280f7c87c2ceec471a9c4 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.epub differ diff --git a/epub/docs.it4i.cz/salomon/network-1/network.epub 
b/epub/docs.it4i.cz/salomon/network-1/network.epub new file mode 100644 index 0000000000000000000000000000000000000000..3b30ee6c9a300eab6c41f3954b0648272eb4185b Binary files /dev/null and b/epub/docs.it4i.cz/salomon/network-1/network.epub differ diff --git a/epub/docs.it4i.cz/salomon/prace.epub b/epub/docs.it4i.cz/salomon/prace.epub new file mode 100644 index 0000000000000000000000000000000000000000..70dc54e0b9aba537e5a54be96d8cce4afe126687 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/prace.epub differ diff --git a/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.epub b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.epub new file mode 100644 index 0000000000000000000000000000000000000000..80fd391870b7f54218a194b8b64d7ef40566e2e5 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.epub differ diff --git a/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.epub b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..0ef5979bc7a33b66487d8d51a5de2c4e77bf04ba Binary files /dev/null and b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.epub differ diff --git a/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.epub b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.epub new file mode 100644 index 0000000000000000000000000000000000000000..722ba7115c7df7e7675cbb1b83ada7075a396550 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.epub differ diff --git a/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.epub b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.epub new file mode 100644 index 0000000000000000000000000000000000000000..17152b392bac0df162f8028706301d5bed2068dc Binary files /dev/null and b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.epub differ diff --git a/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.epub b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.epub new file mode 100644 index 0000000000000000000000000000000000000000..75cb8a388c5b89b91ddf3ec4aa7788d07f63c8a7 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/ansys-cfx.epub b/epub/docs.it4i.cz/salomon/software/ansys/ansys-cfx.epub new file mode 100644 index 0000000000000000000000000000000000000000..614908cd15600bf05c5f59d023a09fbff52b29a6 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/ansys-cfx.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/ansys-fluent.epub b/epub/docs.it4i.cz/salomon/software/ansys/ansys-fluent.epub new file mode 100644 index 0000000000000000000000000000000000000000..e40d6c5a9b11e86ff97d7125d0a8d076e34a22f7 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/ansys-fluent.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.epub b/epub/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.epub new file mode 100644 index 
0000000000000000000000000000000000000000..fb5da00eb34fdd5202461c5db5ec1cb896b278e7 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.epub b/epub/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.epub new file mode 100644 index 0000000000000000000000000000000000000000..e7241675247a8f8d355d0f074a308fea299d438e Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.epub b/epub/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.epub new file mode 100644 index 0000000000000000000000000000000000000000..05bada04601693f960441351c6911b066c03ac5a Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/ansys.epub b/epub/docs.it4i.cz/salomon/software/ansys/ansys.epub new file mode 100644 index 0000000000000000000000000000000000000000..4fde93fb62fc139a2e87c280e9d9a542c6341f2e Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/ansys.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/licensing.epub b/epub/docs.it4i.cz/salomon/software/ansys/licensing.epub new file mode 100644 index 0000000000000000000000000000000000000000..556f20ebca54170011d513d1eb9559cae5eca46d Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/licensing.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.epub b/epub/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.epub new file mode 100644 index 0000000000000000000000000000000000000000..8680d4dbbaa999fa84e62ccbb915848b5902689f Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/ansys/workbench.epub b/epub/docs.it4i.cz/salomon/software/ansys/workbench.epub new file mode 100644 index 0000000000000000000000000000000000000000..eebf43a43cf0fa25621e501eb01d1c6cb8198b11 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/ansys/workbench.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/chemistry/molpro.epub b/epub/docs.it4i.cz/salomon/software/chemistry/molpro.epub new file mode 100644 index 0000000000000000000000000000000000000000..5782604f072fae6467ee77e0c5a8ae1853b85917 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/chemistry/molpro.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/chemistry/nwchem.epub b/epub/docs.it4i.cz/salomon/software/chemistry/nwchem.epub new file mode 100644 index 0000000000000000000000000000000000000000..8877512185b37c5296c1835c23de3e12a23f67d7 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/chemistry/nwchem.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/chemistry/phono3py.epub b/epub/docs.it4i.cz/salomon/software/chemistry/phono3py.epub new file mode 100644 index 0000000000000000000000000000000000000000..0a71376e8c722587d1bfcb10ce73e00bad99f659 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/chemistry/phono3py.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/compilers.epub b/epub/docs.it4i.cz/salomon/software/compilers.epub new file mode 100644 index 
0000000000000000000000000000000000000000..66230eb783106127f3d615d15392d427fc8dffcb Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/compilers.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.epub b/epub/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.epub new file mode 100644 index 0000000000000000000000000000000000000000..743d2d1883fbdaaf3d804a2d51a9d38b6d6537ca Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.epub b/epub/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.epub new file mode 100644 index 0000000000000000000000000000000000000000..2e06ca47a4919c2e3bbb9ca36c2ef12d8049b8b4 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers.epub b/epub/docs.it4i.cz/salomon/software/debuggers.epub new file mode 100644 index 0000000000000000000000000000000000000000..1b47fb3db4f8e478a62c6703ba24caf6e75cfd7b Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers/aislinn.epub b/epub/docs.it4i.cz/salomon/software/debuggers/aislinn.epub new file mode 100644 index 0000000000000000000000000000000000000000..afcf0d131c67c7ac8c7e1d3967d06705a5714f3c Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers/aislinn.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.epub b/epub/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.epub new file mode 100644 index 0000000000000000000000000000000000000000..e4b73e4daf26748251be76a2dfab9e2d91e87170 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.epub b/epub/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.epub new file mode 100644 index 0000000000000000000000000000000000000000..9d4893c663a4615451854587b543a522ab140683 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.epub b/epub/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.epub new file mode 100644 index 0000000000000000000000000000000000000000..d7fe3bfa996561ac5dd7cdb33e87e97e50fd3cbe Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers/summary.epub b/epub/docs.it4i.cz/salomon/software/debuggers/summary.epub new file mode 100644 index 0000000000000000000000000000000000000000..e5b753ff7b9dc873dab398c9baa77611a11bdbbe Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers/summary.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers/total-view.epub b/epub/docs.it4i.cz/salomon/software/debuggers/total-view.epub new file mode 100644 index 0000000000000000000000000000000000000000..c9075e6e246044b202e097bb5c22f701e8b7acc3 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers/total-view.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers/valgrind.epub b/epub/docs.it4i.cz/salomon/software/debuggers/valgrind.epub new file mode 100644 index 
0000000000000000000000000000000000000000..e42e152b00bcb17d32f715e5db8ddb99dafbe0c7 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers/valgrind.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/debuggers/vampir.epub b/epub/docs.it4i.cz/salomon/software/debuggers/vampir.epub new file mode 100644 index 0000000000000000000000000000000000000000..695d239c58acb2509986acfcc2098afc0793fd2c Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/debuggers/vampir.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.epub new file mode 100644 index 0000000000000000000000000000000000000000..e56333fc5f90b737c9b2acfd2a499287f7aa38bc Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.epub new file mode 100644 index 0000000000000000000000000000000000000000..d38cf59082fcec29fd220f3c2f61de221427cadb Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.epub new file mode 100644 index 0000000000000000000000000000000000000000..baef11c5125d0bfc42d324026f13f0d3a633286f Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.epub new file mode 100644 index 0000000000000000000000000000000000000000..8c231ea0c75d9f09429a1473b9383ef2170ceac5 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.epub new file mode 100644 index 0000000000000000000000000000000000000000..c5f882e872727946b71f54a456183be30741a026 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.epub new file mode 100644 index 0000000000000000000000000000000000000000..788fa20ad18592b53bc1014cb6df5ca49a1fde97 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..190fda9e57f4b2677eec1bfb0b8c96dc38d90db7 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.epub new file mode 100644 index 0000000000000000000000000000000000000000..8d1f3b786ebfccbaecd6b12baa396ae01cfd9e7a Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.epub differ diff --git 
a/epub/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.epub b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.epub new file mode 100644 index 0000000000000000000000000000000000000000..faf101c23be3825ce38b7217b6f545076ce1639c Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/intel-xeon-phi.epub b/epub/docs.it4i.cz/salomon/software/intel-xeon-phi.epub new file mode 100644 index 0000000000000000000000000000000000000000..b2e2a74edf7d6f2a1fe8c3a682e352dfb4d9a7b6 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/intel-xeon-phi.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/java.epub b/epub/docs.it4i.cz/salomon/software/java.epub new file mode 100644 index 0000000000000000000000000000000000000000..d2255805c277e1554cbb35c323d733106bedd434 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/java.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.epub b/epub/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.epub new file mode 100644 index 0000000000000000000000000000000000000000..69f3c0b2c10d59eb4ad696f29b67c6393184aeeb Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/mpi-1/mpi.epub b/epub/docs.it4i.cz/salomon/software/mpi-1/mpi.epub new file mode 100644 index 0000000000000000000000000000000000000000..392fa607476fc51aab16f4a44531a3c8d13160ad Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/mpi-1/mpi.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.epub b/epub/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.epub new file mode 100644 index 0000000000000000000000000000000000000000..45d4f4a77c2f7898514a38909de3ae1277059eef Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/numerical-languages/introduction.epub b/epub/docs.it4i.cz/salomon/software/numerical-languages/introduction.epub new file mode 100644 index 0000000000000000000000000000000000000000..e554ec66d11e0585c0a19bcce03f770739c31fa4 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/numerical-languages/introduction.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/numerical-languages/matlab.epub b/epub/docs.it4i.cz/salomon/software/numerical-languages/matlab.epub new file mode 100644 index 0000000000000000000000000000000000000000..cec78d2ce58efd918487c76a9f3981ddcec01ce2 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/numerical-languages/matlab.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/numerical-languages/octave.epub b/epub/docs.it4i.cz/salomon/software/numerical-languages/octave.epub new file mode 100644 index 0000000000000000000000000000000000000000..cfc090fea8a98936c1d4b862a2bb8be8907da8e0 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/numerical-languages/octave.epub differ diff --git a/epub/docs.it4i.cz/salomon/software/numerical-languages/r.epub b/epub/docs.it4i.cz/salomon/software/numerical-languages/r.epub new file mode 100644 index 0000000000000000000000000000000000000000..d810a990277342e8a7fd199b1133ca02048b40bb Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/numerical-languages/r.epub differ diff --git 
a/epub/docs.it4i.cz/salomon/software/operating-system.epub b/epub/docs.it4i.cz/salomon/software/operating-system.epub new file mode 100644 index 0000000000000000000000000000000000000000..b8de5be2ccadf375934665d67c72d666b1109bb3 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/software/operating-system.epub differ diff --git a/epub/docs.it4i.cz/salomon/storage/cesnet-data-storage.epub b/epub/docs.it4i.cz/salomon/storage/cesnet-data-storage.epub new file mode 100644 index 0000000000000000000000000000000000000000..f2f16dd4d1c81490f89da7fd21ecfa66167007a4 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/storage/cesnet-data-storage.epub differ diff --git a/epub/docs.it4i.cz/salomon/storage/storage.epub b/epub/docs.it4i.cz/salomon/storage/storage.epub new file mode 100644 index 0000000000000000000000000000000000000000..d25f9d880421362971f7fd386101aa577d10f549 Binary files /dev/null and b/epub/docs.it4i.cz/salomon/storage/storage.epub differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5308943dc3ff37a0f1f37f021bf73c14689b7300 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/outgoing-connections.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e90f7204c40c9dfdf34060e70b0b25ea64b4538f Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/shell-and-data-access/shell-and-data-access.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3a585caa26e5aebe769254da8f25aa25b1765eb0 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/accessing-the-cluster/vpn-access.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bb477123b4338d07cf84221e8fcbb42c5328177d Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/compute-nodes.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cd3194c35785658a49afecdb8b1c2a00dda69510 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/environment-and-modules.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.pdf new file mode 100644 index 0000000000000000000000000000000000000000..483a36f283ad31216388a14567ff0c88446f94c8 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/hardware-overview.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/introduction.pdf 
b/pdf/docs.it4i.cz/anselm-cluster-documentation/introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f23b15c31eb72994d23f5cd8a7de9a312cbc451d Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/introduction.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/network.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/network.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1e14dea1f17274aa2a41e9e44e9e46d2820137ba Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/network.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/prace.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/prace.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bd77ba673d2581c00352117195e2ff2ce68dbae8 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/prace.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6e9f0694206ee54d915b9a3dd5b4ce330d7c69d8 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/remote-visualization.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0061656849f2be0bf7dc18c1fdd87bbeec6eaca9 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/capacity-computing.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2fa136422081cb4e51b890690e057aa50e5febd7 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/introduction.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.pdf new file mode 100644 index 0000000000000000000000000000000000000000..68c41dd4d6c76bde4cb1cc0ba12f91f0be62bb76 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-priority.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bcbaaae16c86b86c8f88dd027ffee371c11cbbc9 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/job-submission-and-execution.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.pdf new file mode 100644 index 
0000000000000000000000000000000000000000..e4400fb68cdc170c254d211e8ca2ffae38871d80 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/resource-allocation-and-job-execution/resources-allocation-policy.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys.pdf new file mode 100644 index 0000000000000000000000000000000000000000..21be1dcdd90e948d6114180b5f041b5a9a943bbd Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.pdf new file mode 100644 index 0000000000000000000000000000000000000000..15423982845f18db17ef0c101fd15d73e28fa1a4 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-cfx.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.pdf new file mode 100644 index 0000000000000000000000000000000000000000..989b1232ef1479ba09e12e92bcbbfb83c23df7e1 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-fluent.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9e967148690d5940f932623bf778d817e15389c7 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-ls-dyna.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8940e41db6b1bfbd3cf6b6f5dc7a94db95819de3 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ansys-mechanical-apdl.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0cb63f563e856499352e9b6790e8c2b2ce306a96 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/ansys/ls-dyna.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4a74a9f2038a69b6f66525768c9f0fca87308114 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/molpro.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3d1301a4fb26106d80736fab208b8b3e1684afbf Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/chemistry/nwchem.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/compilers.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/compilers.pdf new file mode 100644 index 
0000000000000000000000000000000000000000..18dc473f66b3685222012d79161f653403da8a24 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/compilers.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.pdf new file mode 100644 index 0000000000000000000000000000000000000000..269ea43271632aacef42074d58c873b4d19a541a Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/comsol/comsol-multiphysics.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.pdf new file mode 100644 index 0000000000000000000000000000000000000000..784f7068add43b6e70b43f60065a8426e08998bb Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9802ead3a7e879a022f2293c3ba5fed22fc79961 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-ddt.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c21e52c5490c3f8435e108fd3cc341f95799b736 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/allinea-performance-reports.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.pdf new file mode 100644 index 0000000000000000000000000000000000000000..af8a55b81ace8516f9ee9be6b00f85080e8b81e2 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/cube.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fa2050b2f17cfc1fe8e06e18ee32fb601886dfa0 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-performance-counter-monitor.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.pdf new file mode 100644 index 0000000000000000000000000000000000000000..dfd15932200ca4e8c633041a6142471249d49d69 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/intel-vtune-amplifier.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.pdf new file mode 100644 index 0000000000000000000000000000000000000000..af1dded292635b1b4a885a382c4ba8d4ea9e9785 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/papi.pdf differ diff --git 
a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.pdf new file mode 100644 index 0000000000000000000000000000000000000000..20a23428e435969d74a76ff9b894887e3bbbbd34 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/scalasca.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.pdf new file mode 100644 index 0000000000000000000000000000000000000000..263817bb6ad4d0fefca1fa30f80c276aa7e99071 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/score-p.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5ca74248f7cd2d14230193fc459351566203e762 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/summary.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a0ed05d268c6376c687899ba6a2fd3531c15ced2 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/total-view.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0b2e34794eef6a4cefdc0bce9cc51882f76bc513 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/valgrind.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.pdf new file mode 100644 index 0000000000000000000000000000000000000000..956885abdaabe8e57f11321c88fb26ebd03a6ff5 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/debuggers/vampir.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f0905c24ba019f49ced5465350c2918898028701 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/gpi2.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9356edbaf609f4fee908818664f5a5955848c85a Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.pdf new file mode 100644 index 0000000000000000000000000000000000000000..79c717a6fd7a5dd046bec1dffbf15271e541c761 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-compilers.pdf differ diff --git 
a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.pdf new file mode 100644 index 0000000000000000000000000000000000000000..59d0875b313d5832870eb54f23714cdfb91101f8 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-debugger.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c1d256ddf3553d1c4e388b90b8ea9b61ea0b2c6d Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-integrated-performance-primitives.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.pdf new file mode 100644 index 0000000000000000000000000000000000000000..dd19f17fe58f7c1c040bb57d0061db577bf24106 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-mkl.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..28c3fa8260ff1e067024378a4e376d9450c970f8 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-parallel-studio-introduction.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ca97afbf875e077b3b1ea4948a11830d5b7e979c Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-suite/intel-tbb.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b9eee2d68d2ed74cd77b7bf3a799a3e2cab99843 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/intel-xeon-phi.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.pdf new file mode 100644 index 0000000000000000000000000000000000000000..18bdaa5c00270474b5957fbd0117cfe784777379 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/isv_licenses.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/java.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/java.pdf new file mode 100644 index 0000000000000000000000000000000000000000..424c6f8a892de67542779a2197b53c78b1e3352c Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/java.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.pdf new file mode 100644 index 
0000000000000000000000000000000000000000..a26b074d10536c881f70b4c26d8f395b24ac2cda Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/kvirtualization.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e95767ce6dada54edb9fb560d7eb750c7342fa62 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/Running_OpenMPI.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.pdf new file mode 100644 index 0000000000000000000000000000000000000000..044713472fdec89b5d3d11919e4753f8f7c6bdc0 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.pdf new file mode 100644 index 0000000000000000000000000000000000000000..05bef6f92b82beb5472a2a7cf397d090ebe5528b Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/mpi4py-mpi-for-python.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a8feb3641e6a749978d8a014be0eadb1b173ce51 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/mpi-1/running-mpich2.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e69d559fd0320a984e213c63f20502afa124fa45 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/copy_of_matlab.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e7af404448bd7b713ccd7c5dec9a7a77cd82d773 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/introduction.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a66a544ffa95b5e2093d92998ecb5ce8928fdfce Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/matlab.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.pdf new file mode 100644 index 0000000000000000000000000000000000000000..11546ff7075dcd8b20407d7fd792073f9a6dec32 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/octave.pdf differ diff --git 
a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d792fe4993469a7599de837f845fde20a51593a7 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-languages/r.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3bac71cb6c003e1a8a075ffd82550af255bf85c1 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/fftw.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3a46a680f724e50313fb9602c732cb2eccebaa98 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/gsl.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8393582e1565ad930b116fbed32aa97a65c87e6b Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/hdf5.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.pdf new file mode 100644 index 0000000000000000000000000000000000000000..93767580d1e146a9bd27988bfaae7aae911faffc Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/intel-numerical-libraries.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.pdf new file mode 100644 index 0000000000000000000000000000000000000000..29b7b3d30a21c2804c8dca0f5bd84e5e73431a55 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/magma-for-intel-xeon-phi.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d4e7df70602aa936825321330c4baa30c2aa389a Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/petsc.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c58c9425a8f9d89ec374ba79b895b7a3fcbd7296 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/numerical-libraries/trilinos.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.pdf 
b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f00bada3d7748a5cfa1c0c6e42d199b8cb12f9ef Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/nvidia-cuda.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.pdf new file mode 100644 index 0000000000000000000000000000000000000000..939ecff976b7743f70f7cf84377833ed0bd5d155 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/diagnostic-component-team.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0b52e0d3683f7284244aac91dcdc22439f36c6a6 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/overview.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6687d7b3b93604cd11d62fcd973b5e3acaadfa40 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/priorization-component-bierapp.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.pdf new file mode 100644 index 0000000000000000000000000000000000000000..23f6bf5d2ad0bb964468a280fe93b51326861681 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/openfoam.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e83aabdc19639c742029ae1c32c984ffbda9b89e Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/operating-system.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/software/paraview.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/paraview.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7c261d3481a5f7d08e7a3c04d46527a552fb5dc8 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/software/paraview.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.pdf new file mode 100644 index 0000000000000000000000000000000000000000..022db9ed4b976b0de30bd54aa6ae0f4fc34c1c35 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/storage-1/cesnet-data-storage.pdf differ diff --git a/pdf/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.pdf b/pdf/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1fb08f371138f5b345fb4717148e4abbcc0fe098 Binary files /dev/null and b/pdf/docs.it4i.cz/anselm-cluster-documentation/storage-1/storage.pdf differ diff --git 
a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f682c956082a6b4c37487ef51b5f04cf412de924 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/cygwin-and-x11-forwarding.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.pdf new file mode 100644 index 0000000000000000000000000000000000000000..abb2ef40ef67cb2a84dedf5019c5dba948f4a335 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/graphical-user-interface.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4268ef4a327621e2b1eecb40dacbc104cfa90e9d Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0db51faee2180746833e8d04bc486e8de83c03cf Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/x-window-system.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cc32acc2457d6f210a0027eaaf15187f2c72e27a Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/introduction.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4a8a19bd31ae12c9fd3d3a2448d51a6dd5dec8d4 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/introduction.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d3b3cb497928f21df1647caa18ba1ba2110eec23 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/pageant.pdf differ diff --git 
a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6f9594f84b21ab8930198a25f29c39d2f24ef2b9 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/putty.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.pdf new file mode 100644 index 0000000000000000000000000000000000000000..aa28a02d4193e034ce29f05f6bf73ae82a9a9508 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/puttygen.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.pdf new file mode 100644 index 0000000000000000000000000000000000000000..073e9868d285a0aff7cc00680ec7e7af8168cabb Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/ssh-keys.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d51c071fd7b23daf3ccf79c7df3076fb523e2c23 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/shell-access-and-data-transfer/vpn-connection-fail-in-win-8.1.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0e808e2d5d5452e3467f6b611f012ff6bc2e02c3 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/applying-for-resources.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7961082993b1fbe12ab5d5083dcaf7df04cb48ac Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/certificates-faq.pdf differ diff --git a/pdf/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.pdf b/pdf/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3dc244ba5c9cc1a475b69e71f281446c46cf41e1 Binary files /dev/null and b/pdf/docs.it4i.cz/get-started-with-it4innovations/obtaining-login-credentials/obtaining-login-credentials.pdf differ diff --git a/pdf/docs.it4i.cz/index.pdf b/pdf/docs.it4i.cz/index.pdf new file mode 100644 index 
0000000000000000000000000000000000000000..33190eabec63398358a103ec1a9e7b31a26d8509 Binary files /dev/null and b/pdf/docs.it4i.cz/index.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/accessing-the-cluster.pdf b/pdf/docs.it4i.cz/salomon/accessing-the-cluster.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d30280e98083f079a0fc8f548de6308550cc1607 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/accessing-the-cluster.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.pdf b/pdf/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0361def6aebb260c0834d83a8102cb9284127c52 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/accessing-the-cluster/outgoing-connections.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.pdf b/pdf/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5aec791d220f7c8801a15f1f2e3b12b60dd8248b Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/accessing-the-cluster/vpn-access.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/environment-and-modules.pdf b/pdf/docs.it4i.cz/salomon/environment-and-modules.pdf new file mode 100644 index 0000000000000000000000000000000000000000..83538f185719db5190c4dc59af804ac2eb58f312 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/environment-and-modules.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.pdf b/pdf/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.pdf new file mode 100644 index 0000000000000000000000000000000000000000..dfa58e60d79f04184ef2b6963248d7afd334b1e2 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/hardware-overview-1/hardware-overview.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/introduction.pdf b/pdf/docs.it4i.cz/salomon/introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..215b39e5cec1e0bd97c648c387cd1cabc134008c Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/introduction.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.pdf b/pdf/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.pdf new file mode 100644 index 0000000000000000000000000000000000000000..59d10b7b473f0eff99d5f176d4e5a897c9419bbe Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/network-1/7d-enhanced-hypercube.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.pdf b/pdf/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.pdf new file mode 100644 index 0000000000000000000000000000000000000000..adc8a8c08e63e80323db328f1734f40aac98f7a2 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/network-1/ib-single-plane-topology.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/network-1/network.pdf b/pdf/docs.it4i.cz/salomon/network-1/network.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9c36328f37fcb745f43759bc0d724db715c4bb79 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/network-1/network.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/prace.pdf b/pdf/docs.it4i.cz/salomon/prace.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f09f7510382d721ec620be7e72079160a0f7ed0a Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/prace.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.pdf 
b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.pdf new file mode 100644 index 0000000000000000000000000000000000000000..34741230adb3283cf3487d7543b1e04a3b7c35ab Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/capacity-computing.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.pdf b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6b2e5323cff47aa4cf22bc1f146ce4ebae77e0e0 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/introduction.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.pdf b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9b3c1013b7e754a490bbad5ba1b99056f1c5dce7 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-priority.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.pdf b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5cbb0fdbb3d57d5cd8d326716b46211aa3dd8c1c Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/job-submission-and-execution.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.pdf b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.pdf new file mode 100644 index 0000000000000000000000000000000000000000..21778f42c440432a03a3eb7d9f2c6e6559b8d72b Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/resource-allocation-and-job-execution/resources-allocation-policy.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/ansys-cfx.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-cfx.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c4255f79c4099fba7f668042d175c7acc232e71d Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-cfx.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/ansys-fluent.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-fluent.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f97e248c90ffdcae1cf6269621f55d0b854c9f40 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-fluent.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.pdf new file mode 100644 index 0000000000000000000000000000000000000000..dee775771587a1e85df91cc57d7e2c7271a39dde Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-ls-dyna.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a222520f3797414cca9d56b4bb5158cc4e97263b Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-mechanical-apdl.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.pdf 
new file mode 100644 index 0000000000000000000000000000000000000000..70685a89865ba21f28ffb9432418b93153cbe228 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/ansys-products-mechanical-fluent-cfx-mapdl.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/ansys.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/ansys.pdf new file mode 100644 index 0000000000000000000000000000000000000000..de70c0047443aa8d986b8d546a57cc2085289887 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/ansys.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/licensing.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/licensing.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fb6a213d1261e1be62be3ec17ed8bd26b02f887c Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/licensing.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.pdf new file mode 100644 index 0000000000000000000000000000000000000000..44c23f00d840c54ed542bfc8b5edf996e429aa1d Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/setting-license-preferences.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/ansys/workbench.pdf b/pdf/docs.it4i.cz/salomon/software/ansys/workbench.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d0ec2e539b7d6cca65ed66ffccdaf8d904d79364 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/ansys/workbench.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/chemistry/molpro.pdf b/pdf/docs.it4i.cz/salomon/software/chemistry/molpro.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4853be3253284621210e8221ccf01faf62822e7f Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/chemistry/molpro.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/chemistry/nwchem.pdf b/pdf/docs.it4i.cz/salomon/software/chemistry/nwchem.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e46ef962adce62a4605ae3a757519eafae415a62 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/chemistry/nwchem.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/chemistry/phono3py.pdf b/pdf/docs.it4i.cz/salomon/software/chemistry/phono3py.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e53a712c55740860a326016c12d05114a8785a1c Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/chemistry/phono3py.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/compilers.pdf b/pdf/docs.it4i.cz/salomon/software/compilers.pdf new file mode 100644 index 0000000000000000000000000000000000000000..68792fbb9eaf80638cc0b4b2fe123c3fd5d03dd6 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/compilers.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.pdf b/pdf/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8813d1f0b5f02a7f904167ba0d013541068789ad Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/comsol/comsol-multiphysics.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.pdf b/pdf/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e67fc7e82e11fa0adbc81ae5e58e39b301bc03ad Binary files /dev/null and 
b/pdf/docs.it4i.cz/salomon/software/comsol/licensing-and-available-versions.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers.pdf new file mode 100644 index 0000000000000000000000000000000000000000..09aee3ab64b068aecc3e6b96ee1a3c6434fc9a4d Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers/aislinn.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers/aislinn.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e615f23854047f6156ad06ed56d6e01e49552927 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers/aislinn.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b1f064a6ee6b4dac087aad90b4d20577fd003d0f Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers/allinea-ddt.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.pdf new file mode 100644 index 0000000000000000000000000000000000000000..23a9e41e2ad7929a9d0e0366871a3b87c5229b9c Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers/allinea-performance-reports.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d4228c4f84976e70f4975548ee933c8057004aa2 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers/intel-vtune-amplifier.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers/summary.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers/summary.pdf new file mode 100644 index 0000000000000000000000000000000000000000..697cfdc590a905b556442db2ac6f86c8dc5d778c Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers/summary.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers/total-view.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers/total-view.pdf new file mode 100644 index 0000000000000000000000000000000000000000..88f2a18aaa2ea69c67a79ab25d03051624439bb5 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers/total-view.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers/valgrind.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers/valgrind.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1210ad053c7db6bb77c613be6a2143c346a3c920 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers/valgrind.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/debuggers/vampir.pdf b/pdf/docs.it4i.cz/salomon/software/debuggers/vampir.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c7c15fb43c38651ffcf4481bf29f0a2de4091ad5 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/debuggers/vampir.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.pdf new file mode 100644 index 0000000000000000000000000000000000000000..12a97720853cfd3a5a1c6011b158a300586cf33a Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-advisor.pdf differ diff --git 
a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.pdf new file mode 100644 index 0000000000000000000000000000000000000000..51c4ced1cf8319654cfd7514229d5fa4cde2bac2 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-compilers.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a69b0468f7197134100c8255d738b817798747c0 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-debugger.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.pdf new file mode 100644 index 0000000000000000000000000000000000000000..eb55fc8d718a133c820d4920f087d4846ee66ee4 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-inspector.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7b8f6dc64954cc8b154f207c99d733acd5da9d68 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-integrated-performance-primitives.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.pdf new file mode 100644 index 0000000000000000000000000000000000000000..96ac3ebd361d42c75fdc1c19edc8c74cfa902544 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-mkl.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3b805f2de891a80bccf8d734ac2b7e1df792995f Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-parallel-studio-introduction.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1419213fa9514aa6a72b88fd8ba534b688874c9e Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-tbb.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.pdf b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.pdf new file mode 100644 index 0000000000000000000000000000000000000000..842ca0b7e8e8d68f40d1425c066815d8ec3a8ea5 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-suite/intel-trace-analyzer-and-collector.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/intel-xeon-phi.pdf b/pdf/docs.it4i.cz/salomon/software/intel-xeon-phi.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a6964daeacf48f2c9243fa3653242680ca3c2f07 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/intel-xeon-phi.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/java.pdf b/pdf/docs.it4i.cz/salomon/software/java.pdf new file mode 100644 index 0000000000000000000000000000000000000000..143b0d136175ddebf5596a47750dc4ec565db50c Binary files 
/dev/null and b/pdf/docs.it4i.cz/salomon/software/java.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.pdf b/pdf/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.pdf new file mode 100644 index 0000000000000000000000000000000000000000..49c309af9e936173bcd08ca0a145185b2bc66090 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/mpi-1/Running_OpenMPI.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/mpi-1/mpi.pdf b/pdf/docs.it4i.cz/salomon/software/mpi-1/mpi.pdf new file mode 100644 index 0000000000000000000000000000000000000000..879cc5d7f117f71c36d3a9934f7dc712434997b0 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/mpi-1/mpi.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.pdf b/pdf/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f82021e03a59b974bc7dcf441f02e933aa882a8b Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/mpi-1/mpi4py-mpi-for-python.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/numerical-languages/introduction.pdf b/pdf/docs.it4i.cz/salomon/software/numerical-languages/introduction.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f99a0275251edf22c888fd40fb1cfd813dcfd273 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/numerical-languages/introduction.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/numerical-languages/matlab.pdf b/pdf/docs.it4i.cz/salomon/software/numerical-languages/matlab.pdf new file mode 100644 index 0000000000000000000000000000000000000000..60918697517fd30faf3a305b555272d2ac05c4a1 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/numerical-languages/matlab.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/numerical-languages/octave.pdf b/pdf/docs.it4i.cz/salomon/software/numerical-languages/octave.pdf new file mode 100644 index 0000000000000000000000000000000000000000..65b31c2686eab7718d429d7675d37d62eb40dce7 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/numerical-languages/octave.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/numerical-languages/r.pdf b/pdf/docs.it4i.cz/salomon/software/numerical-languages/r.pdf new file mode 100644 index 0000000000000000000000000000000000000000..29880f63a7ce25013cfa75e33e9b66dbfffd4dc9 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/numerical-languages/r.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/software/operating-system.pdf b/pdf/docs.it4i.cz/salomon/software/operating-system.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7662d8edb63c5ece99f2384bb8fe2b9f882d78ac Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/software/operating-system.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/storage/cesnet-data-storage.pdf b/pdf/docs.it4i.cz/salomon/storage/cesnet-data-storage.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0b2b16d53fb2bd7f3912c2de94d92f5145856f11 Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/storage/cesnet-data-storage.pdf differ diff --git a/pdf/docs.it4i.cz/salomon/storage/storage.pdf b/pdf/docs.it4i.cz/salomon/storage/storage.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a2d144f1932cedf8fd03fe00584b6d3c8ad17ffb Binary files /dev/null and b/pdf/docs.it4i.cz/salomon/storage/storage.pdf differ