Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

192 files changed: +7070 −2451
+11 −15

@@ -12,7 +12,7 @@ docs:
   image: it4innovations/docker-mdcheck:latest
   allow_failure: true
   script:
-  - mdl -r ~MD013,~MD010,~MD014,~MD024,~MD026,~MD029,~MD033,~MD036,~MD037,~MD046 *.md docs.it4i # BUGS
+  - find content/docs -name "*.mdx" | xargs mdl -r ~MD002,~MD007,~MD013,~MD010,~MD014,~MD024,~MD026,~MD029,~MD033,~MD036,~MD037,~MD046

 pylint:
   stage: test
@@ -22,20 +22,16 @@ pylint:
   script:
   - pylint $(find . -name "*.py" -not -name "feslicescript.py")

-pysafety:
+capitalize:
   stage: test
-  image: it4innovations/docker-pycheck:latest
+  image: it4innovations/docker-mkdocscheck:latest
   allow_failure: true
   before_script:
   - source /opt/.venv3/bin/activate
-  - python -V # debug
+  - pip list | grep titlecase
   script:
-  - cat requirements.txt | safety check --stdin --full-report
-
-capitalize:
-  stage: test
-  image: it4innovations/docker-mkdocscheck:latest
-  script:
-  - find mkdocs.yml docs.it4i/ \( -name '*.md' -o -name '*.yml' \) -print0 | xargs -0 -n1 scripts/titlemd.py --test
+  - find content/docs/ \( -name '*.mdx' -o -name '*.yml' \) ! -path '*einfracz*' -print0 | xargs -0 -n1 scripts/titlemd.py --test

 ext_links:
   stage: after_test
@@ -45,7 +41,7 @@ ext_links:
   # remove JSON results
   - rm *.json
   script:
-  - find docs.it4i/ -name '*.md' -exec grep --color -l http {} + | xargs awesome_bot -t 10 --allow-dupe --allow-redirect
+  - find content/docs -name '*.mdx' -exec grep --color -l http {} + | xargs awesome_bot -t 10 --allow-dupe --allow-redirect
   only:
   - master

@@ -55,8 +51,8 @@ ext_links:
   before_script:
   - echo "192.168.101.10 docs.it4i.cz" >> /etc/hosts
   - wget -V
-  - echo https://docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
+  - echo https://docs.it4i.cz/devel/$CI_COMMIT_REF_NAME/
-  - wget --spider -e robots=off -o wget.log -r -p https://docs.it4i.cz/devel/$CI_BUILD_REF_NAME/ || true
+  - wget --spider -e robots=off -o wget.log -r -p https://docs.it4i.cz/devel/$CI_COMMIT_REF_NAME/ || true
   script:
   - cat wget.log | awk '/^Found [0-9]+ broken link[s]?.$/,/FINISHED/ { rc=-1; print $0 }; END { exit rc }'

@@ -75,7 +71,7 @@ mkdocs:
     # get modules list from clusters
   - bash scripts/get_modules.sh
     # generate site_url
-  - (if [ "${CI_BUILD_REF_NAME}" != 'master' ]; then sed -i "s/\(site_url.*$\)/\1devel\/$CI_BUILD_REF_NAME\//" mkdocs.yml;fi);
+  - (if [ "${CI_COMMIT_REF_NAME}" != 'master' ]; then sed -i "s/\(site_url.*$\)/\1devel\/$CI_COMMIT_REF_NAME\//" mkdocs.yml;fi);
     # generate ULT for code link
 #  - sed -i "s/master/$CI_BUILD_REF_NAME/g" material/partials/toc.html
     # regenerate modules matrix

@@ -113,7 +109,7 @@ deploy to stage:
   - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
   script:
   - chown nginx:nginx site -R
-  - rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
+  - rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/devel/$CI_COMMIT_REF_NAME/
   only:
   - branches@sccs/docs.it4i.cz
+27 −0

+Quantum Scalar I6
+JAN
+LUMI
+AI
+CI/CD
+AWS
+CLI
+FAQ
+s3cmd
+GUI
+EESSI
 hipBlas
 hipSolver
 LUMI
@@ -822,3 +833,19 @@ e-INFRA CZ
 DICE
 qgpu
 qcpu
+it4i-portal-clients
+it4icheckaccess
+it4idedicatedtime
+it4ifree
+it4ifsusage
+it4iuserfsusage
+it4iprojectfsusage
+it4imotd
+e-INFRA
+it4i-portal-clients
+s3cmd
+s5cmd
+title:
+e-INFRA CZ Cloud Ostrava
+e-INFRA CZ Account
+3 −53

-# User Documentation
+# IT4Innovations Documentation

 This project contains IT4Innovations user documentation source.

-## Development
+## Migration

-### Install
-
-```console
-$ sudo apt install libpython-dev
-$ virtualenv venv
-$ source venv/bin/activate
-$ pip install -r requirements.txt
-```
-
-### Package Upgrade With pip
-
-```console
-$ pip list -o
-$ pip install --upgrade package
-$ pip freeze | sed '/pkg-resources==/d' > requirements.txt
-```
-
-## Environments
-
-* [https://docs.it4i.cz - master branch](https://docs.it4i.cz - master branch)
-* [https://docs.it4i.cz/devel/$BRANCH_NAME](https://docs.it4i.cz/devel/$BRANCH_NAME) - maps the branches, available only with VPN access
-
-## URLs
-
-* [http://facelessuser.github.io/pymdown-extensions/](http://facelessuser.github.io/pymdown-extensions/)
-* [http://squidfunk.github.io/mkdocs-material/](http://squidfunk.github.io/mkdocs-material/)
-
-```
-fair-share
-InfiniBand
-RedHat
-CentOS
-Mellanox
-```
-
-## Mathematical Formulae
-
-### Formulas Are Made With:
-
-* [https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/](https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/)
-* [https://www.mathjax.org/](https://www.mathjax.org/)
-
-You can add formula to page like this:
-
-```
-$$
-MAX\_FAIRSHARE * ( 1 - \frac{usage_{Project}}{usage_{Total}} )
-$$
-```
-
-To enable the MathJX on page you need to enable it by adding line ```---8<--- "mathjax.md"``` at the end of file.
+* [fumadocs](https://fumadocs.vercel.app/)
\ No newline at end of file

docs.it4i/apiv2.md (new file, 0 → 100644): +203 −0
# SCS API v2

## Info

- **OpenAPI:** 3.1.0
- **Title:** scs-api-2
- **Version:** 0.1.0
- **Server URL:** `https://scs.it4i.cz/api/v2`

## Paths

### `/dedicated-time`

**GET**

- **Summary:** Get dedicated times
- **Description:** Retrieves dedicated time entries, optionally filtered by cluster name or period preset
- **OperationId:** `dedicated_time_handler`

**Parameters:**

- `cluster` (query): Filter by cluster name; Available values: karolina, barbora, dgx *(optional)*
- `period` (query): Filter by time period preset; Available values: planned, active *(optional)*

**Responses:**

- `200`: List of dedicated time entries
- `400`: Failed to deserialize query, Invalid cluster, Invalid period
  Example:

  ```json
  {
    "message": "Invalid cluster: el_gordo"
  }
  ```
- `500`: Failed to retrieve dedicated time due to a server error
  Example:
  ```json
  {
    "message": "Failed to retreive dedicated time"
  }
  ```
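
For illustration, a query against this endpoint might look as follows. This is only a sketch: it assumes the server URL from the Info section above and that the endpoint requires no authentication, which the specification excerpt does not state.

```console
$ curl "https://scs.it4i.cz/api/v2/dedicated-time?cluster=karolina&period=active"
```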

### `/dedicated-time-calendar`

**GET**

- **Summary:** Get dedicated times
- **Description:** Retrieves dedicated time entries and generates a VCalendar response.
- **OperationId:** `dedicated_time_calendar`

**Responses:**

- `200`: Dedicated time VCalendar
  Example:

  ```
  BEGIN:VCALENDAR
  VERSION:2.0
  PRODID:-//SUTD Timetable Calendar//randName//EN
  CALSCALE:GREGORIAN
  BEGIN:VEVENT
  UID:1234@example.com
  DTSTAMP:20230101T000000Z
  DTSTART:20230101T000000Z
  DTEND:20230102T000000Z
  SUMMARY:Sample Dedicated Time - Cluster Outage
  DESCRIPTION:Sample Dedicated Time - Cluster Outage
  END:VEVENT
  END:VCALENDAR
  ```

- `500`: Failed to retrieve dedicated time calendar
  Example:

  ```json
  {
    "message": "Failed to retreive dedicated time calendar"
  }
  ```

### `/motd`

**GET**

- **Summary:** Get messages of the day
- **Description:** Retrieves messages of the day, optionally filtered by category
- **OperationId:** `motd`

**Parameters:**

- `category` (query): *(optional)*

**Responses:**

- `200`: List of motd entries
- `400`: Failed to deserialize query, Invalid motd category
- `500`: Failed to retrieve motd entries due to a server error
  Example:

  ```json
  {
    "message": "Failed to retrieve motd"
  }
  ```
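
Again for illustration only (same assumed base URL, no authentication), filtering by category might look like this; the category value is borrowed from the Motd schema example below, since the excerpt does not list the accepted categories:

```console
$ curl "https://scs.it4i.cz/api/v2/motd?category=public-service-announcement"
```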

## Components

### Schemas

#### DedicatedTime

```yaml
type: object
required:
  - updated_at
properties:
  cluster_type:
    type: [string, 'null']
  date_efficiency:
    type: [string, 'null']
    format: date-time
  date_expiration:
    type: [string, 'null']
    format: date-time
  updated_at:
    type: string
    format: date-time
```
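
A response entry matching this schema could look like the following sketch (all values are illustrative, not taken from a real system):

```json
{
  "cluster_type": "karolina",
  "date_efficiency": "2025-01-10T08:00:00Z",
  "date_expiration": "2025-01-11T08:00:00Z",
  "updated_at": "2025-01-09T12:00:00Z"
}
```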

#### Motd

```yaml
type: object
required:
  - id
  - author
  - category
  - created_at
  - updated_at
  - date_modification
  - title
  - message_body
  - systems
properties:
  id:
    type: integer
    format: int32
    examples: [1]
  author:
    type: string
    examples: [Admin]
  category:
    type: string
    examples: [public-service-announcement]
  created_at:
    type: string
    format: date-time
  updated_at:
    type: string
    format: date-time
  date_modification:
    type: string
    format: date-time
  date_efficiency:
    type: [string, 'null']
    format: date-time
  date_expiration:
    type: [string, 'null']
    format: date-time
  date_outage_efficiency:
    type: [string, 'null']
    format: date-time
  date_outage_expiration:
    type: [string, 'null']
    format: date-time
  title:
    type: string
    examples: [Important Update]
  message_body:
    type: string
    examples: [We are experiencing some service disruptions.]
  systems:
    type: array
    items:
      type: string
      examples: [Karolina]
```
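
Using the example values given in the schema, a single Motd entry might be serialized like this (timestamps and null fields are illustrative):

```json
{
  "id": 1,
  "author": "Admin",
  "category": "public-service-announcement",
  "created_at": "2025-01-09T12:00:00Z",
  "updated_at": "2025-01-09T12:00:00Z",
  "date_modification": "2025-01-09T12:00:00Z",
  "date_efficiency": null,
  "date_expiration": null,
  "date_outage_efficiency": null,
  "date_outage_expiration": null,
  "title": "Important Update",
  "message_body": "We are experiencing some service disruptions.",
  "systems": ["Karolina"]
}
```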

#### MsgResponse

```yaml
type: object
description: |
  Common struct for DTO-less responses
  eg. ```200 {"message":"Operation succeeded"}```
required:
  - message
properties:
  message:
    type: string
    examples: [API response]
```
# Hardware Overview

!!!important Work in progress
    Barbora NG documentation is a WIP.
    The documentation is still being developed (reflecting changes in technical specifications) and may be updated frequently.

    The launch of Barbora NG is planned for October/November.
    In the meantime, the first computational resources have already been allocated in the latest Open Access Grant Competition.

Barbora NG consists of 141 non-accelerated compute nodes named **cn[001-141]**.
Each node is a powerful x86-64 computer equipped with 192 cores
(2x Intel Xeon 6952P with 96 CPU cores) and 768 GB RAM.
User access to the Barbora NG cluster is provided by two login nodes **login[1-2]**.
The nodes are interlinked through high speed InfiniBand NDR and Ethernet networks.

The parameters are summarized in the following tables:

| **In general**                       |                       |
| ------------------------------------ | --------------------- |
| Architecture of compute nodes        | x86-64                |
| Operating system                     | Linux                 |
| [**Compute nodes**][1]               |                       |
| Total                                | 141                   |
| Processor Type                       | [Intel Xeon 6952P][b] |
| Architecture                         | Granite Rapids        |
| Processor cores                      | 96                    |
| Processors per node                  | 2                     |
| RAM                                  | 768 GB                |
| Local disk drive                     | no                    |
| Compute network                      | InfiniBand NDR        |
| non-accelerated                      | 141, cn[001-141]        |
| **In total**                         |                       |
| Theoretical peak performance (Rpeak) | ??? TFLOP/s           |
| Cores                                | 27072                 |
| RAM                                  | 108.288 TB            |

[1]: compute-nodes.md
[2]: ../general/resources-allocation-policy.md
[3]: network.md
[4]: storage.md
[5]: ../general/shell-and-data-access.md
[6]: visualization.md

[a]: https://support.it4i.cz/rt
[b]: https://www.intel.com/content/www/us/en/products/sku/241643/intel-xeon-6952p-processor-480m-cache-2-10-ghz/specifications.html
+36 −0
# Introduction

!!!important Work in progress
    Barbora NG documentation is a WIP.
    The documentation is still being developed (reflecting changes in technical specifications) and may be updated frequently.

    The launch of Barbora NG is planned for October/November.
    In the meantime, the first computational resources have already been allocated in the latest Open Access Grant Competition.

Welcome to Barbora Next Gen (NG) supercomputer cluster.
Barbora NG is our latest supercomputer which consists of 141 compute nodes,
totaling 27072 compute cores with 108288 GB RAM, giving over ??? TFLOP/s theoretical peak performance.

Nodes are interconnected through a fully non-blocking fat-tree InfiniBand NDR network
and are equipped with Intel Granite Rapids processors.
Read more in [Hardware Overview][1].

The cluster runs with an operating system compatible with the Red Hat [Linux family][a]. We have installed a wide range of software packages targeted at different scientific domains.
These packages are accessible via the [modules environment][2].

The user data shared file system and job data shared file system are available to users.

The [Slurm][b] workload manager provides [computing resources allocations and job execution][3].

Read more on how to [apply for resources][4], [obtain login credentials][5] and [access the cluster][6].


[1]: hardware-overview.md
[2]: ../environment-and-modules.md
[3]: ../general/resources-allocation-policy.md
[4]: ../general/applying-for-resources.md
[5]: ../general/obtaining-login-credentials/obtaining-login-credentials.md
[6]: ../general/shell-and-data-access.md

[a]: http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg
[b]: https://slurm.schedmd.com/
@@ -8,7 +8,7 @@ The cluster runs with an operating system compatible with the Red Hat [Linux fam

 The user data shared file system and job data shared file system are available to users.

-The [PBS Professional Open Source Project][b] workload manager provides [computing resources allocations and job execution][3].
+The [Slurm][b] workload manager provides [computing resources allocations and job execution][3].

 Read more on how to [apply for resources][4], [obtain login credentials][5] and [access the cluster][6].

@@ -22,4 +22,4 @@ Read more on how to [apply for resources][4], [obtain login credentials][5] and
 [6]: ../general/shell-and-data-access.md

 [a]: http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg
-[b]: https://www.pbspro.org/
+[b]: https://slurm.schedmd.com/
@@ -120,7 +120,7 @@ The filesystem is backed up, so that it can be restored in case of a catastrophi

 The SCRATCH is realized as Lustre parallel file system and is available from all login and computational nodes. There are 5 OSTs dedicated for the SCRATCH file system.

-The SCRATCH filesystem is mounted in directory /scratch. Users may freely create subdirectories and files on the filesystem. Accessible capacity is 310TB, shared among all users. Individual users are restricted by filesystem usage quotas, set to 10TB per user. The purpose of this quota is to prevent runaway programs from filling the entire filesystem and deny service to other users. Should 10TB prove insufficient, contact [support][d], the quota may be lifted upon request.
+The SCRATCH filesystem is mounted in the `/scratch/project/PROJECT_ID` directory created automatically with the `PROJECT_ID` project. Accessible capacity is 310TB, shared among all users. Individual users are restricted by filesystem usage quotas, set to 10TB per user. The purpose of this quota is to prevent runaway programs from filling the entire filesystem and deny service to other users. Should 10TB prove insufficient, contact [support][d], the quota may be lifted upon request.

 !!! note
     The Scratch filesystem is intended for temporary scratch data generated during the calculation as well as for high-performance access to input and output files. All I/O intensive jobs must use the SCRATCH filesystem as their working directory.
 # e-INFRA CZ Cloud Ostrava

-Ostrava cloud consists of 28 nodes from [Karolina][a] supercomputer.
+Ostrava cloud consists of 22 nodes from the [Karolina][a] supercomputer.
 The cloud site is built on top of OpenStack,
 which is a free open standard cloud computing platform.

@@ -61,15 +61,15 @@ For the list of deployed OpenStack services, see the [list of components][1].

 More information can be found on the [e-INFRA CZ website][2].

-[1]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-site/openstack-components/
+[1]: https://docs.platforms.cloud.e-infra.cz/en/docs/technical-reference/ostrava-g2-site/openstack-components
-[2]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-site/
+[2]: https://docs.platforms.cloud.e-infra.cz/en/docs/technical-reference/ostrava-g2-site
-[3]: https://docs.e-infra.cz/account/
+[3]: https://docs.account.e-infra.cz/en/docs/access/account#how-to-apply-for-the-first-time
-[4]: https://docs.e-infra.cz/compute/openstack/getting-started/creating-first-infrastructure/
+[4]: https://docs.platforms.cloud.e-infra.cz/en/docs/getting-started/creating-first-infrastructure
-[5]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-site/quota-limits/
+[5]: https://docs.platforms.cloud.e-infra.cz/en/docs/technical-reference/ostrava-g2-site/quota-limits
 [6]: https://ostrava.openstack.cloud.e-infra.cz/
-[7]: https://docs.fuga.cloud/how-to-use-the-openstack-cli-tools-on-linux
+[7]: https://cyso.cloud/docs/cloud/extra/how-to-use-the-openstack-cli-tools-on-linux/
 [8]: https://code.it4i.cz/dvo0012/infrastructure-by-script/-/tree/main/openstack-infrastructure-as-code-automation/clouds/g2/ostrava/general/terraform
-[9]: https://docs.e-infra.cz/compute/openstack/how-to-guides/obtaining-api-key/
+[9]: https://docs.platforms.cloud.e-infra.cz/en/docs/how-to-guides/obtaining-api-key
 [10]: https://code.it4i.cz/dvo0012/infrastructure-by-script/-/tree/main/openstack-infrastructure-as-code-automation/clouds/g2/ostrava/general/commandline

 [a]: ../karolina/introduction.md
+143 −0
# IT4I Cloud

IT4I cloud consists of 14 nodes from the [Karolina][a] supercomputer.
The cloud site is built on top of OpenStack,
which is a free open standard cloud computing platform.

!!! note
    The guide describes steps for personal projects.<br>
    Some steps may differ for large projects.<br>
    For large projects, apply for resources to the [Allocation Committee][11].

## Access

To access the cloud you must be a member of an active EUROHPC project,
or fall into the **Access Category B**, i.e. [Access For Thematic HPC Resource Utilisation][11].

A personal OpenStack project is required. Request one by contacting [IT4I Support][12].

The dashboard is available at [https://cloud.it4i.cz][6].

You can see quotas set for the IT4I Cloud in the [Quota Limits][f] section.

## Creating First Instance

To create your first VM instance, follow the steps below:

### Log In

Go to [https://cloud.it4i.cz][6], enter your LDAP username and password and choose the `IT4I_LDAP` domain. After you sign in, you will be redirected to the dashboard.

![](../img/login.png)

### Create Key Pair

SSH key is required for remote access to your instance.

1. Go to **Project > Compute > Key Pairs** and click the **Create Key Pair** button.

    ![](../img/keypairs.png)

1. In the Create Key Pair window, name your key pair, select `SSH Key` for key type and confirm by clicking Create Key Pair.

    ![](../img/keypairs1.png)

1. Download and manage the private key according to your operating system.

### Update Security Group

To be able to remotely access your VM instance, you have to allow access in the security group.

1. Go to **Project > Network > Security Groups** and click on **Manage Rules**, for the default security group.

    ![](../img/securityg.png)

1. Click on **Add Rule**, choose **SSH**, and leave the remaining fields unchanged.

    ![](../img/securityg1.png)

### Create VM Instance

1. In **Compute > Instances**, click **Launch Instance**.

    ![](../img/instance.png)

1. Choose Instance Name, Description, and number of instances. Click **Next**.

    ![](../img/instance1.png)

1. Choose an image from which to boot the instance. Choose to delete the volume after instance delete. Click **Next**.

    ![](../img/instance2.png)

1. Choose the hardware resources of the instance by selecting a flavor. Additional volumes for data can be attached later on. Click **Next**.

    ![](../img/instance3.png)

1. Select the network and continue to **Security Groups**.

    ![](../img/instance4.png)

1. Allocate the security group with SSH rule that you added in the [Update Security Group](it4i-cloud.md#update-security-group) step. Then click **Next** to go to the **Key Pair**.

    ![](../img/securityg2.png)

1. Select the key that you created in the [Create Key Pair][g] section and launch the instance.

    ![](../img/instance5.png)

### Associate Floating IP

1. Click on the **Associate** button next to the floating IP.

    ![](../img/floatingip.png)

1. Select Port to be associated with the instance, then click the **Associate** button.

Now you can join the VM using your preferred SSH client.
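
For example, on Linux or macOS a connection sketch could look like the one below; the user name depends on the image you chose (e.g. `ubuntu` for Ubuntu cloud images), the key file name is a placeholder for the private key downloaded in the Create Key Pair step, and `<FLOATING_IP>` is the address associated above:

```console
ssh -i ~/Downloads/my-keypair.pem ubuntu@<FLOATING_IP>
```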

## Process Automation

You can automate the process using the OpenStack command-line tools.

### OpenStack

Prerequisites:

* Linux/Mac/WSL terminal BASH shell
* installed [OpenStack client][7]

Follow the guide: [https://code.it4i.cz/commandline][10]

Run commands:

```console
source project_openrc.sh.inc
```

```console
./cmdline-demo.sh basic-infrastructure-1
```
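
To verify that the client and your credentials work before running the demo script, you can, for example, list the resources visible to your project (standard OpenStack CLI commands; the output depends on your project):

```console
openstack flavor list
openstack server list
```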

[1]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-site/openstack-components/
[2]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-site/
[3]: https://docs.e-infra.cz/account/
[4]: https://docs.e-infra.cz/compute/openstack/getting-started/creating-first-infrastructure/
[5]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-g2-site/quota-limits/
[6]: https://cloud.it4i.cz
[7]: https://docs.fuga.cloud/how-to-use-the-openstack-cli-tools-on-linux
[8]: https://code.it4i.cz/dvo0012/infrastructure-by-script/-/tree/main/openstack-infrastructure-as-code-automation/clouds/g2/ostrava/general/terraform
[9]: https://docs.e-infra.cz/compute/openstack/how-to-guides/obtaining-api-key/
[10]: https://code.it4i.cz/dvo0012/infrastructure-by-script/-/tree/main/openstack-infrastructure-as-code-automation/clouds/g2/ostrava/general/commandline
[11]: https://www.it4i.cz/en/for-users/computing-resources-allocation
[12]: mailto:support@it4i.cz

[a]: ../karolina/introduction.md
[b]: ../general/access/project-access.md
[c]: einfracz-cloud.md
[d]: ../general/accessing-the-clusters/vpn-access.md
[e]: ../general/obtaining-login-credentials/obtaining-login-credentials.md
[f]: it4i-quotas.md
[g]: it4i-cloud.md#create-key-pair

+31 −0
# IT4I Cloud Quotas

| Resource                              | Quota |
|---------------------------------------|-------|
| Instances                             |    10 |
| VCPUs                                 |    20 |
| RAM                                   |  32GB |
| Volumes                               |    20 |
| Volume Snapshots                      |    12 |
| Volume Storage                        |   500 |
| Floating-IPs                          |     1 |
| Security Groups                       |    10 |
| Security Group Rules                  |   100 |
| Networks                              |     1 |
| Ports                                 |    10 |
| Routers                               |     1 |
| Backups                               |    12 |
| Groups                                |    10 |
| rbac_policies                         |    10 |
| Subnets                               |     1 |
| Subnet_pools                          |    -1 |
| Fixed-ips                             |    -1 |
| Injected-file-size                    | 10240 |
| Injected-path-size                    |   255 |
| Injected-files                        |     5 |
| Key-pairs                             |   100 |
| Properties                            |   128 |
| Server-groups                         |    10 |
| Server-group-members                  |    10 |
| Backup-gigabytes                      |  1002 |
| Per-volume-gigabytes                  |    -1 |
+301 −0
# Using NVIDIA Grace Partition

For testing your application on the NVIDIA Grace Partition,
you need to prepare a job script for that partition or use the interactive job:

```console
salloc -N 1 -c 144 -A PROJECT-ID -p p11-grace --time=08:00:00
```

where:

- `-N 1` means allocation of a single node,
- `-c 144` means allocation of 144 cores,
- `-p p11-grace` is the NVIDIA Grace partition,
- `--time=08:00:00` means allocation for 8 hours.
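
A minimal batch script for the same partition might look like the sketch below; the job name and the final command are placeholders, and `PROJECT-ID` must be replaced by your project:

```bash
#!/bin/bash
#SBATCH --job-name=grace-test        # placeholder name
#SBATCH --account=PROJECT-ID         # your project ID
#SBATCH --partition=p11-grace        # NVIDIA Grace partition
#SBATCH --nodes=1
#SBATCH --cpus-per-task=144
#SBATCH --time=08:00:00

ml NVHPC            # or GCC / Clang for Grace, see the toolchains below
./my_application    # placeholder for your binary
```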

## Available Toolchains

The platform offers three toolchains:

- Standard GCC (as a module `ml GCC`)
- [NVHPC](https://developer.nvidia.com/hpc-sdk) (as a module `ml NVHPC`)
- [Clang for NVIDIA Grace](https://developer.nvidia.com/grace/clang) (installed in `/opt/nvidia/clang`)

!!! note
    The NVHPC toolchain showed strong results with a minimal amount of tuning necessary in our initial evaluation.

### GCC Toolchain

The GCC compiler seems to struggle with vectorization of short (constant-length) loops, which tend to get completely unrolled/eliminated instead of being vectorized. For example, a simple nested loop such as

```cpp
for(int i = 0; i < 1000000; ++i) {
    // Iterations dependent in "i"
    // ...
    for(int j = 0; j < 8; ++j) {
        // but independent in "j"
        // ...
    }
}
```

may emit scalar code for the inner loop, leading to no vectorization being used at all.

### Clang (For Grace) Toolchain

Clang/LLVM tends to behave similarly, but it can be guided to properly vectorize the inner loop with either the flags `-O3 -ffast-math -march=native -fno-unroll-loops -mllvm -force-vector-width=8` or pragmas such as `#pragma clang loop vectorize_width(8)` and `#pragma clang loop unroll(disable)`.

```cpp
for(int i = 0; i < 1000000; ++i) {
    // Iterations dependent in "i"
    // ...
    #pragma clang loop unroll(disable) vectorize_width(8)
    for(int j = 0; j < 8; ++j) {
        // but independent in "j"
        // ...
    }
}
```

!!! note
    Our basic experiments show that fixed-width vectorization (NEON) tends to perform better for short (register-length) loops than SVE. In cases (like the one above) where the specified `vectorize_width` is larger than the available vector unit width, Clang will emit multiple NEON instructions (e.g., 4 instructions will be emitted to process 8 64-bit operations in the 128-bit units of Grace).

### NVHPC Toolchain

The NVHPC toolchain handled the aforementioned case without any additional tuning. A simple `-O3 -march=native -fast` should therefore be sufficient.

## Basic Math Libraries

The basic libraries (BLAS and LAPACK) are included in the NVHPC toolchain and can be used simply via `-lblas` and `-llapack` for BLAS and LAPACK, respectively (`lp64` and `ilp64` versions are also included).

!!! note
    The Grace platform doesn't include a CUDA-capable GPU, so `nvcc` will fail with an error. This means that `nvc`, `nvc++`, and `nvfortran` should be used instead.

### NVIDIA Performance Libraries

The [NVPL](https://developer.nvidia.com/nvpl) package includes a more extensive set of libraries in both sequential and multi-threaded versions:

- BLACS: `-lnvpl_blacs_{lp64,ilp64}_{mpich,openmpi3,openmpi4,openmpi5}`
- BLAS: `-lnvpl_blas_{lp64,ilp64}_{seq,gomp}`
- FFTW: `-lnvpl_fftw`
- LAPACK: `-lnvpl_lapack_{lp64,ilp64}_{seq,gomp}`
- ScaLAPACK: `-lnvpl_scalapack_{lp64,ilp64}`
- RAND: `-lnvpl_rand` or `-lnvpl_rand_mt`
- SPARSE: `-lnvpl_sparse`

This package should be compatible with all available toolchains and includes CMake module files for easy integration into CMake-based projects. For further documentation, see also [NVPL](https://docs.nvidia.com/nvpl).

### Recommended BLAS Library

We recommend using the multi-threaded BLAS library from the NVPL package.

!!! note
    It is important to pin the processes using **OMP_PROC_BIND=spread**

Example:

```console
$ ml NVHPC
$ nvc -O3 -march=native myprog.c -o myprog -lnvpl_blas_lp64_gomp
$ OMP_PROC_BIND=spread ./myprog
```

## Basic Communication Libraries

The OpenMPI 4 implementation is included with the NVHPC toolchain and is exposed as a module (`ml OpenMPI`). The following example

```cpp
#include <mpi.h>
#include <sched.h>
#include <omp.h>

int main(int argc, char **argv)
{
        int rank;
        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        #pragma omp parallel
        {
                printf("Hello on rank %d, thread %d on CPU %d\n", rank, omp_get_thread_num(), sched_getcpu());
        }
        MPI_Finalize();
}
```

can be compiled and run as follows

```console
ml OpenMPI
mpic++ -fast -fopenmp hello.cpp -o hello
OMP_PROC_BIND=close OMP_NUM_THREADS=4 mpirun -np 4 --map-by slot:pe=36 ./hello
```

In this configuration, we run 4 ranks, each bound to one quarter of the cores (36 cores per rank) and each running 4 OpenMP threads.

## Simple BLAS Application

The `hello world` example application (written in `C++` and `Fortran`) uses a simple stationary probability vector estimation to illustrate the use of GEMM (a BLAS level 3 routine).

Stationary probability vector estimation in `C++`:

```cpp
#include <iostream>
#include <vector>
#include <chrono>
#include "cblas.h"

const size_t ITERATIONS  = 32;
const size_t MATRIX_SIZE = 1024;

int main(int argc, char *argv[])
{
    const size_t matrixElements = MATRIX_SIZE*MATRIX_SIZE;

    std::vector<float> a(matrixElements, 1.0f / float(MATRIX_SIZE));

    for(size_t i = 0; i < MATRIX_SIZE; ++i)
        a[i] = 0.5f / (float(MATRIX_SIZE) - 1.0f);
    a[0] = 0.5f;

    std::vector<float> w1(matrixElements, 0.0f);
    std::vector<float> w2(matrixElements, 0.0f);

    std::copy(a.begin(), a.end(), w1.begin());

    std::vector<float> *t1, *t2;
    t1 = &w1;
    t2 = &w2;

    auto c1 = std::chrono::steady_clock::now();

    for(size_t i = 0; i < ITERATIONS; ++i)
    {
        std::fill(t2->begin(), t2->end(), 0.0f);

        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE,
                    1.0f, t1->data(), MATRIX_SIZE,
                    a.data(), MATRIX_SIZE,
                    1.0f, t2->data(), MATRIX_SIZE);

        std::swap(t1, t2);
    }

    auto c2 = std::chrono::steady_clock::now();

    for(size_t i = 0; i < MATRIX_SIZE; ++i)
    {
        std::cout << (*t1)[i*MATRIX_SIZE + i] << " ";
    }

    std::cout << std::endl;

    std::cout << "Elapsed Time: " << std::chrono::duration<double>(c2 - c1).count() << std::endl;

    return 0;
}
```

Stationary probability vector estimation in `Fortran`:

```fortran
program main
    implicit none

    integer :: matrix_size, iterations
    integer :: i
    real, allocatable, target :: a(:,:), w1(:,:), w2(:,:)
    real, dimension(:,:), contiguous, pointer :: t1, t2, tmp
    real, pointer :: out_data(:), out_diag(:)
    integer :: cr, cm, c1, c2

    iterations  = 32
    matrix_size = 1024

    call system_clock(count_rate=cr)
    call system_clock(count_max=cm)

    allocate(a(matrix_size, matrix_size))
    allocate(w1(matrix_size, matrix_size))
    allocate(w2(matrix_size, matrix_size))

    a(:,:) = 1.0 / real(matrix_size)
    a(:,1) = 0.5 / real(matrix_size - 1)
    a(1,1) = 0.5

    w1 = a
    w2(:,:) = 0.0

    t1 => w1
    t2 => w2

    call system_clock(c1)

    do i = 0, iterations
        t2(:,:) = 0.0

        call sgemm('N', 'N', matrix_size, matrix_size, matrix_size, 1.0, t1, matrix_size, a, matrix_size, 1.0, t2, matrix_size)

        tmp => t1
        t1  => t2
        t2  => tmp
    end do

    call system_clock(c2)

    out_data(1:size(t1)) => t1
    out_diag => out_data(1::matrix_size+1)

    print *, out_diag
    print *, "Elapsed Time: ", (c2 - c1) / real(cr)

    deallocate(a)
    deallocate(w1)
    deallocate(w2)
end program main
```

### Using NVHPC Toolchain

The C++ version of the example can be compiled with NVHPC and run as follows:

```console
ml NVHPC
nvc++ -O3 -march=native -fast -I$NVHPC/Linux_aarch64/$EBVERSIONNVHPC/compilers/include/lp64 -lblas main.cpp -o main
OMP_NUM_THREADS=144 OMP_PROC_BIND=spread ./main
```

The Fortran version is just as simple:

```console
ml NVHPC
nvfortran -O3 -march=native -fast -lblas main.f90 -o main.x
OMP_NUM_THREADS=144 OMP_PROC_BIND=spread ./main.x
```

!!! note
    It may be advantageous to use the NVPL libraries instead of the NVHPC ones. For example, the DGEMM BLAS 3 routine from NVPL is almost 30% faster than the NVHPC one.

### Using Clang (For Grace) Toolchain

Similarly, the Clang for Grace toolchain with NVPL BLAS can be used to compile the C++ version of the example.

```console
ml NVHPC
/opt/nvidia/clang/17.23.11/bin/clang++ -O3 -march=native -ffast-math -I$NVHPC/Linux_aarch64/$EBVERSIONNVHPC/compilers/include/lp64 -lnvpl_blas_lp64_gomp main.cpp -o main
```

!!! note
    The NVHPC module is used only for the `cblas.h` include in this case. This can be avoided by changing the code to use `nvpl_blas.h` instead.

## Additional Resources

- [https://www.nvidia.com/en-us/data-center/grace-cpu-superchip/][1]
- [https://developer.nvidia.com/hpc-sdk][2]
- [https://developer.nvidia.com/grace/clang][3]
- [https://docs.nvidia.com/nvpl][4]

[1]: https://www.nvidia.com/en-us/data-center/grace-cpu-superchip/
[2]: https://developer.nvidia.com/hpc-sdk
[3]: https://developer.nvidia.com/grace/clang
[4]: https://docs.nvidia.com/nvpl
+279 −0
# Heterogeneous Memory Management on Intel Platforms

Partition `p10-intel` offers heterogeneous memory directly exposed to the user. This allows manually picking the appropriate kind of memory at process or even single-allocation granularity. Both kinds of memory are exposed as separate NUMA nodes, which gives both coarse-grained (process-level) and fine-grained (allocation-level) control over the memory type used.

## Overview

At the process level, the `numactl` facilities can be utilized, while the Intel-provided `memkind` library allows for finer control. Both the `memkind` library and `numactl` can be accessed by loading the `memkind` module or the `OpenMPI` module (`numactl` only).

```bash
ml memkind
```

### Process Level (NUMACTL)

`numactl` allows you to either restrict the memory pool of the process to a specific set of NUMA memory nodes

```bash
numactl --membind <node_ids_set>
```

or select a single preferred node

```bash
numactl --preferred <node_id>
```

where `<node_ids_set>` is a comma-separated list (e.g., `0,2,5,...`), possibly in combination with ranges (such as `0-5`). The `membind` option kills the process if it requests more memory than can be satisfied from the specified nodes. The `preferred` option instead falls back to other nodes according to their NUMA distance in the same situation.

A convenient way to check the `numactl` configuration is

```bash
numactl -s
```

which prints the configuration of its execution environment, e.g.:

```bash
numactl --membind 8-15 numactl -s
policy: bind
preferred node: 0
physcpubind: 0 1 2 ... 189 190 191
cpubind: 0 1 2 3 4 5 6 7
nodebind: 0 1 2 3 4 5 6 7
membind: 8 9 10 11 12 13 14 15
```

The last row shows that memory allocations are restricted to NUMA nodes `8-15`.

### Allocation Level (MEMKIND)

The `memkind` library (in its simplest use case) offers a new variant of the `malloc/free` function pair, which allows specifying the kind of memory to be used for a given allocation. Moving a specific allocation from the default to the HBM memory pool can then be achieved by replacing:

```cpp
void *pData = malloc(<SIZE>);
/* ... */
free(pData);
```

with

```cpp
#include <memkind.h>

void *pData = memkind_malloc(MEMKIND_HBW, <SIZE>);
/* ... */
memkind_free(NULL, pData); // "kind" parameter is deduced from the address
```

Similarly other memory types can be chosen.

!!! note
    The allocation will return `NULL` pointer when memory of specified kind is not available.

## High Bandwidth Memory (HBM)

Intel Sapphire Rapids (partition `p10-intel`) consists of two sockets each with `128GB` of DDR and `64GB` on-package HBM memory. The machine is configured in FLAT mode and therefore exposes HBM memory as memory-only NUMA nodes (`16GB` per 12-core tile). The configuration can be verified by running

```bash
numactl -H
```

which should show 16 NUMA nodes (`0-7` should contain 12 cores and `32GB` of DDR DRAM, while `8-15` should have no cores and `16GB` of HBM each).

![](../../img/cs/guides/p10_numa_sc4_flat.png)

### Process Level

With this we can easily restrict application to DDR DRAM or HBM memory:

```bash
# Only DDR DRAM
numactl --membind 0-7 ./stream
# ...
Function    Best Rate MB/s  Avg time     Min time     Max time
Copy:          369745.8     0.043355     0.043273     0.043588
Scale:         366989.8     0.043869     0.043598     0.045355
Add:           378054.0     0.063652     0.063483     0.063899
Triad:         377852.5     0.063621     0.063517     0.063884

# Only HBM
numactl --membind 8-15 ./stream
# ...
Function    Best Rate MB/s  Avg time     Min time     Max time
Copy:         1128430.1     0.015214     0.014179     0.015615
Scale:        1045065.2     0.015814     0.015310     0.016309
Add:          1096992.2     0.022619     0.021878     0.024182
Triad:        1065152.4     0.023449     0.022532     0.024559
```

The DDR DRAM achieves a bandwidth of around 400 GB/s, while the HBM clears the 1 TB/s bar.

Some further improvements can be achieved by entirely isolating a process to a single tile. This can be useful for MPI jobs, where `$OMPI_COMM_WORLD_RANK` can be used to bind each process individually. The simple wrapper script to do this may look like

```bash
#!/bin/bash
numactl --membind $((8 + $OMPI_COMM_WORLD_RANK)) $@
```

and can be used as

```bash
mpirun -np 8 --map-by slot:pe=12 membind_wrapper.sh ./stream_mpi
```

(8 tiles with 12 cores each). However, this approach assumes `16GB` of HBM memory local to the tile is sufficient for each process (memory cannot spill between tiles). This approach may be significantly more useful in combination with `--preferred` instead of `--membind` to force preference of local HBM with spill to DDR DRAM. Otherwise

```bash
mpirun -n 8 --map-by slot:pe=12 numactl --membind 8-15 ./stream_mpi
```

is most likely preferable even for MPI workloads. Applying the above approach to MPI Stream with 8 ranks and 1-24 threads per rank, we can expect these results:
![](../../img/cs/guides/p10_stream_dram.png)
![](../../img/cs/guides/p10_stream_hbm.png)

### Allocation Level

Allocation-level memory kind selection using the `memkind` library can be illustrated with a modified stream benchmark. The stream benchmark uses three working arrays (A, B, and C), whose allocation can be changed to `memkind_malloc` as follows:

```cpp
#include <memkind.h>
// ...
STREAM_TYPE *a = (STREAM_TYPE *)memkind_malloc(MEMKIND_HBW_ALL, STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
STREAM_TYPE *b = (STREAM_TYPE *)memkind_malloc(MEMKIND_REGULAR, STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
STREAM_TYPE *c = (STREAM_TYPE *)memkind_malloc(MEMKIND_HBW_ALL, STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
// ...
memkind_free(NULL, a);
memkind_free(NULL, b);
memkind_free(NULL, c);
```

Arrays A and C are allocated from HBM (`MEMKIND_HBW_ALL`), while DDR DRAM (`MEMKIND_REGULAR`) is used for B.
The code then has to be linked with `memkind` library

```bash
gcc -march=native -O3 -fopenmp -lmemkind memkind_stream.c -o memkind_stream
```

and can be run as

```bash
export MEMKIND_HBW_NODES=8,9,10,11,12,13,14,15
OMP_NUM_THREADS=$((N*12)) OMP_PROC_BIND=spread ./memkind_stream
```

While the `memkind` library should be able to detect HBM memory on its own (through `HMAT` and `hwloc`), this is not supported on `p10-intel`. This means that the NUMA nodes representing HBM have to be specified manually using the `MEMKIND_HBW_NODES` environment variable.

![](../../img/cs/guides/p10_stream_memkind.png)

With this setup, we can see that the simple copy operation (C[i] = A[i]) achieves bandwidth comparable to the application bound entirely to HBM memory. On the other hand, the scale operation (B[i] = s*C[i]) is mostly limited by the DDR DRAM bandwidth. It is also worth noting that operations combining all three arrays perform close to the HBM-only configuration.

## Simple Application

One application that can greatly benefit from the availability of a large, slower memory alongside a smaller, faster one is computing a histogram with many bins over a large dataset.

```cpp
#include <iostream>
#include <vector>
#include <chrono>
#include <cmath>
#include <cstring>
#include <omp.h>
#include <memkind.h>

const size_t N_DATA_SIZE  = 2 * 1024 * 1024 * 1024ull;
const size_t N_BINS_COUNT = 1 * 1024 * 1024ull;
const size_t N_ITERS      = 10;

#if defined(HBM)
    #define DATA_MEMKIND MEMKIND_REGULAR
    #define BINS_MEMKIND MEMKIND_HBW_ALL
#else
    #define DATA_MEMKIND MEMKIND_REGULAR
    #define BINS_MEMKIND MEMKIND_REGULAR
#endif

int main(int argc, char *argv[])
{
    const double binWidth = 1.0 / double(N_BINS_COUNT + 1);

    double *pData = (double *)memkind_malloc(DATA_MEMKIND, N_DATA_SIZE * sizeof(double));
    size_t *pBins = (size_t *)memkind_malloc(BINS_MEMKIND, N_BINS_COUNT * omp_get_max_threads() * sizeof(double));

    #pragma omp parallel
    {
        drand48_data state;
        srand48_r(omp_get_thread_num(), &state);

        #pragma omp for
        for(size_t i = 0; i < N_DATA_SIZE; ++i)
            drand48_r(&state, &pData[i]);
    }

    auto c1 = std::chrono::steady_clock::now();

    for(size_t it = 0; it < N_ITERS; ++it)
    {
        #pragma omp parallel
        {
            for(size_t i = 0; i < N_BINS_COUNT; ++i)
                pBins[omp_get_thread_num()*N_BINS_COUNT + i] = size_t(0);

            #pragma omp for
            for(size_t i = 0; i < N_DATA_SIZE; ++i)
            {
                const size_t idx = size_t(pData[i] / binWidth) % N_BINS_COUNT;
                pBins[omp_get_thread_num()*N_BINS_COUNT + idx]++;
            }
        }
    }

    auto c2 = std::chrono::steady_clock::now();

    #pragma omp parallel for
    for(size_t i = 0; i < N_BINS_COUNT; ++i)
    {
        for(size_t j = 1; j < omp_get_max_threads(); ++j)
            pBins[i] += pBins[j*N_BINS_COUNT + i];
    }

    std::cout << "Elapsed Time [s]: " << std::chrono::duration<double>(c2 - c1).count() << std::endl;

    size_t total = 0;
    #pragma omp parallel for reduction(+:total)
    for(size_t i = 0; i < N_BINS_COUNT; ++i)
        total += pBins[i];

    std::cout << "Total Items: " << total << std::endl;

    memkind_free(NULL, pData);
    memkind_free(NULL, pBins);

    return 0;
}
```

### Using HBM Memory (P10-Intel)

The following commands can be used to compile and run the example application above:

```bash
ml GCC memkind
export MEMKIND_HBW_NODES=8,9,10,11,12,13,14,15
g++ -O3 -fopenmp -lmemkind histogram.cpp -o histogram_dram
g++ -O3 -fopenmp -lmemkind -DHBM histogram.cpp -o histogram_hbm
OMP_PROC_BIND=spread GOMP_CPU_AFFINITY=0-95 OMP_NUM_THREADS=96 ./histogram_dram
OMP_PROC_BIND=spread GOMP_CPU_AFFINITY=0-95 OMP_NUM_THREADS=96 ./histogram_hbm
```

Moving the histogram bins into HBM memory should speed up the algorithm by more than a factor of two. It should be noted that also moving the `pData` array into HBM memory worsens this result (presumably because, with the data left in DDR DRAM, the algorithm can saturate both memory interfaces).

## Additional Resources

- [https://linux.die.net/man/8/numactl][1]
- [http://memkind.github.io/memkind/man_pages/memkind.html][2]
- [https://lenovopress.lenovo.com/lp1738-implementing-intel-high-bandwidth-memory][3]

[1]: https://linux.die.net/man/8/numactl
[2]: http://memkind.github.io/memkind/man_pages/memkind.html
[3]: https://lenovopress.lenovo.com/lp1738-implementing-intel-high-bandwidth-memory
+79 −0
# Using VMware Horizon

VMware Horizon is a virtual desktop infrastructure (VDI) solution
that enables users to access virtual desktops and applications from any device and any location.
It provides a comprehensive end-to-end solution for managing and delivering virtual desktops and applications,
including features such as session management, user authentication, and virtual desktop provisioning.

![](../../img/horizon.png)

## How to Access VMware Horizon

!!! important
    Access to VMware Horizon requires IT4I VPN.

1. Contact [IT4I support][a] with a request for access and VM allocation.
1. [Download][1] and install the VMware Horizon Client for Windows.
1. Add a new server `https://vdi-cs01.msad.it4i.cz/` in the Horizon client.
1. Connect to the server using your IT4I username and password.
   Username is in the `domain\username` format and the domain is `msad.it4i.cz`.
   For example: `msad.it4i.cz\user123`

## Example

Below is an example of how to mount a remote folder and check the connection on Windows OS:

### Prerequisites

3D applications

* [Blender][3]

SSHFS for remote access

* [sshfs-win][4]
* [winfsp][5]
* [sshfs-win-manager][6]
* ssh keys for access to clusters

### Steps

1. Start the VPN and connect to the server via VMware Horizon Client.

    ![](../../img/vmware.png)

1. Mount a remote folder.
    * Run sshfs-win-manager.

    ![](../../img/sshfs.png)

    * Add a new connection.

    ![](../../img/sshfs1.png)

    * Click on **Connect**.

    ![](../../img/sshfs2.png)

1. Check that the folder is mounted.

    ![](../../img/mount.png)

1. Check the GPU resources.

    ![](../../img/gpu.png)

### Blender

Now if you run, for example, Blender, you can check the available GPU resources in Blender Preferences.

  ![](../../img/blender.png)

[a]: mailto:support@it4i.cz

[1]: https://vdi-cs01.msad.it4i.cz/
[2]: https://www.paraview.org/download/
[3]: https://www.blender.org/download/
[4]: https://github.com/winfsp/sshfs-win/releases
[5]: https://github.com/winfsp/winfsp/releases/
[6]: https://github.com/evsar3/sshfs-win-manager/releases
+227 −0
# Using IBM Power Partition

For testing your application on the IBM Power partition,
you need to prepare a job script for that partition or use the interactive job:

```console
salloc -N 1 -c 192 -A PROJECT-ID -p p07-power --time=08:00:00
```

where:

- `-N 1` means allocation of a single node,
- `-c 192` means allocation of 192 cores (threads),
- `-p p07-power` is the IBM Power partition,
- `--time=08:00:00` means allocation for 8 hours.
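
A minimal batch script equivalent of this allocation might look like the sketch below (the job name and the final command are placeholders; replace `PROJECT-ID` with your project):

```bash
#!/bin/bash
#SBATCH --job-name=power-test       # placeholder name
#SBATCH --account=PROJECT-ID        # your project ID
#SBATCH --partition=p07-power       # IBM Power partition
#SBATCH --nodes=1
#SBATCH --cpus-per-task=192
#SBATCH --time=08:00:00

ml architecture/ppc64le   # reload the module list for the Power architecture (see below)
ml GCC OpenBLAS
./hello                   # placeholder for your application
```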

On the partition, you should reload the list of modules:

```
ml architecture/ppc64le
```

The platform offers both `GNU`-based and proprietary IBM toolchains for building applications. IBM also provides an optimized BLAS routines library ([ESSL](https://www.ibm.com/docs/en/essl/6.1)), which can be used with both toolchains.

## Building Applications

Our sample application depends on `BLAS`, therefore we start by loading the following modules (regardless of which toolchain we want to use):

```
ml GCC OpenBLAS
```

### GCC Toolchain

In the case of the GCC toolchain, we can go ahead and compile the application using either `g++`

```
g++ -lopenblas hello.cpp -o hello
```

or `gfortran`

```
gfortran -lopenblas hello.f90 -o hello
```

as usual.

### IBM Toolchain

The IBM toolchain requires additional environment setup as it is installed in `/opt/ibm` and is not exposed as a module

```
IBM_ROOT=/opt/ibm
OPENXLC_ROOT=$IBM_ROOT/openxlC/17.1.1
OPENXLF_ROOT=$IBM_ROOT/openxlf/17.1.1

export PATH=$OPENXLC_ROOT/bin:$PATH
export LD_LIBRARY_PATH=$OPENXLC_ROOT/lib:$LD_LIBRARY_PATH

export PATH=$OPENXLF_ROOT/bin:$PATH
export LD_LIBRARY_PATH=$OPENXLF_ROOT/lib:$LD_LIBRARY_PATH
```

From there, we can use either `ibm-clang++`

```
ibm-clang++ -lopenblas hello.cpp -o hello
```

or `xlf`

```
xlf -lopenblas hello.f90 -o hello
```

to build the application as usual.

!!! note
    The combination of `xlf` and `openblas` seems to cause severe performance degradation. Therefore, the `ESSL` library should be preferred (see below).

### Using ESSL Library

The [ESSL](https://www.ibm.com/docs/en/essl/6.1) library is installed in `/opt/ibm/math/essl/7.1`, so we define additional environment variables:

```
IBM_ROOT=/opt/ibm
ESSL_ROOT=${IBM_ROOT}/math/essl/7.1
export LD_LIBRARY_PATH=$ESSL_ROOT/lib64:$LD_LIBRARY_PATH
```

The simplest way to utilize `ESSL` in an application that already uses `BLAS` or `CBLAS` routines is to link with the provided `libessl.so`. This can be done by replacing `-lopenblas` with `-lessl`, or with `-lessl -lopenblas` (in case `ESSL` does not provide all the required `BLAS` routines).
In practice, this can look like

```
g++ -L${ESSL_ROOT}/lib64 -lessl -lopenblas hello.cpp -o hello
```

or

```
gfortran -L${ESSL_ROOT}/lib64 -lessl -lopenblas hello.f90 -o hello
```

and similarly for IBM compilers (`ibm-clang++` and `xlf`).

## Hello World Applications

The `hello world` example application (written in `C++` and `Fortran`) uses a simple stationary probability vector estimation to illustrate the use of GEMM (a BLAS level 3 routine).

Stationary probability vector estimation in `C++`:

```c++
#include <iostream>
#include <vector>
#include <chrono>
#include "cblas.h"

const size_t ITERATIONS  = 32;
const size_t MATRIX_SIZE = 1024;

int main(int argc, char *argv[])
{
    const size_t matrixElements = MATRIX_SIZE*MATRIX_SIZE;

    std::vector<float> a(matrixElements, 1.0f / float(MATRIX_SIZE));

    for(size_t i = 0; i < MATRIX_SIZE; ++i)
        a[i] = 0.5f / (float(MATRIX_SIZE) - 1.0f);
    a[0] = 0.5f;

    std::vector<float> w1(matrixElements, 0.0f);
    std::vector<float> w2(matrixElements, 0.0f);

    std::copy(a.begin(), a.end(), w1.begin());

    std::vector<float> *t1, *t2;
    t1 = &w1;
    t2 = &w2;

    auto c1 = std::chrono::steady_clock::now();

    for(size_t i = 0; i < ITERATIONS; ++i)
    {
        std::fill(t2->begin(), t2->end(), 0.0f);

        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE,
                    1.0f, t1->data(), MATRIX_SIZE,
                    a.data(), MATRIX_SIZE,
                    1.0f, t2->data(), MATRIX_SIZE);

        std::swap(t1, t2);
    }

    auto c2 = std::chrono::steady_clock::now();

    for(size_t i = 0; i < MATRIX_SIZE; ++i)
    {
        std::cout << (*t1)[i*MATRIX_SIZE + i] << " ";
    }

    std::cout << std::endl;

    std::cout << "Elapsed Time: " << std::chrono::duration<double>(c2 - c1).count() << std::endl;

    return 0;
}
```

Stationary probability vector estimation in `Fortran`:

```fortran
program main
    implicit none

    integer :: matrix_size, iterations
    integer :: i
    real, allocatable, target :: a(:,:), w1(:,:), w2(:,:)
    real, dimension(:,:), contiguous, pointer :: t1, t2, tmp
    real, pointer :: out_data(:), out_diag(:)
    integer :: cr, cm, c1, c2

    iterations  = 32
    matrix_size = 1024

    call system_clock(count_rate=cr)
    call system_clock(count_max=cm)

    allocate(a(matrix_size, matrix_size))
    allocate(w1(matrix_size, matrix_size))
    allocate(w2(matrix_size, matrix_size))

    a(:,:) = 1.0 / real(matrix_size)
    a(:,1) = 0.5 / real(matrix_size - 1)
    a(1,1) = 0.5

    w1 = a
    w2(:,:) = 0.0

    t1 => w1
    t2 => w2

    call system_clock(c1)

    do i = 0, iterations
        t2(:,:) = 0.0

        call sgemm('N', 'N', matrix_size, matrix_size, matrix_size, 1.0, t1, matrix_size, a, matrix_size, 1.0, t2, matrix_size)

        tmp => t1
        t1  => t2
        t2  => tmp
    end do

    call system_clock(c2)

    out_data(1:size(t1)) => t1
    out_diag => out_data(1::matrix_size+1)

    print *, out_diag
    print *, "Elapsed Time: ", (c2 - c1) / real(cr)

    deallocate(a)
    deallocate(w1)
    deallocate(w2)
end program main
```
-# Complementary Systems
+# Introduction

 Complementary systems offer development environment for users
 that need to port and optimize their code and applications
@@ -26,6 +26,8 @@ Second stage of complementary systems implementation comprises of these partitio
 - compute partition 7 - based on IBM Power10 architecture
 - compute partition 8 - modern CPU with a very high L3 cache capacity (over 750MB)
 - compute partition 9 - virtual GPU accelerated workstations
+- compute partition 10 - Sapphire Rapids-HBM server
+- compute partition 11 - NVIDIA Grace CPU Superchip

 ![](../img/cs2_2.png)

Original line number Original line Diff line number Diff line
@@ -20,6 +20,7 @@ p05-synt up 1-00:00:00 0/1/0/1 p05-synt01
p06-arm      up 1-00:00:00          0/2/0/2 p06-arm[01-02]
p06-arm      up 1-00:00:00          0/2/0/2 p06-arm[01-02]
p07-power    up 1-00:00:00          0/1/0/1 p07-power01
p07-power    up 1-00:00:00          0/1/0/1 p07-power01
p08-amd      up 1-00:00:00          0/1/0/1 p08-amd01
p08-amd      up 1-00:00:00          0/1/0/1 p08-amd01
p10-intel    up 1-00:00:00          0/1/0/1 p10-intel01
```
```


## Getting Job Information
## Getting Job Information
@@ -89,7 +90,7 @@ set | grep ^SLURM


| variable name | description | example |
| variable name | description | example |
| ------ | ------ | ------ |
| ------ | ------ | ------ |
| SLURM_JOBID | job id of the executing job| 593 |
| SLURM_JOB_ID | job id of the executing job| 593 |
| SLURM_JOB_NODELIST | nodes allocated to the job | p03-amd[01-02] |
| SLURM_JOB_NODELIST | nodes allocated to the job | p03-amd[01-02] |
| SLURM_JOB_NUM_NODES | number of nodes allocated to the job | 2 |
| SLURM_JOB_NUM_NODES | number of nodes allocated to the job | 2 |
| SLURM_STEP_NODELIST | nodes allocated to the job step | p03-amd01 |
| SLURM_STEP_NODELIST | nodes allocated to the job step | p03-amd01 |
@@ -145,6 +146,7 @@ $ scancel JOBID
| p06-arm   | 2     | yes        | 80             | aarch64,ib |
| p06-arm   | 2     | yes        | 80             | aarch64,ib |
| p07-power | 1     | yes        | 192            | ppc64le,ib |
| p07-power | 1     | yes        | 192            | ppc64le,ib |
| p08-amd   | 1     | yes        | 128            | x86_64,amd,milan-x,ib,ht |
| p08-amd   | 1     | yes        | 128            | x86_64,amd,milan-x,ib,ht |
| p10-intel | 1     | yes        | 96             | x86_64,intel,sapphire_rapids,ht |


Use `-t`, `--time` option to specify job run time limit. Default job time limit is 2 hours, maximum job time limit is 24 hours.
Use `-t`, `--time` option to specify job run time limit. Default job time limit is 2 hours, maximum job time limit is 24 hours.


@@ -312,6 +314,14 @@ Whole node allocation:
salloc -A PROJECT-ID -p p08-amd
salloc -A PROJECT-ID -p p08-amd
```
```


## Partition 10 - Intel Sapphire Rapids

Whole node allocation:

```console
salloc -A PROJECT-ID -p p10-intel
```

## Features
## Features


Nodes have feature tags assigned to them.
Nodes have feature tags assigned to them.
@@ -326,6 +336,7 @@ Users can select nodes based on the feature tags using --constraint option.
| intel | manufacturer |
| intel | manufacturer |
| icelake | processor family |
| icelake | processor family |
| broadwell | processor family |
| broadwell | processor family |
| sapphire_rapids | processor family |
| milan | processor family |
| milan | processor family |
| milan-x | processor family |
| milan-x | processor family |
| ib | Infiniband |
| ib | Infiniband |
@@ -342,10 +353,14 @@ p00-arm01 aarch64,cortex-a72
p01-arm[01-08]   aarch64,a64fx,ib
p01-arm[01-08]   aarch64,a64fx,ib
p02-intel01      x86_64,intel,icelake,ib,fpga,bitware,nvdimm,ht
p02-intel01      x86_64,intel,icelake,ib,fpga,bitware,nvdimm,ht
p02-intel02      x86_64,intel,icelake,ib,fpga,bitware,nvdimm,noht
p02-intel02      x86_64,intel,icelake,ib,fpga,bitware,nvdimm,noht
p03-amd01        x86_64,amd,milan,ib,gpu,mi100,fpga,xilinx,ht
p03-amd02        x86_64,amd,milan,ib,gpu,mi100,fpga,xilinx,noht
p03-amd02        x86_64,amd,milan,ib,gpu,mi100,fpga,xilinx,noht
p03-amd01        x86_64,amd,milan,ib,gpu,mi100,fpga,xilinx,ht
p04-edge01       x86_64,intel,broadwell,ib,ht
p04-edge01       x86_64,intel,broadwell,ib,ht
p05-synt01       x86_64,amd,milan,ib,ht
p05-synt01       x86_64,amd,milan,ib,ht
p06-arm[01-02]   aarch64,ib
p07-power01      ppc64le,ib
p08-amd01        x86_64,amd,milan-x,ib,ht
p10-intel01      x86_64,intel,sapphire_rapids,ht
```
```


```
```
Original line number Original line Diff line number Diff line
@@ -199,7 +199,38 @@ The following is the list of software available on partiton 09:
- 40x Windows 10/11 Enterprise E3 VDA (Microsoft) per year
- 40x Windows 10/11 Enterprise E3 VDA (Microsoft) per year
- Hardware VMware Horizon management
- Hardware VMware Horizon management


## Partition 10 - Sapphire Rapids-HBM Server

The primary purpose of this server is to evaluate the impact of HBM memory on an x86 processor
on the performance of user applications.
HBM is a feature previously available only on GPGPU accelerators,
where it provides a significant boost to memory-bound applications.
Users can also compare the impact of HBM memory with the impact of the large L3 cache
of the AMD Milan-X processor, which is also available among the complementary systems.
The server is equipped with DDR5 memory as well, enabling comparative studies with reference to DDR4-based systems.

- 2x Intel® Xeon® CPU Max 9468, 48 cores, base 2.1GHz, max 3.5GHz
- 16x 16GB DDR5 4800MHz
- 2x Intel D3 S4520 960GB SATA 6Gb/s
- 1x Supermicro Standard LP 2-port 10GbE RJ45, Broadcom BCM57416
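
On this server, the HBM is typically exposed to the operating system as additional NUMA nodes
next to the DDR5 memory. A quick way to inspect the resulting memory layout is `numactl`
(a sketch; the exact NUMA topology depends on the configured memory mode):

```console
$ numactl -H
```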

## Partition 11 - NVIDIA Grace CPU Superchip

The [NVIDIA Grace CPU Superchip][6] uses the [NVIDIA® NVLink®-C2C][5] technology to deliver 144 Arm® Neoverse V2 cores and 1TB/s of memory bandwidth.
It runs all NVIDIA software stacks and platforms, including NVIDIA RTX™, NVIDIA HPC SDK, NVIDIA AI, and NVIDIA Omniverse™.

- Superchip design with up to 144 Arm Neoverse V2 CPU cores with Scalable Vector Extensions (SVE2)
- World’s first LPDDR5X with error-correcting code (ECC) memory, 1TB/s total bandwidth
- 900GB/s coherent interface, 7X faster than PCIe Gen 5
- NVIDIA Scalable Coherency Fabric with 3.2TB/s of aggregate bisectional bandwidth
- 2X the packaging density of DIMM-based solutions
- 2X the performance per watt of today’s leading CPU
- FP64 Peak of 7.1TFLOPS

[1]: https://www.bittware.com/fpga/520n-mx/
[1]: https://www.bittware.com/fpga/520n-mx/
[2]: https://www.xilinx.com/products/boards-and-kits/alveo/u250.html#overview
[2]: https://www.xilinx.com/products/boards-and-kits/alveo/u250.html#overview
[3]: https://www.xilinx.com/products/boards-and-kits/alveo/u280.html#overview
[3]: https://www.xilinx.com/products/boards-and-kits/alveo/u280.html#overview
[4]: https://developer.arm.com/documentation/100095/0003/
[4]: https://developer.arm.com/documentation/100095/0003/
[5]: https://www.nvidia.com/en-us/data-center/nvlink-c2c/
[6]: https://www.nvidia.com/en-us/data-center/grace-cpu-superchip/
Original line number Original line Diff line number Diff line
@@ -7,7 +7,8 @@


## How to Access
## How to Access


The DGX-2 machine can be accessed through the scheduler from Barbora login nodes `barbora.it4i.cz` as a compute node cn202.
The DGX-2 machine is integrated into [Barbora cluster][3].
The DGX-2 machine can be accessed from the Barbora login nodes `barbora.it4i.cz` through the Barbora scheduler queue `qdgx` as the compute node cn202.


## Storage
## Storage


@@ -32,3 +33,4 @@ For more information on accessing PROJECT, its quotas, etc., see the [PROJECT Da


[1]: ../../barbora/storage/#home-file-system
[1]: ../../barbora/storage/#home-file-system
[2]: ../../storage/project-storage
[2]: ../../storage/project-storage
[3]: ../../barbora/introduction
Original line number Original line Diff line number Diff line
# NVIDIA DGX-2
# Introduction


The DGX-2 is a very powerful computational node, featuring high end x86_64 processors and 16 NVIDIA V100-SXM3 GPUs.
NVIDIA DGX-2 is a very powerful computational node, featuring high end x86_64 processors and 16 NVIDIA V100-SXM3 GPUs.


| NVIDIA DGX-2  | |
| NVIDIA DGX-2  | |
| --- | --- |
| --- | --- |
Original line number Original line Diff line number Diff line
@@ -2,38 +2,24 @@


To run a job, computational resources of DGX-2 must be allocated.
To run a job, computational resources of DGX-2 must be allocated.


## Resources Allocation Policy
The DGX-2 machine is integrated into and accessible through the Barbora cluster; the queue for the DGX-2 machine is called **qdgx**.

The resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue. The queue provides prioritized and exclusive access to computational resources.

The queue for the DGX-2 machine is called **qdgx**.

!!! note
    The qdgx queue is configured to run one job and accept one job in a queue per user with the maximum walltime of a job being **48** hours.

## Job Submission and Execution

The `qsub` submits the job into the queue. The command creates a request to the PBS Job manager for allocation of specified resources. The resources will be allocated when available, subject to allocation policies and constraints. After the resources are allocated, the jobscript or interactive shell is executed on the allocated node.

### Job Submission


When allocating computational resources for the job, specify:
When allocating computational resources for the job, specify:


1. a queue for your job (the default is **qdgx**);
1. your Project ID
1. the maximum wall time allocated to your calculation (default is **4 hour**, maximum is **48 hour**);
1. a queue for your job - **qdgx**;
1. a jobscript or interactive switch.
1. the maximum time allocated to your calculation (default is **4 hours**, maximum is **48 hours**);

1. a jobscript if batch processing is intended.
!!! info
    You can access the DGX PBS scheduler by loading the "DGX-2" module.


Submit the job using the `qsub` command:
Submit the job using the `sbatch` (for batch processing) or `salloc` (for interactive session) command:


**Example**
**Example**


```console
```console
[kru0052@login2.barbora ~]$ qsub -q qdgx -l walltime=02:00:00 -I
[kru0052@login2.barbora ~]$ salloc -A PROJECT-ID -p qdgx --time=02:00:00
qsub: waiting for job 258.dgx to start
salloc: Granted job allocation 36631
qsub: job 258.dgx ready
salloc: Waiting for resource configuration
salloc: Nodes cn202 are ready for job


kru0052@cn202:~$ nvidia-smi
kru0052@cn202:~$ nvidia-smi
Wed Jun 16 07:46:32 2021
Wed Jun 16 07:46:32 2021
@@ -95,9 +81,9 @@ kru0052@cn202:~$ exit
```
```


!!! tip
!!! tip
    Submit the interactive job using the `qsub -I ...` command.
    Submit the interactive job using the `salloc` command.


### Job Execution
## Job Execution


The DGX-2 machine runs only a bare-bone, minimal operating system. Users are expected to run
The DGX-2 machine runs only a bare-bone, minimal operating system. Users are expected to run
**[Apptainer/Singularity][1]** containers in order to enrich the environment according to the needs.
**[Apptainer/Singularity][1]** containers in order to enrich the environment according to the needs.
@@ -107,12 +93,13 @@ Containers (Docker images) optimized for DGX-2 may be downloaded from
copy the docker nvcr.io link from the Pull Command section. This link may be directly used
copy the docker nvcr.io link from the Pull Command section. This link may be directly used
to download the container via Apptainer/Singularity, see the example below:
to download the container via Apptainer/Singularity, see the example below:


#### Example - Apptainer/Singularity Run Tensorflow
### Example - Apptainer/Singularity Run Tensorflow


```console
```console
[kru0052@login2.barbora ~]$ qsub -q qdgx -l walltime=01:00:00 -I
[kru0052@login2.barbora ~] $ salloc -A PROJECT-ID -p qdgx --time=02:00:00
qsub: waiting for job 96.dgx to start
salloc: Granted job allocation 36633
qsub: job 96.dgx ready
salloc: Waiting for resource configuration
salloc: Nodes cn202 are ready for job


kru0052@cn202:~$ singularity shell docker://nvcr.io/nvidia/tensorflow:19.02-py3
kru0052@cn202:~$ singularity shell docker://nvcr.io/nvidia/tensorflow:19.02-py3
Singularity tensorflow_19.02-py3.sif:~>
Singularity tensorflow_19.02-py3.sif:~>
Original line number Original line Diff line number Diff line
@@ -2,7 +2,9 @@


DICE (Data Infrastructure Capacity for EOSC) is an international project funded by the European Union
DICE (Data Infrastructure Capacity for EOSC) is an international project funded by the European Union
that provides cutting-edge data management services and a significant amount of storage resources for the EOSC.
that provides cutting-edge data management services and a significant amount of storage resources for the EOSC.
The EOSC (European Open Science Cloud) project provides European researchers, innovators, companies, and citizens with a federated and open multi-disciplinary environment where they can publish, find, and re-use data, tools, and services for research, innovation and educational purposes.
The EOSC (European Open Science Cloud) project provides European researchers, innovators, companies,
and citizens with a federated and open multi-disciplinary environment
where they can publish, find, and re-use data, tools, and services for research, innovation and educational purposes.


For more information, see the official [DICE project][b] and [EOSC project][q] pages.
For more information, see the official [DICE project][b] and [EOSC project][q] pages.


Original line number Original line Diff line number Diff line
@@ -2,7 +2,9 @@


## Shells on Clusters
## Shells on Clusters


The table shows which shells are supported on the IT4Innovations clusters.
The table shows which shells are available on the IT4Innovations clusters.

Note that bash is the only supported shell.


| Cluster Name    | bash | tcsh | zsh | ksh | dash |
| Cluster Name    | bash | tcsh | zsh | ksh | dash |
| --------------- | ---- | ---- | --- | --- | ---- |
| --------------- | ---- | ---- | --- | --- | ---- |
@@ -11,7 +13,7 @@ The table shows which shells are supported on the IT4Innovations clusters.
| DGX-2           | yes  | no   | no  | no  | no   |
| DGX-2           | yes  | no   | no  | no  | no   |


!!! info
!!! info
    BASH is the default shell. Should you need a different shell, contact [support\[at\]it4i.cz][3].
    Bash is the default shell. Should you need a different shell, contact [support\[at\]it4i.cz][3].


## Environment Customization
## Environment Customization


@@ -24,7 +26,7 @@ After logging in, you may want to configure the environment. Write your preferre
export MODULEPATH=${MODULEPATH}:/home/$USER/.local/easybuild/modules/all
export MODULEPATH=${MODULEPATH}:/home/$USER/.local/easybuild/modules/all


# User specific aliases and functions
# User specific aliases and functions
alias qs='qstat -a'
alias sq='squeue --me'


# load default intel compilator !!! is not recommended !!!
# load default intel compilator !!! is not recommended !!!
ml intel
ml intel
@@ -37,7 +39,7 @@ fi
```
```


!!! note
!!! note
    Do not run commands outputting to standard output (echo, module list, etc.) in .bashrc for non-interactive SSH sessions. It breaks the fundamental functionality (SCP, PBS) of your account. Take care for SSH session interactivity for such commands as stated in the previous example.
    Do not run commands outputting to standard output (echo, module list, etc.) in .bashrc for non-interactive SSH sessions. It breaks the fundamental functionality (SCP) of your account. Take care for SSH session interactivity for such commands as stated in the previous example.


### Application Modules
### Application Modules


Original line number Original line Diff line number Diff line
@@ -9,15 +9,25 @@ IT4Innovations has become a member of e-INFRA CZ on January 2022.


## Request e-INFRA CZ Account
## Request e-INFRA CZ Account


1. Request an account:
    1. Go to [https://signup.e-infra.cz/fed/registrar/?vo=IT4Innovations][2]
    1. Go to [https://signup.e-infra.cz/fed/registrar/?vo=IT4Innovations][2]
    1. Select a member academic institution you are affiliated with.
    1. Select a member academic institution you are affiliated with.
    1. Fill out the e-INFRA CZ Account information (username, password and ssh key(s)).
    1. Fill out the e-INFRA CZ Account information (username, password and ssh key(s)).


    Your account should be created in a few minutes after submitting the request.
    Your account should be created in a few minutes after submitting the request.

    Once your e-INFRA CZ account is created, it is propagated into IT4I systems
    Once your e-INFRA CZ account is created, it is propagated into IT4I systems
    and can be used to access [SCS portal][3] and [Request Tracker][4].
    and can be used to access [SCS portal][3] and [Request Tracker][4].


1. Provide additional information via [IT4I support][a] or email [support\[at\]it4i.cz][b] (**required**, note that without this information, you cannot use IT4I resources):
    1. **Full name**
    1. **Gender**
    1. **Citizenship**
    1. **Country of residence**
    1. **Organization/affiliation**
    1. **Organization/affiliation country**
    1. **Organization/affiliation type** (university, company, R&D institution, private/public sector (hospital, police), academy of sciences, etc.)
    1. **Job title**  (student, PhD student, researcher, research assistant, employee, etc.)

Continue to apply for a project or project membership to access clusters through the [SCS portal][3].
Continue to apply for a project or project membership to access clusters through the [SCS portal][3].


## Logging Into IT4I Services
## Logging Into IT4I Services
@@ -38,3 +48,6 @@ You can change you profile settings at any time.
[4]: https://support.it4i.cz/
[4]: https://support.it4i.cz/
[5]: ../../management/einfracz-profile.md
[5]: ../../management/einfracz-profile.md
[6]: https://www.eduid.cz/
[6]: https://www.eduid.cz/

[a]: https://support.it4i.cz/rt/
[b]: mailto:support@it4i.cz
Original line number Original line Diff line number Diff line
@@ -8,13 +8,43 @@ For more information, see the Open OnDemand [documentation][2].


## Access Open OnDemand
## Access Open OnDemand


!!! note
    Mate is currently available on Karolina only.

To access the OOD service, you must be connected to [IT4I VPN][a].
To access the OOD service, you must be connected to [IT4I VPN][a].
Then go to [https://ood-karolina.it4i.cz/][3] for Karolina or [https://ood-barbora.it4i.cz/][4] for Barbora and enter your e-INFRA CZ or IT4I credentials.
Then go to [https://ood-karolina.it4i.cz/][3] for Karolina
or [https://ood-barbora.it4i.cz/][4] for Barbora and enter your e-INFRA CZ or IT4I credentials.

From the top menu bar, you can manage your files and jobs, access the cluster's shell
and launch interactive apps on login nodes.


From the top menu bar, you can manage your files and jobs, access the cluster's shell and launch interactive apps on login nodes - Mate & XFCE desktops.
## OOD Apps on IT4I Clusters

!!! note
    Barbora OOD offers Mate and XFCE Desktops on the login node only. Other applications listed below are exclusive to Karolina OOD.

* Desktops
    * Karolina Login Mate
    * Karolina Login XFCE
    * Gnome Desktop
* GUIs
    * Ansys
    * Blender
    * ParaView
    * TorchStudio
* Servers
    * Code Server
    * Jupyter (+IJulia)
    * MATLAB
    * TensorBoard
* Simulation
    * Code Aster

Depending on the selected application, you can set up various properties,
e.g. partition, number of nodes, tasks per node, reservation, etc.

For `qgpu` partitions, you can select the number of GPUs.

![Ansys app in OOD GUI](../../../img/ood-ansys.png)

## Job Composer Tutorial


Under *Jobs > Job Composer*, you can create jobs from several sources.
Under *Jobs > Job Composer*, you can create jobs from several sources.
A simple tutorial will guide you through the process.
A simple tutorial will guide you through the process.
Original line number Original line Diff line number Diff line
@@ -227,10 +227,10 @@ Open a Terminal (_Applications -> System Tools -> Terminal_). Run all the follow


Allow incoming X11 graphics from the compute nodes at the login node:
Allow incoming X11 graphics from the compute nodes at the login node:


Get an interactive session on a compute node (for more detailed info [look here][4]). Forward X11 system using `X` option:
Get an interactive session on a compute node (for more detailed info [look here][4]). Forward X11 system using `--x11` option:


```console
```console
$ qsub -I -X -A PROJECT_ID -q qprod -l select=1:ncpus=36
$ salloc -A PROJECT_ID -p qcpu --x11
```
```


Test that the DISPLAY redirection into your VNC session works, by running an X11 application (e.g. XTerm, Intel Advisor, etc.) on the assigned compute node:
Test that the DISPLAY redirection into your VNC session works, by running an X11 application (e.g. XTerm, Intel Advisor, etc.) on the assigned compute node:
@@ -249,10 +249,10 @@ For a [better performance][1] an SSH connection can be used.


Open two Terminals (_Applications -> System Tools -> Terminal_) as described before.
Open two Terminals (_Applications -> System Tools -> Terminal_) as described before.


Get an interactive session on a compute node (for more detailed info [look here][4]). Forward X11 system using `X` option:
Get an interactive session on a compute node (for more detailed info [look here][4]). Forward X11 system using `--x11` option:


```console
```console
$ qsub -I -X -A PROJECT_ID -q qprod -l select=1:ncpus=36
$ salloc -A PROJECT_ID -p qcpu --x11
```
```


In the second terminal connect to the assigned node and run the X11 application
In the second terminal connect to the assigned node and run the X11 application
Original line number Original line Diff line number Diff line
@@ -99,21 +99,21 @@ In this example, we activate the Intel programing environment tools and then sta


## GUI Applications on Compute Nodes
## GUI Applications on Compute Nodes


Allocate the compute nodes using the `-X` option on the `qsub` command:
Allocate the compute nodes using the `--x11` option on the `salloc` command:


```console
```console
$ qsub -q qexp -l select=2:ncpus=24 -X -I
$ salloc -A PROJECT-ID -p qcpu_exp --x11
```
```


In this example, we allocate 2 nodes via qexp queue, interactively. We request X11 forwarding with the `-X` option. It will be possible to run the GUI enabled applications directly on the first compute node.
In this example, we allocate one node via the qcpu_exp partition, interactively. We request X11 forwarding with the `--x11` option. It will be possible to run GUI-enabled applications directly on the allocated compute node.


For **better performance**, log on the allocated compute node via SSH, using the `-X` option.
For **better performance**, log on the allocated compute node via SSH, using the `-X` option.


```console
```console
$ ssh -X r24u35n680
$ ssh -X cn245
```
```


In this example, we log on the r24u35n680 compute node, with the X11 forwarding enabled.
In this example, we log on to the cn245 compute node with X11 forwarding enabled.


## Gnome GUI Environment
## Gnome GUI Environment


@@ -143,7 +143,7 @@ xinit /usr/bin/ssh -XT -i .ssh/path_to_your_key yourname@cluster-namen.it4i.cz g
```
```


However, this method does not seem to work with recent Linux distributions and you will need to manually source
However, this method does not seem to work with recent Linux distributions and you will need to manually source
/etc/profile to properly set environment variables for PBS.
/etc/profile to properly set environment variables for Slurm.


### Gnome on Windows
### Gnome on Windows


Original line number Original line Diff line number Diff line
@@ -28,7 +28,7 @@ Some applications (e.g. Paraview, Ensight, Blender, Ovito) require not only visu
1. Run interactive job in gnome terminal
1. Run interactive job in gnome terminal


    ```console
    ```console
    [loginX.karolina]$ qsub -q qnvidia -l select=1 -IX -A OPEN-XX-XX -l xorg=True
    [loginX.karolina]$ salloc -A PROJECT-ID -p qgpu --x11 --comment use:xorg=true
    ```
    ```


1. Run Xorg server
1. Run Xorg server
@@ -82,7 +82,7 @@ Some applications (e.g. Paraview, Ensight, Blender, Ovito) require not only visu
1. Run job from terminal:
1. Run job from terminal:


    ```console
    ```console
    [loginX.karolina]$ qsub -q qnvidia -l select=1 -A OPEN-XX-XX -l xorg=True ./run_eevee.sh
    [loginX.karolina]$ sbatch -A PROJECT-ID -p qcpu --comment use:xorg=true ./run_eevee.sh
    ```
    ```


[1]: ./vnc.md
[1]: ./vnc.md
Original line number Original line Diff line number Diff line
@@ -10,7 +10,7 @@ SSH uses public-private key pair for authentication, allowing users to log in wi


A private key file in the `id_rsa` or `*.ppk` format is present locally on local side and used for example in the Pageant SSH agent (for Windows users). The private key should always be kept in a safe place.
A private key file in the `id_rsa` or `*.ppk` format is present locally on local side and used for example in the Pageant SSH agent (for Windows users). The private key should always be kept in a safe place.


An example of private key format:
### Example of RSA Private Key Format


```console
```console
    -----BEGIN RSA PRIVATE KEY-----
    -----BEGIN RSA PRIVATE KEY-----
@@ -42,16 +42,45 @@ An example of private key format:
    -----END RSA PRIVATE KEY-----
    -----END RSA PRIVATE KEY-----
```
```


### Example of Ed25519 Private Key Format

```console
PuTTY-User-Key-File-3: ssh-ed25519
Encryption: aes256-cbc
Comment: eddsa-key-20240910
Public-Lines: 2
AAAAC3NzaC1lZDI1NTE5AAAAIBKNwqaWU260wueN00nBGRwIqeOedRedtS0T7QVn
h0i2
Key-Derivation: Argon2id
Argon2-Memory: 8192
Argon2-Passes: 21
Argon2-Parallelism: 1
Argon2-Salt: bb64fc32b368aa16d6e8159c8d921f63
Private-Lines: 1
+7StvvEmCMchEy1tUyIMLfGTZBk7dgGUpJEJzNl82qmNZD1TmQOqNmCRiK84P/TL
Private-MAC: dc3f83cef42026a2038f28e96f87367d762e72265621d82e2fe124634ec3c905
```

## Public Key
## Public Key


A public key file in the `*.pub` format is present on the remote side and allows an access to the owner of the matching private key.
A public key file in the `*.pub` format is present on the remote side and allows an access to the owner of the matching private key.


An example of public key format:
### Example of RSA Public Key Format


```console
```console
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCpujuOiTKCcGkbbBhrk0Hjmezr5QpM0swscXQE7fOZG0oQSURoapd9tjC9eVy5FvZ339jl1WkJkdXSRtjc2G1U5wQh77VE5qJT0ESxQCEw0S+CItWBKqXhC9E7gFY+UyP5YBZcOneh6gGHyCVfK6H215vzKr3x+/WvWl5gZGtbf+zhX6o4RJDRdjZPutYJhEsg/qtMxcCtMjfm/dZTnXeafuebV8nug3RCBUflvRb1XUrJuiX28gsd4xfG/P6L/mNMR8s4kmJEZhlhxpj8Th0iIc+XciVtXuGWQrbddcVRLxAmvkYAPGnVVOQeNj69pqAR/GXaFAhvjYkseEowQao1 username@organization.example.com
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCpujuOiTKCcGkbbBhrk0Hjmezr5QpM0swscXQE7fOZG0oQSURoapd9tjC9eVy5FvZ339jl1WkJkdXSRtjc2G1U5wQh77VE5qJT0ESxQCEw0S+CItWBKqXhC9E7gFY+UyP5YBZcOneh6gGHyCVfK6H215vzKr3x+/WvWl5gZGtbf+zhX6o4RJDRdjZPutYJhEsg/qtMxcCtMjfm/dZTnXeafuebV8nug3RCBUflvRb1XUrJuiX28gsd4xfG/P6L/mNMR8s4kmJEZhlhxpj8Th0iIc+XciVtXuGWQrbddcVRLxAmvkYAPGnVVOQeNj69pqAR/GXaFAhvjYkseEowQao1 username@organization.example.com
```
```


### Example of Ed25519 Public Key Format

```console
---- BEGIN SSH2 PUBLIC KEY ----
Comment: "eddsa-key-20240910"
AAAAC3NzaC1lZDI1NTE5AAAAIBKNwqaWU260wueN00nBGRwIqeOedRedtS0T7QVn
h0i2
---- END SSH2 PUBLIC KEY ----
```

## SSH Key Management
## SSH Key Management


You can manage your own SSH key for authentication to clusters:
You can manage your own SSH key for authentication to clusters:
Original line number Original line Diff line number Diff line
@@ -5,7 +5,7 @@
To generate a new keypair of your public and private key, use the `ssh-keygen` tool:
To generate a new keypair of your public and private key, use the `ssh-keygen` tool:


```console
```console
local $ ssh-keygen -C 'username@organization.example.com' -f additional_key
local $ ssh-keygen -t ed25519 -C 'username@organization.example.com' -f additional_key
```
```


!!! note
!!! note
@@ -14,6 +14,26 @@ local $ ssh-keygen -C 'username@organization.example.com' -f additional_key
By default, your private key is saved to the `id_rsa` file in the `.ssh` directory
By default, your private key is saved to the `id_rsa` file in the `.ssh` directory
and your public key is saved to the `id_rsa.pub` file.
and your public key is saved to the `id_rsa.pub` file.


## Adding SSH Key to Linux System SSH Agent

1. Start the SSH agent, if it is not already running:

    ```
    eval "$(ssh-agent -s)"
    ```

1. Add the key to SSH Agent:

    ```
    ssh-add ~/.ssh/name_of_your_ssh_key_file
    ```

1. Verify that the key was added to the SSH agent:

    ```
    ssh-add -l
    ```
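
Optionally, you can tell SSH to use the key automatically when connecting to the clusters
by adding an entry to your `~/.ssh/config`. A minimal sketch (the host alias and `your_username`
are placeholders; the key file name matches the example above):

```
Host barbora
    HostName barbora.it4i.cz
    User your_username
    IdentityFile ~/.ssh/name_of_your_ssh_key_file
```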

## Managing Your SSH Key
## Managing Your SSH Key


To manage your SSH key for authentication to clusters, see the [SSH Key Management][1] section.
To manage your SSH key for authentication to clusters, see the [SSH Key Management][1] section.
Original line number Original line Diff line number Diff line
@@ -2,17 +2,26 @@


## Accessing IT4Innovations Internal Resources via VPN
## Accessing IT4Innovations Internal Resources via VPN


To access IT4Innovations' resources and licenses, it is necessary to connect to its local network via VPN. IT4Innovations uses the FortiClient VPN software. For the list of supported operating systems, see the [FortiClient Administration Guide][a].
To access IT4Innovations' resources and licenses, it is necessary to connect to its local network via VPN.
IT4Innovations uses the FortiClient VPN software.
For the list of supported operating systems, see the [FortiClient Administration Guide][a].

!!! Note "Realms"
    If you are a member of a partner organization, we may ask you to use a so-called realm in your VPN connection. In the Remote Gateway field, include the realm path after the IP address or hostname. For example, for the realm `excellent`, the field would read as follows: `reconnect.it4i.cz:443/excellent`.


## VPN Client Download
## VPN Client Download


* Windows: Download the FortiClient app from the [Windows Store][b].
* Windows: Download the **FortiClient VPN-only** app from the [official page][g] (Microsoft Store app is not recommended).
* Mac: Download the FortiClient VPN app from the [Apple Store][d].
* Mac: Download the **FortiClient VPN** app from the [Apple Store][d].
* Linux: Download the [FortiClient][e] or [OpenFortiVPN][f] app.
* Linux: Download the [FortiClient][e] or [OpenFortiVPN][f] app.


## Working With Windows/Mac VPN Client
## Working With Windows/Mac VPN Client


Before the first login, you must configure the VPN. In the New VPN Connection section, provide the name of your VPN connection and the following settings:
!!! Tip "Instructional video for Mac"
    See [the instructional video][h] on how to download the VPN client and connect to the IT4I VPN on Mac.

Before the first login, you must configure the VPN.
In the New VPN Connection section, provide the name of your VPN connection and the following settings:


Name                | Value
Name                | Value
:-------------------|:------------------
:-------------------|:------------------
@@ -27,9 +36,6 @@ Optionally, you can describe the VPN connection and select Save Login under Auth


Save the settings, enter your login credentials and click Connect.
Save the settings, enter your login credentials and click Connect.


!!! note
    Make sure your username and password are correct. If you enter invalid credentials, FortiClient VPN returns a general warning (-14).

![](../../img/fc_vpn_web_login_3_1.png)
![](../../img/fc_vpn_web_login_3_1.png)


## Linux Client
## Linux Client
@@ -44,14 +50,16 @@ Set-Routes | Enabled
Set-DNS      | Enabled
Set-DNS      | Enabled
DNS Servers  | 10.5.8.11, 10.5.8.22
DNS Servers  | 10.5.8.11, 10.5.8.22


Linux VPN clients need to run under root. OpenFortiGUI uses sudo by default, be sure, that your user is allowed to use sudo.
Linux VPN clients need to run under root.
OpenFortiGUI uses sudo by default; be sure that your user is allowed to use sudo.
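
If you prefer the command-line OpenFortiVPN client, a minimal connection sketch looks like this
(use the gateway, and optionally the realm path, provided to you; `your_username` is a placeholder):

```console
$ sudo openfortivpn reconnect.it4i.cz:443 --username=your_username
```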


[1]: ../../general/obtaining-login-credentials/obtaining-login-credentials.md#login-credentials
[1]: ../../general/obtaining-login-credentials/obtaining-login-credentials.md#login-credentials
[2]: ../../general/access/einfracz-account.md
[2]: ../../general/access/einfracz-account.md


[a]: http://docs.fortinet.com/document/forticlient/latest/administration-guide/646779/installation-requirements
[a]: http://docs.fortinet.com/document/forticlient/latest/administration-guide/646779/installation-requirements
[b]: https://apps.microsoft.com/store/detail/forticlient/9WZDNCRDH6MC?hl=en-us&gl=us
[c]: https://github.com/theinvisible/openfortigui
[c]: https://github.com/theinvisible/openfortigui
[d]: https://apps.apple.com/cz/app/forticlient-vpn/id1475674905?l=cs
[d]: https://apps.apple.com/cz/app/forticlient-vpn/id1475674905?l=cs
[e]: https://www.fortinet.com/support/product-downloads/linux
[e]: https://www.fortinet.com/support/product-downloads/linux
[f]: https://github.com/adrienverge/openfortivpn
[f]: https://github.com/adrienverge/openfortivpn
[g]: https://www.fortinet.com/support/product-downloads#vpn
[h]: https://www.youtube.com/watch?v=xGcROEreop8
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -8,7 +8,6 @@ The computational resources of IT4I are allocated by the Allocation Committee vi


* Academic researchers may apply via Open Access Competitions.
* Academic researchers may apply via Open Access Competitions.
* Commercial and non-commercial institutions may also apply via the Directors Discretion.
* Commercial and non-commercial institutions may also apply via the Directors Discretion.
* Foreign (mostly European) users can obtain computational resources via the [PRACE (DECI) program][d].


In all cases, IT4Innovations’ access mechanisms are aimed at distributing computational resources while taking into account the development and application of supercomputing methods and their benefits and usefulness for society. The applicants are expected to submit a proposal. In the proposal, the applicants **apply for a particular amount of core-hours** of computational resources. The requested core-hours should be substantiated by scientific excellence of the proposal, its computational maturity and expected impacts. The allocation decision is based on the scientific, technical, and economic evaluation of the proposal.
In all cases, IT4Innovations’ access mechanisms are aimed at distributing computational resources while taking into account the development and application of supercomputing methods and their benefits and usefulness for society. The applicants are expected to submit a proposal. In the proposal, the applicants **apply for a particular amount of core-hours** of computational resources. The requested core-hours should be substantiated by scientific excellence of the proposal, its computational maturity and expected impacts. The allocation decision is based on the scientific, technical, and economic evaluation of the proposal.


+4 −0
Original line number Original line Diff line number Diff line
# Acceptable Use Policy

![Acceptable Use Policy (PDF)](../general/AUP-final.pdf){ type=application/pdf style="min-height:100vh;width:100%" }
+32 −0
Original line number Original line Diff line number Diff line
---
hide:
  - toc
---

# Barbora Partitions

!!! important
    Active [project membership][1] is required to run jobs.

Below is the list of partitions available on the Barbora cluster:

| Partition        | Project resources    | Nodes                      | Min ncpus | Priority | Authorization | Walltime (def/max) |
| ---------------- | -------------------- | -------------------------- | --------- | -------- | ------------- | ------------------ |
| **qcpu**         | > 0                  | 190                        | 36        | 2        | no            | 24 / 48h           |
| **qcpu_biz**     | > 0                  | 190                        | 36        | 3        | no            | 24 / 48h           |
| **qcpu_exp**     | < 150% of allocation | 16                         | 36        | 4        | no            | 1 / 1h             |
| **qcpu_free**    | < 150% of allocation | 124<br>max 4 per job       | 36        | 1        | no            | 12 / 18h           |
| **qcpu_long**    | > 0                  | 60<br>max 20 per job       | 36        | 2        | no            | 72 / 144h          |
| **qcpu_preempt** | active Barbora<br>CPU alloc. | 190<br>max 4 per job       | 36        | 0        | no            | 12 / 12h           |
| **qgpu**         | > 0                  | 8                          | 24        | 2        | yes           | 24 / 48h           |
| **qgpu_biz**     | > 0                  | 8                          | 24        | 3        | yes           | 24 / 48h           |
| **qgpu_exp**     | < 150% of allocation | 4<br>max 1 per job         | 24        | 4        | no            | 1 / 1h             |
| **qgpu_free**    | < 150% of allocation | 5<br>max 2 per job         | 24        | 1        | no            | 12 / 18h           |
| **qgpu_preempt** | active Barbora<br>GPU alloc. | 4<br>max 2 per job         | 24        | 0        | no            | 12 / 12h           |
| **qdgx**         | > 0                  | cn202                      | 96        | 2        | yes           | 4 / 48h            |
| **qviz**         | > 0                  | 2 with NVIDIA Quadro P6000 | 4         | 2        | no            | 1 / 8h             |
| **qfat**         | > 0                  | 1 fat node                 | 128       | 2        | yes           | 24 / 48h           |
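
For example, an interactive allocation in the `qcpu` partition can be requested as follows
(a sketch; substitute your project ID):

```console
$ salloc -A PROJECT-ID -p qcpu
```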

[1]: access/project-access.md
+0 −26
Original line number Original line Diff line number Diff line
# Barbora Queues

Below is the list of queues available on the Barbora cluster:

| Queue            | Active project | Project resources    | Nodes                                                         | Min ncpus | Priority | Authorization | Walltime (default/max)  |
| ---------------- | -------------- | -------------------- | -------------------------------- | --------- | -------- | ------------- | ---------------------- |
| **qcpu**         | yes            | > 0                  | 190 nodes                        | 36        | 0        | no            | 24 / 48h               |
| **qcpu_biz**     | yes            | > 0                  | 190 nodes                        | 36        | 50       | no            | 24 / 48h               |
| **qcpu_exp**     | yes            | none required        | 16 nodes                         | 36        | 150      | no            | 1 / 1h                 |
| **qcpu_free**    | yes            | < 150% of allocation | 124 nodes<br>max 4 nodes per job | 36        | -100     | no            | 12 / 18h               |
| **qcpu_long**    | yes            | > 0                  | 60 nodes<br>max 20 nodes per job | 36        | 0        | no            | 72 / 144h              |
| **qcpu_preempt** | yes            | > 0                  | 190 nodes<br>max 4 nodes per job | 36        | -200     | no            | 12 / 12h               |
| **qgpu**         | yes            | > 0                  | 8 nodes                          | 24        | 0        | yes           | 24 / 48h               |
| **qgpu_biz**     | yes            | > 0                  | 8 nodes                          | 24        | 50       | yes           | 24 / 48h               |
| **qgpu_exp**     | yes            | none required        | 4 nodes<br>max 1 node per job    | 24        | 0        | no            | 1 / 1h                 |
| **qgpu_free**    | yes            | < 150% of allocation | 5 nodes<br>max 2 nodes per job   | 24        | -100     | no            | 12 / 18h               |
| **qgpu_preempt** | yes            | > 0                  | 4 nodes<br>max 2 nodes per job   | 24        | -200     | no            | 12 / 12h               |
| **qdgx**         | yes            | > 0                  | cn202                            | 96        | 0        | yes           | 4 / 48h                |
| **qviz**         | yes            | none required        | 2 nodes with NVIDIA Quadro P6000 | 4         | 0        | no            | 1 / 8h                 |
| **qfat**         | yes            | > 0                  | 1 fat node                       | 128       | 0        | yes           | 24 / 48h               |
| **Legacy Queues**                 |
| **qexp**         | no             | none required        | 16 nodes<br>max 4 nodes per job  | 36        | 150      | no            | 1 / 1h                 |
| **qprod**        | yes            | > 0                  | 190 nodes w/o accelerator        | 36        | 0        | no            | 24 / 48h               |
| **qlong**        | yes            | > 0                  | 60 nodes w/o accelerator<br>max 20 nodes per job     | 36        | 0        | no            | 72 / 144h              |
| **qnvidia**      | yes            | > 0                  | 8 NVIDIA nodes                   | 24        | 0        | yes           | 24 / 48h               |
| **qfree**        | yes            | < 150% of allocation | 192 w/o accelerator<br>max 32 nodes per job  | 36       | -100    | no            | 12 / 12h     |
Original line number Original line Diff line number Diff line
@@ -2,20 +2,26 @@


## Introduction
## Introduction


In many cases, it is useful to submit a huge (>100) number of computational jobs into the PBS queue system. A huge number of (small) jobs is one of the most effective ways to execute embarrassingly parallel calculations, achieving the best runtime, throughput, and computer utilization.
In many cases, it is useful to submit a huge number of computational jobs into the Slurm queue system.
A huge number of (small) jobs is one of the most effective ways to execute embarrassingly parallel calculations,
achieving the best runtime, throughput, and computer utilization. This is called **Capacity Computing**.


However, executing a huge number of jobs via the PBS queue may strain the system. This strain may result in slow response to commands, inefficient scheduling, and overall degradation of performance and user experience for all users. For this reason, the number of jobs is **limited to 100 jobs per user, 4,000 jobs and subjobs per user, 1,500 subjobs per job array**.
However, executing a huge number of jobs via the Slurm queue may strain the system. This strain may
result in slow response to commands, inefficient scheduling, and overall degradation of performance
and user experience for all users.  
We **recommend** using [**Job arrays**][1] or [**HyperQueue**][2] to execute many jobs.


!!! note
There are two primary scenarios:
    Follow one of the procedures below, in case you wish to schedule more than 100 jobs at a time.


* Use [Job arrays][1] when running a huge number of multithread (bound to one node only) or multinode (multithread across several nodes) jobs.
1. Number of jobs < 1500, **and** the jobs are able to utilize one or more **full** nodes:  
* Use [HyperQueue][3] when running a huge number of multithread jobs. HyperQueue can help overcome the limits of job arrays.
    Use [**Job arrays**][1].  
    A job array allows you to submit and control up to 1500 jobs (tasks) in one packet. Several job arrays may be submitted.


## Policy
2. Number of jobs >> 1500, **or** the jobs only utilize a **few cores/accelerators** each:  
    Use [**HyperQueue**][2].  
    HyperQueue can help efficiently load balance a very large number of jobs (tasks) amongst available computing nodes.
    HyperQueue may also be used if you have dependencies among the jobs.


1. A user is allowed to submit at most 100 jobs. Each job may be [a job array][1].
1. The array size is at most 1,000 subjobs.


[1]: job-arrays.md
[1]: job-arrays.md
[3]: hyperqueue.md
[2]: hyperqueue.md
 No newline at end of file
Original line number Original line Diff line number Diff line
# Energy Saving
# Energy Saving


Due to high energy prices and reductions in funding, IT4Innovations has implemented a set of energy saving measures on the supercomputing clusters. The measures are selected to minimize the performance impact and achieve significant cost, energy, and carbon footprint reduction effect.
IT4Innovations has implemented a set of energy saving measures on the supercomputing clusters. The measures are selected to minimize the performance impact and achieve significant cost, energy, and carbon footprint reduction effect.


The energy saving measures are effective as of **1.2.2023**.
The energy saving measures are effective as of **1.2.2023**.


Original line number Original line Diff line number Diff line
# HyperQueue
# HyperQueue


HyperQueue lets you build a computation plan consisting of a large amount of tasks and then execute it transparently over a system like SLURM/PBS.
HyperQueue lets you build a computation plan consisting of a large amount of tasks and then execute it transparently over a system like SLURM/PBS.
It dynamically groups tasks into PBS jobs and distributes them to fully utilize allocated nodes.
It dynamically groups tasks into Slurm jobs and distributes them to fully utilize allocated nodes.
You thus do not have to manually aggregate your tasks into PBS jobs.
You thus do not have to manually aggregate your tasks into Slurm jobs.


Find more about HyperQueue in its [documentation][a].
Find more about HyperQueue in its [documentation][a].


@@ -87,35 +87,35 @@ $ hq jobs


Before HyperQueue can execute your jobs, it needs to have access to some computational resources.
Before HyperQueue can execute your jobs, it needs to have access to some computational resources.
You can provide these by starting HyperQueue *workers* which connect to the server and execute your jobs.
You can provide these by starting HyperQueue *workers* which connect to the server and execute your jobs.
The workers should run on computing nodes, therefore they should be started inside PBS jobs.
The workers should run on computing nodes, therefore they should be started inside Slurm jobs.


There are two ways of providing computational resources.
There are two ways of providing computational resources.


* **Allocate PBS jobs automatically**
* **Allocate Slurm jobs automatically**


    HyperQueue can automatically submit PBS jobs with workers on your behalf. This system is called
    HyperQueue can automatically submit Slurm jobs with workers on your behalf. This system is called
    [automatic allocation][c]. After the server is started, you can add a new automatic allocation
    [automatic allocation][c]. After the server is started, you can add a new automatic allocation
    queue using the `hq alloc add` command:
    queue using the `hq alloc add` command:


    ```console
    ```console
    $ hq alloc add pbs -- -qqprod -AAccount1
    $ hq alloc add slurm -- -A<PROJECT-ID> -p qcpu_exp
    ```
    ```


    After you run this command, HQ will automatically start submitting PBS jobs on your behalf
    After you run this command, HQ will automatically start submitting Slurm jobs on your behalf
    once some HQ jobs are submitted.
    once some HQ jobs are submitted.


* **Manually start PBS jobs with HQ workers**
* **Manually start Slurm jobs with HQ workers**


    With the following command, you can submit a PBS job that will start a single HQ worker which
    With the following command, you can submit a Slurm job that will start a single HQ worker which
    will connect to a running HQ server.
    will connect to a running HQ server.


    ```console
    ```console
    $ qsub <qsub-params> -- /bin/bash -l -c "$(which hq) worker start"
    $ salloc <salloc-params> -- /bin/bash -l -c "$(which hq) worker start"
    ```
    ```


!!! tip
!!! tip
    For debugging purposes, you can also start the worker e.g. on a login node, simply by running
    For debugging purposes, you can also start the worker e.g. on a login node, simply by running
    `$ hq worker start`. Do not use such worker for any long-running computations though.
    `$ hq worker start`. Do not use such worker for any long-running computations though!
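
Regardless of how the workers were started, you can verify that they have connected to the running server:

```console
$ hq worker list
```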


## Architecture
## Architecture


Original line number Original line Diff line number Diff line
# Job Arrays
# Job Arrays


A job array is a compact representation of many jobs called subjobs. Subjobs share the same job script, and have the same values for all attributes and resources, with the following exceptions:
A job array is a compact representation of many jobs called tasks. Tasks share the same job script, and have the same values for all attributes and resources, with the following exceptions:


* each subjob has a unique index, $PBS_ARRAY_INDEX
* each task has a unique index, `$SLURM_ARRAY_TASK_ID`
* job Identifiers of subjobs only differ by their indices
* job Identifiers of tasks only differ by their indices
* the state of subjobs can differ (R, Q, etc.)
* the state of tasks can differ


All subjobs within a job array have the same scheduling priority and schedule as independent jobs. An entire job array is submitted through a single `qsub` command and may be managed by `qdel`, `qalter`, `qhold`, `qrls`, and `qsig` commands as a single job.
All tasks within a job array have the same scheduling priority and schedule as independent jobs. An entire job array is submitted through a single `sbatch` command and may be managed by `squeue`, `scancel` and `scontrol` commands as a single job.


## Shared Jobscript
## Shared Jobscript


All subjobs in a job array use the very same single jobscript. Each subjob runs its own instance of the jobscript. The instances execute different work controlled by the `$PBS_ARRAY_INDEX` variable.
All tasks in a job array use the very same single jobscript. Each task runs its own instance of the jobscript. The instances execute different work controlled by the `$SLURM_ARRAY_TASK_ID` variable.


Example:
Example:


Assume we have 900 input files with the name of each beginning with "file" (e.g. file001, ..., file900). Assume we would like to use each of these input files with myprog.x program executable, each as a separate job.
Assume we have 900 input files with the name of each beginning with "file" (e.g. file001, ..., file900). Assume we would like to use each of these input files with myprog.x program executable,
each as a separate, single-node job running 128 threads.


First, we create a tasklist file (or subjobs list), listing all tasks (subjobs) - all input files in our example:
First, we create a `tasklist` file, listing all tasks - all input files in our example:


```console
```console
$ find . -name 'file*' > tasklist
$ find . -name 'file*' > tasklist
@@ -26,117 +27,74 @@ Then we create a jobscript:


```bash
```bash
#!/bin/bash
#!/bin/bash
#PBS -A OPEN-00-00
#SBATCH -p qcpu
#PBS -q qprod
#SBATCH -A PROJECT-ID
#PBS -l select=1,walltime=02:00:00
#SBATCH --nodes 1 --ntasks-per-node 1 --cpus-per-task 128 
#SBATCH -t 02:00:00
#SBATCH -o /dev/null


# change to scratch directory
# change to scratch directory
SCRDIR=/scratch/project/${PBS_ACCOUNT,,}/${USER}/${PBS_JOBID}
SCRDIR=/scratch/project/$SLURM_JOB_ACCOUNT/$SLURM_JOB_USER/$SLURM_JOB_ID
mkdir -p $SCRDIR
mkdir -p $SCRDIR
cd $SCRDIR || exit
cd $SCRDIR || exit


# get individual tasks from tasklist with index from PBS JOB ARRAY
# get individual tasks from tasklist with index from SLURM JOB ARRAY
TASK=$(sed -n "${PBS_ARRAY_INDEX}p" $PBS_O_WORKDIR/tasklist)
TASK=$(sed -n "${SLURM_ARRAY_TASK_ID}p" $SLURM_SUBMIT_DIR/tasklist)


# copy input file and executable to scratch
# copy input file and executable to scratch
cp $PBS_O_WORKDIR/$TASK input
cp $SLURM_SUBMIT_DIR/$TASK input
cp $PBS_O_WORKDIR/myprog.x .
cp $SLURM_SUBMIT_DIR/myprog.x .


# execute the calculation
# execute the calculation
./myprog.x < input > output
./myprog.x < input > output


# copy output file to submit directory
# copy output file to submit directory
cp output $PBS_O_WORKDIR/$TASK.out
cp output $SLURM_SUBMIT_DIR/$TASK.out
```
```


In this example, the submit directory contains the 900 input files, the myprog.x executable, and the jobscript file. As an input for each run, we take the filename of the input file from the created tasklist file. We copy the input file to the local scratch memory `/lscratch/$PBS_JOBID`, execute the myprog.x and copy the output file back to the submit directory, under the `$TASK.out` name. The myprog.x executable runs on one node only and must use threads to run in parallel. Be aware, that if the myprog.x **is not multithreaded**, then all the **jobs are run as single-thread programs in a sequential manner**. Due to the allocation of the whole node, the accounted time is equal to the usage of the whole node, while using only 1/16 of the node.
In this example, the submit directory contains the 900 input files, the myprog.x executable,
and the jobscript file. As an input for each run, we take the filename of the input file from the created
tasklist file. We copy the input file to a scratch directory  `/scratch/project/$SLURM_JOB_ACCOUNT/$SLURM_JOB_USER/$SLURM_JOB_ID`,
execute the myprog.x and copy the output file back to the submit directory, under the `$TASK.out` name. The myprog.x executable runs on one node only and must use threads to run in parallel.
Be aware that if myprog.x **is not multithreaded or multi-process (MPI)**, then all the **jobs are run as single-thread programs, wasting node resources**.


If running a huge number of parallel multicore (in means of multinode multithread, e.g. MPI enabled) jobs is needed, then a job array approach should be used. The main difference, as compared to the previous examples using one node, is that the local scratch memory should not be used (as it is not shared between nodes) and MPI or other techniques for parallel multinode processing has to be used properly.
## Submitting Job Array


## Submiting Job Array
To submit the job array, use the `sbatch --array` command. The 900 jobs of the [example above][2] may be submitted like this:

To submit the job array, use the `qsub -J` command. The 900 jobs of the [example above][3] may be submitted like this:

```console
$ qsub -N JOBNAME -J 1-900 jobscript
506493[].isrv5
```

In this example, we submit a job array of 900 subjobs. Each subjob will run on one full node and is assumed to take less than 2 hours (note the #PBS directives in the beginning of the jobscript file, do not forget to set your valid PROJECT_ID and desired queue).

Sometimes for testing purposes, you may need to submit a one-element only array. This is not allowed by PBSPro, but there is a workaround:


```console
```console
$ qsub -N JOBNAME -J 9-10:2 jobscript
$ sbatch -J JOBNAME --array 1-900 ./jobscript
```
```


This will only choose the lower index (9 in this example) for submitting/running your job.
In this example, we submit a job array of 900 tasks. Each task will run on one full node and is assumed to take less than 2 hours (note the #SBATCH directives in the beginning of the jobscript file, do not forget to set your valid PROJECT_ID and desired queue).
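
If you need to limit how many tasks run simultaneously, append a `%` limit to the array specification;
for example, to let at most 100 of the 900 tasks run at the same time:

```console
$ sbatch -J JOBNAME --array 1-900%100 ./jobscript
```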


## Managing Job Array
## Managing Job Array


Check status of the job array using the `qstat` command.
Check status of the job array using the `squeue --me` command, alternatively `squeue --me --array`.


```console
```console
$ qstat -a 12345[].dm2
$  squeue --me --long

          JOBID PARTITION     NAME   USER    STATE       TIME     TIME_LIMI  NODES NODELIST(REASON)
dm2:
2499924_[1-900]      qcpu  myarray   user  PENDING       0:00      02:00:00      1 (Resources)
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- --  |---|---| ------ --- --- ------ ----- - -----
12345[].dm2     user2    qprod    xx          13516   1 16    --  00:50 B 00:02
```
```

Check the status of the tasks using the `squeue` command.
When the status is B, it means that some subjobs are already running.
Check the status of the first 100 subjobs using the `qstat` command.


```console
```console
$ qstat -a 12345[1-100].dm2
$ squeue -j 2499924 --long

    JOBID PARTITION     NAME   USER    STATE       TIME     TIME_LIMI  NODES NODELIST(REASON)
dm2:
2499924_1      qcpu  myarray   user  PENDING       0:00      02:00:00      1 (Resources)
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- --  |---|---| ------ --- --- ------ ----- - -----
12345[1].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:02
12345[2].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:02
12345[3].dm2    user2    qprod    xx          13516   1 16    --  00:50 R 00:01
12345[4].dm2    user2    qprod    xx          13516   1 16    --  00:50 Q   --
     .             .        .      .             .    .   .     .    .   .    .
     .             .        .      .             .    .   .     .    .   .    .
     ,             .        .      .             .    .   .     .    .   .    .
     .             .        .      .             .    .   .     .    .   .    .
12345[100].dm2 user2    qprod    xx          13516   1 16    --  00:50 Q   --
2499924_900    qcpu  myarray   user  PENDING       0:00      02:00:00      1 (Resources)
```

Delete the entire job array. Running subjobs will be killed, queueing subjobs will be deleted.

```console
$ qdel 12345[].dm2
```

Deleting large job arrays may take a while.
Display status information for all user's jobs, job arrays, and subjobs.

```console
$ qstat -u $USER -t
```
```


Display status information for all user's subjobs.
Delete the entire job array. Running tasks will be killed and queued tasks will be deleted.


```console
```console
$ qstat -u $USER -tJ
$ scancel 2499924
```
```


For more information on job arrays, see the [PBSPro Users guide][1].
For more information on job arrays, see the [SLURM guide][1].
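
Once the array has finished, you can also query the Slurm accounting database for a per-task summary,
using the job ID from the examples above:

```console
$ sacct -j 2499924 --format=JobID,JobName,State,Elapsed,ExitCode
```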

## Examples

Download the examples in [capacity.zip][2], illustrating the above listed ways to run a huge number of jobs. We recommend trying out the examples before using this for running production jobs.

Unzip the archive in an empty directory on cluster and follow the instructions in the README file-

```console
$ unzip capacity.zip
$ cat README
```


[1]: ../pbspro.md
[1]: https://slurm.schedmd.com/job_array.html
[2]: capacity.zip
[2]: #shared-jobscript
[3]: #shared-jobscript
Original line number Original line Diff line number Diff line
# Job Scheduling
# Job Scheduling


## Job Execution Priority
## Job Priority


The scheduler gives each job an execution priority and then uses this job execution priority to select which job(s) to run.
The scheduler gives each job a priority and then uses this job priority to select which job(s) to run.


Job execution priority is determined by these job properties (in order of importance):
Job priority is determined by these job properties (in order of importance):


1. queue priority
1. queue priority
1. fair-share priority
1. fair-share priority
1. eligible time
1. job age/eligible time


### Queue Priority
### Queue Priority


Queue priority is the priority of the queue in which the job is waiting prior to execution.
Queue priority is the priority of the queue in which the job is waiting prior to execution.


Queue priority has the biggest impact on job execution priority. The execution priority of jobs in higher priority queues is always greater than the execution priority of jobs in lower priority queues. Other properties of jobs used for determining the job execution priority (fair-share priority, eligible time) cannot compete with queue priority.
Queue priority has the biggest impact on job priority. The priority of jobs in higher priority queues is always greater than the priority of jobs in lower priority queues. Other properties of jobs used for determining the job priority (fair-share priority, eligible time) cannot compete with queue priority.


Queue priorities can be seen [here][a].
Queue priorities can be seen [here][a].


@@ -24,35 +24,17 @@ Fair-share priority is calculated based on recent usage of resources. Fair-share


Fair-share priority is used for ranking jobs with equal queue priority.
Fair-share priority is used for ranking jobs with equal queue priority.


Fair-share priority is calculated as:
Usage decays, halving at intervals of 7 days.


---8<--- "fairshare_formula.md"
### Job Age/Eligible Time


where MAX_FAIRSHARE has the value of 1E6
The job age factor represents the length of time a job has been sitting in the queue and eligible to run.


usage<sub>Project</sub> is the usage accumulated by all members of a selected project
Job age has the least impact on priority.

usage<sub>Total</sub> is the total usage by all users, across all projects.

Usage counts allocated node-hours (`ncpus x walltime`). Usage decays, halving at intervals of 168 hours (one week).
Jobs queued in the queue qexp are not used to calculate the project's usage.

!!! note
    Calculated usage and fair-share priority can be seen [here][b].

Calculated fair-share priority can also be seen in the Resource_List.fairshare attribute of a job.

### Eligible Time

Eligible time is the amount of eligible time (in seconds) a job accrues while waiting to run. Jobs with higher eligible time gain higher priority.

Eligible time has the least impact on execution priority. Eligible time is used for sorting jobs with equal queue priority and fair-share priority. It is very, very difficult for eligible time to compete with fair-share priority.

Eligible time can be seen in the `eligible_time` attribute of a job.


### Formula
### Formula


Job execution priority (job sort formula) is calculated as:
Job priority is calculated as:


---8<--- "job_sort_formula.md"
---8<--- "job_sort_formula.md"


@@ -60,24 +42,44 @@ Job execution priority (job sort formula) is calculated as:


The scheduler uses job backfilling.
The scheduler uses job backfilling.


Backfilling means fitting smaller jobs around the higher-priority jobs that the scheduler is going to run next, in such a way that the higher-priority jobs are not delayed. Backfilling allows us to keep resources from becoming idle when the top job (the job with the highest execution priority) cannot run.
Backfilling means fitting smaller jobs around the higher-priority jobs that the scheduler is going to run next, in such a way that the higher-priority jobs are not delayed. Backfilling allows us to keep resources from becoming idle when the top job (the job with the highest priority) cannot run.


The scheduler makes a list of jobs to run in order of execution priority. The scheduler looks for smaller jobs that can fit into the usage gaps around the highest-priority jobs in the list. The scheduler looks in the prioritized list of jobs and chooses the highest-priority smaller jobs that fit. Filler jobs are run only if they will not delay the start time of top jobs.
The scheduler makes a list of jobs to run in order of priority. The scheduler looks for smaller jobs that can fit into the usage gaps around the highest-priority jobs in the list. The scheduler looks in the prioritized list of jobs and chooses the highest-priority smaller jobs that fit. Filler jobs are run only if they will not delay the start time of top jobs.


This means that jobs with lower execution priority can be run before jobs with higher execution priority.
This means that jobs with lower priority can be run before jobs with higher priority.


!!! note
!!! note
    It is **very beneficial to specify the walltime** when submitting jobs.
    It is **very beneficial to specify the timelimit** when submitting jobs.


Specifying more accurate walltime enables better scheduling, better execution times, and better resource usage. Jobs with suitable (small) walltime can be backfilled - and overtake job(s) with a higher priority.
Specifying a more accurate timelimit enables better scheduling, better execution times, and better resource usage. Jobs with a suitable (small) timelimit can be backfilled - and may overtake job(s) with a higher priority.
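
For example, a job expected to finish well within two hours can state that explicitly at submission time (an illustrative sketch; the project ID, partition, and script name are placeholders):

```console
$ sbatch -A PROJECT-ID -p qcpu --time 02:00:00 myjob.sh
```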


---8<--- "mathjax.md"
---8<--- "mathjax.md"


### Job Placement
## Technical Details

Priorities are set using Slurm's [Multifactor Priority Plugin][1]. Current settings are as follows:

```
$ grep ^Priority /etc/slurm/slurm.conf
PriorityFlags=DEPTH_OBLIVIOUS
PriorityType=priority/multifactor
PriorityDecayHalfLife=7-0
PriorityMaxAge=14-0
PriorityWeightAge=100000
PriorityWeightFairshare=10000000
PriorityWeightPartition=1000000000
```

## Inspecting Job Priority

One can inspect job priority using the `sprio` command. The job priority is shown in the PRIORITY field and is composed of the PARTITION, FAIRSHARE, and AGE priorities.


Job [placement can be controlled by flags during submission][1].
```
$ sprio -l -j 894782
          JOBID PARTITION     USER  ACCOUNT   PRIORITY       SITE        AGE      ASSOC  FAIRSHARE    JOBSIZE  PARTITION    QOSNAME        QOS        NICE                 TRES
         894782 qgpu         user1  service  300026688          0         17          0      26671          0  300000000     normal          0           0
```
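
Note that the resulting PRIORITY is simply the sum of the individual contributions shown by `sprio`:

```
PRIORITY = PARTITION + FAIRSHARE + AGE = 300000000 + 26671 + 17 = 300026688
```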


[1]: job-submission-and-execution.md#advanced-job-placement
[1]: https://slurm.schedmd.com/priority_multifactor.html


[a]: https://extranet.it4i.cz/rsweb/barbora/queues
[a]: https://extranet.it4i.cz/rsweb/karolina/queues
[b]: https://extranet.it4i.cz/rsweb/barbora/projects
+1 −458
Original line number Original line Diff line number Diff line
# Job Submission and Execution

## Job Submission

When allocating computational resources for the job, specify:

1. a suitable queue for your job (the default is qprod)
1. the number of computational nodes (required)
1. the number of cores per node (not required)
1. the maximum wall time allocated to your calculation, note that jobs exceeding the maximum wall time will be killed
1. your Project ID
1. a Jobscript or interactive switch

Submit the job using the `qsub` command:

```console
$ qsub -A Project_ID -q queue -l select=x:ncpus=y,walltime=[[hh:]mm:]ss[.ms] jobscript
```

The `qsub` command submits the job to the queue, i.e. it creates a request to the PBS Job manager for allocation of specified resources. The resources will be allocated when available, subject to the above described policies and constraints. **After the resources are allocated, the jobscript or interactive shell is executed on the first of the allocated nodes.**

!!! note
    `ncpus=y` is usually not required, because the smallest allocation unit is an entire node. The exception are corner cases for `qviz` and `qfat` on Karolina.

### Job Submission Examples

```console
$ qsub -A OPEN-0-0 -q qprod -l select=64,walltime=03:00:00 ./myjob
```

In this example, we allocate 64 nodes, 36 cores per node, for 3 hours. We allocate these resources via the `qprod` queue, consumed resources will be accounted to the project identified by Project ID `OPEN-0-0`. The jobscript `myjob` will be executed on the first node in the allocation.

```console
$ qsub -q qexp -l select=4 -I
```

In this example, we allocate 4 nodes, 36 cores per node, for 1 hour. We allocate these resources via the `qexp` queue. The resources will be available interactively.

```console
$ qsub -A OPEN-0-0 -q qnvidia -l select=10 ./myjob
```

In this example, we allocate 10 NVIDIA accelerated nodes, 24 cores per node, for 24 hours. We allocate these resources via the `qnvidia` queue. The jobscript `myjob` will be executed on the first node in the allocation.

```console
$ qsub -A OPEN-0-0 -q qfree -l select=10 ./myjob
```

In this example, we allocate 10 nodes, 24 cores per node, for 12 hours. We allocate these resources via the `qfree` queue. It is not required that the project `OPEN-0-0` has any available resources left. Consumed resources are still accounted for. The jobscript `myjob` will be executed on the first node in the allocation.

All `qsub` options may be [saved directly into the jobscript][1]. In such cases, it is not necessary to specify any options for `qsub`.

```console
$ qsub ./myjob
```

By default, the PBS batch system sends an email only when the job is aborted. Disabling mail events completely can be done as follows:

```console
$ qsub -m n
```

#### Dependency Job Submission

To submit dependent jobs in sequence, use the `depend` function of `qsub`.

First submit the first job in a standard manner:

```console
$ qsub -A OPEN-0-0 -q qprod -l select=64,walltime=02:00:00 ./firstjob
123456[].isrv1
```

Then submit the second job using the `depend` function:

```console
$ qsub -W depend=afterok:123456 ./secondjob
```

Both jobs will be queued, but the second job won't start until the first job has finished successfully.

Below is the list of arguments that can be used with `-W depend=dependency:jobid`:

| Argument    | Description                                                     |
| ----------- | --------------------------------------------------------------- |
| after       | This job is scheduled after `jobid` begins execution.       |
| afterok     | This job is scheduled after `jobid` finishes successfully.  |
| afternotok  | This job is scheduled after `jobid` finishes unsuccessfully. |
| afterany    | This job is scheduled after `jobid` finishes in any state.  |
| before      | This job must begin execution before `jobid` is scheduled.  |
| beforeok    | This job must finish successfully before `jobid` begins.        |
| beforenotok | This job must finish unsuccessfully before `jobid` begins.      |
| beforeany   | This job must finish in any state before `jobid` begins.        |

### Useful Tricks

All `qsub` options may be [saved directly into the jobscript][1]. In such a case, no options to `qsub` are needed.

```console
$ qsub ./myjob
```

By default, the PBS batch system sends an email only when the job is aborted. Disabling mail events completely can be done like this:

```console
$ qsub -m n
```

<!--- NOT IMPLEMENTED ON KAROLINA YET

## Advanced Job Placement

### Salomon - Placement by Network Location

The network location of allocated nodes in the [InfiniBand network][3] influences efficiency of network communication between nodes of job. Nodes on the same InfiniBand switch communicate faster with lower latency than distant nodes. To improve communication efficiency of jobs, PBS scheduler on Salomon is configured to allocate nodes (from currently available resources), which are as close as possible in the network topology.

For communication intensive jobs, it is possible to set stricter requirement - to require nodes directly connected to the same InfiniBand switch or to require nodes located in the same dimension group of the InfiniBand network.

### Salomon - Placement by InfiniBand Switch

Nodes directly connected to the same InfiniBand switch can communicate most efficiently. Using the same switch prevents hops in the network and provides for unbiased, most efficient network communication. There are 9 nodes directly connected to every InfiniBand switch.

!!! note
    We recommend allocating compute nodes of a single switch when the best possible computational network performance is required to run job efficiently.

Nodes directly connected to the one InfiniBand switch can be allocated using node grouping on the PBS resource attribute `switch`.

In this example, we request all 9 nodes directly connected to the same switch using node grouping placement.

```console
$ qsub -A OPEN-0-0 -q qprod -l select=9 -l place=group=switch ./myjob
```

-->

## Advanced Job Handling

### Selecting Turbo Boost Off

!!! note
    For Barbora only.

Intel Turbo Boost Technology is on by default. We strongly recommend keeping the default.

If necessary (such as in the case of benchmarking), you can disable Turbo for all nodes of the job by using the PBS resource attribute `cpu_turbo_boost`:

```console
$ qsub -A OPEN-0-0 -q qprod -l select=4 -l cpu_turbo_boost=0 -I
```

More information about the Intel Turbo Boost can be found in the TurboBoost section.

### Advanced Examples

In the following example, we select an allocation for benchmarking a very special and demanding MPI program. We request Turbo off, and 2 full chassis of compute nodes (nodes sharing the same IB switches) for 30 minutes:

```console
$ qsub -A OPEN-0-0 -q qprod
    -l select=18:ibswitch=isw10:mpiprocs=1:ompthreads=16+18:ibswitch=isw20:mpiprocs=16:ompthreads=1
    -l cpu_turbo_boost=0,walltime=00:30:00
    -N Benchmark ./mybenchmark
```

The MPI processes will be distributed differently on the nodes connected to the two switches. On the isw10 nodes, we will run 1 MPI process per node with 16 threads per process, on isw20 nodes we will run 16 plain MPI processes.

Although this example is somewhat artificial, it demonstrates the flexibility of the qsub command options.

## Job Management

!!! note
    Check the status of your jobs using the `qstat` and `check-pbs-jobs` commands

```console
$ qstat -a
$ qstat -a -u username
$ qstat -an -u username
$ qstat -f 12345.srv11
```

Example:

```console
$ qstat -a

srv11:
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
16287.srv11 user1    qlong    job1         6183   4 64   --  144:0 R 38:25
16468.srv11 user1    qlong    job2         8060   4 64   --  144:0 R 17:44
16547.srv11 user2    qprod    job3x       13516   2 32   --  48:00 R 00:58
```

In this example user1 and user2 are running jobs named `job1`, `job2`, and `job3x`. `job1` and `job2` are using 4 nodes, 128 cores per node each. `job1` has already run for 38 hours and 25 minutes, and `job2` for 17 hours 44 minutes. So `job1`, for example, has already consumed `64 x 38.41 = 2,458.6` core-hours. `job3x` has already consumed `32 x 0.96 = 30.93` core-hours. These consumed core-hours will be [converted to node-hours][10] and accounted for on the respective project accounts, regardless of whether the allocated cores were actually used for computations.

The following commands allow you to check the status of your jobs using the `check-pbs-jobs` command, check for the presence of user's PBS jobs' processes on execution hosts, display load and processes, display job standard and error output, and continuously display (`tail -f`) job standard or error output.

```console
$ check-pbs-jobs --check-all
$ check-pbs-jobs --print-load --print-processes
$ check-pbs-jobs --print-job-out --print-job-err
$ check-pbs-jobs --jobid JOBID --check-all --print-all
$ check-pbs-jobs --jobid JOBID --tailf-job-out
```

Examples:

```console
$ check-pbs-jobs --check-all
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Check session id: OK
Check processes
cn164: OK
cn165: No process
```

In this example we see that job `35141.dm2` is not currently running any processes on the allocated node cn165, which may indicate an execution error:

```console
$ check-pbs-jobs --print-load --print-processes
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Print load
cn164: LOAD: 16.01, 16.01, 16.00
cn165: LOAD:  0.01,  0.00,  0.01
Print processes
       %CPU CMD
cn164:  0.0 -bash
cn164:  0.0 /bin/bash /var/spool/PBS/mom_priv/jobs/35141.dm2.SC
cn164: 99.7 run-task
...
```

In this example, we see that job `35141.dm2` is currently running a process run-task on node `cn164`, using one thread only, while node `cn165` is empty, which may indicate an execution error.

```console
$ check-pbs-jobs --jobid 35141.dm2 --print-job-out
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Print job standard output:
======================== Job start  ==========================
Started at    : Fri Aug 30 02:47:53 CEST 2013
Script name   : script
Run loop 1
Run loop 2
Run loop 3
```

In this example, we see the actual output (some iteration loops) of the job `35141.dm2`.

!!! note
    Manage your queued or running jobs, using the `qhold`, `qrls`, `qdel`, `qsig`, or `qalter` commands

You may release your allocation at any time, using the `qdel` command

```console
$ qdel 12345.srv11
```

You may kill a running job by force, using the `qsig` command

```console
$ qsig -s 9 12345.srv11
```

Learn more by reading the PBS man page

```console
$ man pbs_professional
```

## Job Execution

### Jobscript

!!! note
    Prepare the jobscript to run batch jobs in the PBS queue system

The Jobscript is a user made script controlling a sequence of commands for executing the calculation. It is often written in bash, though other scripts may be used as well. The jobscript is supplied to the PBS `qsub` command as an argument, and is executed by the PBS Professional workload manager.

!!! note
    The jobscript or interactive shell is executed on first of the allocated nodes.

```console
$ qsub -q qexp -l select=4 -N Name0 ./myjob
$ qstat -n -u username

srv11:
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
15209.srv11     username qexp     Name0        5530   4 128    --  01:00 R 00:00
   cn17/0*32+cn108/0*32+cn109/0*32+cn110/0*32
```

In this example, the nodes `cn17`, `cn108`, `cn109`, and `cn110` were allocated for 1 hour via the qexp queue. The `myjob` jobscript will be executed on the node `cn17`, while the nodes `cn108`, `cn109`, and `cn110` are available for use as well.

The jobscript or interactive shell is by default executed in the `/home` directory:

```console
$ qsub -q qexp -l select=4 -I
qsub: waiting for job 15210.srv11 to start
qsub: job 15210.srv11 ready

$ pwd
/home/username
```

In this example, 4 nodes were allocated interactively for 1 hour via the `qexp` queue. The interactive shell is executed in the `/home` directory.

!!! note
    All nodes within the allocation may be accessed via SSH. Unallocated nodes are not accessible to the user.

The allocated nodes are accessible via SSH from login nodes. The nodes may access each other via SSH as well.

Calculations on allocated nodes may be executed remotely via the MPI, SSH, pdsh, or clush. You may find out which nodes belong to the allocation by reading the `$PBS_NODEFILE` file

```console
$ qsub -q qexp -l select=4 -I
qsub: waiting for job 15210.srv11 to start
qsub: job 15210.srv11 ready

$ pwd
/home/username

$ sort -u $PBS_NODEFILE
cn17.bullx
cn108.bullx
cn109.bullx
cn110.bullx

$ pdsh -w cn17,cn[108-110] hostname
cn17: cn17
cn108: cn108
cn109: cn109
cn110: cn110
```

In this example, the hostname program is executed via `pdsh` from the interactive shell. The execution runs on all four allocated nodes. The same result would be achieved if the `pdsh` were called from any of the allocated nodes or from the login nodes.

### Example Jobscript for MPI Calculation

!!! note
    Production jobs must use the /scratch directory for I/O

The recommended way to run production jobs is to change to the `/scratch` directory early in the jobscript, copy all inputs to `/scratch`, execute the calculations, and copy outputs to the `/home` directory.

```bash
#!/bin/bash

cd $PBS_O_WORKDIR

SCRDIR=/scratch/project/open-00-00/${USER}/myjob
mkdir -p $SCRDIR

# change to scratch directory, exit on failure
cd $SCRDIR || exit

# copy input file to scratch
cp $PBS_O_WORKDIR/input .
cp $PBS_O_WORKDIR/mympiprog.x .

# load the MPI module
# (Always specify the module's name and version in your script;
# for the reason, see https://docs.it4i.cz/software/modules/lmod/#loading-modules.)
ml OpenMPI/4.1.1-GCC-10.2.0-Java-1.8.0_221

# execute the calculation
mpirun -pernode ./mympiprog.x

# copy output file to home
cp output $PBS_O_WORKDIR/.

#exit
exit
```

In this example, a directory in `/home` holds the input file input and the `mympiprog.x` executable. We create the `myjob` directory on the `/scratch` filesystem, copy input and executable files from the `/home` directory where the `qsub` was invoked (`$PBS_O_WORKDIR`) to `/scratch`, execute the MPI program `mympiprog.x` and copy the output file back to the `/home` directory. `mympiprog.x` is executed as one process per node, on all allocated nodes.

!!! note
    Consider preloading inputs and executables onto [shared scratch][6] memory before the calculation starts.

In some cases, it may be impractical to copy the inputs to the `/scratch` memory and the outputs to the `/home` directory. This is especially true when very large input and output files are expected, or when the files should be reused by a subsequent calculation. In such cases, it is the users' responsibility to preload the input files on the shared `/scratch` memory before the job submission, and retrieve the outputs manually after all calculations are finished.

!!! note
    Store the `qsub` options within the jobscript. Use the `mpiprocs` and `ompthreads` qsub options to control the MPI job execution.

### Example Jobscript for MPI Calculation With Preloaded Inputs

Example jobscript for an MPI job with preloaded inputs and executables, options for `qsub` are stored within the script:

```bash
#!/bin/bash
#PBS -q qprod
#PBS -N MYJOB
#PBS -l select=100:mpiprocs=1:ompthreads=16
#PBS -A OPEN-00-00

# job is run using project resources; here ${PBS_ACCOUNT,,} translates to "open-00-00"
SCRDIR=/scratch/project/${PBS_ACCOUNT,,}/${USER}/myjob

# change to scratch directory, exit on failure
cd $SCRDIR || exit

# load the MPI module
# (Always specify the module's name and version in your script;
# for the reason, see https://docs.it4i.cz/software/modules/lmod/#loading-modules.)
ml OpenMPI/4.1.1-GCC-10.2.0-Java-1.8.0_221

# execute the calculation
mpirun ./mympiprog.x

#exit
exit
```

In this example, input and executable files are assumed to be preloaded manually in the `/scratch/project/open-00-00/$USER/myjob` directory. Because we used the `qprod` queue, we had to specify which project's resources we want to use, and our `PBS_ACCOUNT` variable will be set accordingly (OPEN-00-00). `${PBS_ACCOUNT,,}` uses one of the bash's built-in functions to translate it into lower case.

Note the `mpiprocs` and `ompthreads` qsub options controlling the behavior of the MPI execution. `mympiprog.x` is executed as one process per node, on all 100 allocated nodes. If `mympiprog.x` implements OpenMP threads, it will run 16 threads per node.

### Example Jobscript for Single Node Calculation

!!! note
    The local scratch directory is often useful for single node jobs. Local scratch memory will be deleted immediately after the job ends.

Example jobscript for single node calculation, using [local scratch][6] memory on the node:

```bash
#!/bin/bash

# change to local scratch directory
cd /lscratch/$PBS_JOBID || exit

# copy input file to scratch
cp $PBS_O_WORKDIR/input .
cp $PBS_O_WORKDIR/myprog.x .

# execute the calculation
./myprog.x

# copy output file to home
cp output $PBS_O_WORKDIR/.

#exit
exit
```

In this example, a directory in `/home` holds the input file input and the executable `myprog.x`. We copy input and executable files from the `/home` directory where the `qsub` was invoked (`$PBS_O_WORKDIR`) to the local `/scratch` memory `/lscratch/$PBS_JOBID`, execute `myprog.x` and copy the output file back to the `/home` directory. `myprog.x` runs on one node only and may use threads.

### Other Jobscript Examples

Further jobscript examples may be found in the software section and the [Capacity computing][9] section.

[1]: #example-jobscript-for-mpi-calculation-with-preloaded-inputs
[2]: resources-allocation-policy.md
[3]: ../salomon/network.md
[5]: ../salomon/7d-enhanced-hypercube.md
[6]: ../salomon/storage.md
[9]: capacity-computing.md
[10]: resources-allocation-policy.md#resource-accounting-policy
Original line number Original line Diff line number Diff line
!!!warning
    This page has not been updated yet. The page does not reflect the transition from PBS to Slurm.

# Parallel Runs Setting on Karolina
# Parallel Runs Setting on Karolina


Important aspect of each parallel application is correct placement of MPI processes
Important aspect of each parallel application is correct placement of MPI processes
Original line number Original line Diff line number Diff line
---
hide:
  - toc
---

# Karolina Partitions

!!! important
    Active [project membership][1] is required to run jobs.

Below is the list of partitions available on the Karolina cluster:

| Partition        | Project resources    | Nodes                                                     | Min ncpus   | Priority | Authorization | Walltime (def/max) |
| ---------------- | -------------------- | --------------------------------------------------------- | ----------- | -------- | ------------- | ------------------ |
| **qcpu**         | > 0                  | 720                                                       | 128         | 2        | no            | 24 / 48h           |
| **qcpu_biz**     | > 0                  | 720                                                       | 128         | 3        | no            | 24 / 48h           |
| **qcpu_exp**     | < 150% of allocation | 720<br>max 2 per user                                     | 128         | 4        | no            | 1 / 1h             |
| **qcpu_free**    | < 150% of allocation | 720<br>max 4 per job                                      | 128         | 1        | no            | 12 / 18h           |
| **qcpu_long**    | > 0                  | 200<br>max 20 per job, only non-accelerated nodes allowed | 128         | 2        | no            | 72 / 144h          |
| **qcpu_preempt** | active Karolina<br> CPU alloc. | 720<br>max 4 per job                                      | 128         | 0        | no            | 12 / 12h           |
| **qgpu**         | > 0                  | 72<br>max 16 per job                                      | 16<br>1 gpu | 3        | yes           | 24 / 48h           |
| **qgpu_big**     | > 0                  | 72<br>max 64 per job                                      | 128         | 2        | yes           | 12 / 12h           |
| **qgpu_biz**     | > 0                  | 72<br>max 16 per job                                      | 128         | 4        | yes           | 24 / 48h           |
| **qgpu_exp**     | < 150% of allocation | 4<br>max 1 per job                                        | 16<br>1 gpu | 5        | no            | 1 / 1h             |
| **qgpu_free**    | < 150% of allocation | 46<br>max 2 per job                                       | 16<br>1 gpu | 1        | no            | 12 / 18h           |
| **qgpu_preempt** | active Karolina<br> GPU alloc. | 72<br>max 2 per job                                       | 16<br>1 gpu | 0        | no            | 12 / 12h           |
| **qviz**         | > 0                  | 2 with NVIDIA® Quadro RTX™ 6000                           | 8           | 2        | no            | 1 / 8h             |
| **qfat**         | > 0                  | 1 (sdf1)                                                  | 24          | 2        | yes           | 24 / 48h           |
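
For illustration, a short interactive test allocation in the express partition could be requested as follows (the project ID is a placeholder):

```console
$ salloc -A PROJECT-ID -p qcpu_exp --time 00:30:00
```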

[1]: access/project-access.md
+0 −31
Original line number Original line Diff line number Diff line
# Karolina Queues

Below is the list of queues available on the Karolina cluster:

| Queue            | Active project | Project resources    | Nodes                                                         | Min ncpus | Priority | Authorization | Walltime (default/max)  |
| ---------------- | -------------- | -------------------- | ------------------------------------------------------------- | --------- | -------- | ------------- | ----------------------- |
| **qcpu**         | yes            | > 0                  | 756 nodes                                                     | 128       | 0        | no            | 24 / 48h                |
| **qcpu_biz**     | yes            | > 0                  | 756 nodes                                                     | 128       | 0       | no            | 24 / 48h                |
| **qcpu_eurohpc** | yes            | > 0                  | 756 nodes                                                     | 128       | 0       | no            | 24 / 48h                |
| **qcpu_exp**     | yes            | none required        | 756 nodes<br>max 2 nodes per user                             | 128       | 150      | no            | 1 / 1h                  |
| **qcpu_free**    | yes            | < 150% of allocation | 756 nodes<br>max 4 nodes per job                              | 128       | -100    | no            | 12 / 18h                |
| **qcpu_long**    | yes            | > 0                  | 200 nodes<br>max 20 nodes per job, only non-accelerated nodes allowed | 128 | 0        | no            | 72 / 144h               |
| **qcpu_preempt** | yes            | > 0                  | 756 nodes<br>max 4 nodes per job                              | 128       | -200     | no            | 12 / 12h                |
| **qgpu**         | yes            | > 0                  | 72 nodes                                                      | 16 cpus<br>1 gpu | 0 | yes           | 24 / 48h                |
| **qgpu_biz**     | yes            | > 0                  | 70 nodes                                                      | 128       | 0       | yes           | 24 / 48h                |
| **qgpu_eurohpc** | yes            | > 0                  | 70 nodes                                                      | 128       | 0       | yes           | 24 / 48h                |
| **qgpu_exp**     | yes            | none required        | 4 nodes<br>max 1 node per job                                 | 16 cpus<br>1 gpu| 150| no            | 1 / 1h                  |
| **qgpu_free**    | yes            | < 150% of allocation | 46 nodes<br>max 2 nodes per job                               | 16 cpus<br>1 gpu|-100| no            | 12 / 18h                |
| **qgpu_preempt** | yes            | > 0                  | 72 nodes<br>max 2 nodes per job                               | 16 cpus<br>1 gpu|-200| no            | 12 / 12h                |
| **qviz**         | yes            | none required        | 2 nodes (with NVIDIA® Quadro RTX™ 6000)                       | 8         | 0        | no            | 1 / 8h                  |
| **qfat**         | yes            | > 0                  | 1 (sdf1)                                                      | 24        | 0        | yes           | 24 / 48h                |

## Legacy Queues

| Queue            | Active project | Project resources    | Nodes                                                         | Min ncpus | Priority | Authorization | Walltime (default/max)  |
| ---------------- | -------------- | -------------------- | ------------------------------------------------------------- | --------- | -------- | ------------- | ----------------------- |
| **qfree**        | yes            | < 150% of allocation | 756 nodes<br>max 4 nodes per job                              | 128       | -100    | no            | 12 / 12h                |
| **qexp**         | no             | none required        | 756 nodes<br>max 2 nodes per job                             | 128       | 150      | no            | 1 / 1h                  |
| **qprod**        | yes            | > 0                  | 756 nodes                                                     | 128       | 0        | no            | 24 / 48h                |
| **qlong**        | yes            | > 0                  | 200 nodes<br>max 20 nodes per job, only non-accelerated nodes allowed | 128 | 0        | no            | 72 / 144h               |
| **qnvidia**      | yes            | > 0                  | 72 nodes                                                      | 128       | 0        | yes           | 24 / 48h                |
+190 −0
Original line number Original line Diff line number Diff line
# Karolina - Job Submission and Execution

## Introduction

[Slurm][1] workload manager is used to allocate and access Karolina cluster's resources.
This page describes Karolina cluster's specific Slurm settings and usage.
General information about Slurm usage at IT4Innovations can be found at [Slurm Job Submission and Execution][2].

## Partition Information

Partitions/queues on the system:

```console
$ sinfo -s
PARTITION    AVAIL  TIMELIMIT   NODES(A/I/O/T) NODELIST
qcpu*           up 2-00:00:00      1/717/0/718 cn[001-718]
qcpu_biz        up 2-00:00:00      1/717/0/718 cn[001-718]
qcpu_exp        up    1:00:00      1/719/0/720 cn[001-720]
qcpu_free       up   18:00:00      1/717/0/718 cn[001-718]
qcpu_long       up 6-00:00:00      1/717/0/718 cn[001-718]
qcpu_preempt    up   12:00:00      1/717/0/718 cn[001-718]
qgpu            up 2-00:00:00        0/70/0/70 acn[01-70]
qgpu_big        up   12:00:00        71/1/0/72 acn[01-72]
qgpu_biz        up 2-00:00:00        0/70/0/70 acn[01-70]
qgpu_exp        up    1:00:00        0/72/0/72 acn[01-72]
qgpu_free       up   18:00:00        0/70/0/70 acn[01-70]
qgpu_preempt    up   12:00:00        0/70/0/70 acn[01-70]
qfat            up 2-00:00:00          0/1/0/1 sdf1
qviz            up    8:00:00          0/2/0/2 viz[1-2]
```

For more information about Karolina's queues, see [this page][8].

A graphical representation of cluster usage, partitions, nodes, and jobs can be found
at [https://extranet.it4i.cz/rsweb/karolina][3].

On the Karolina cluster:

* all CPU queues/partitions provide full node allocation; whole nodes (all node resources) are allocated to a job.
* other queues/partitions (gpu, fat, viz) provide partial node allocation; the job's resources (CPU, memory) are separated and dedicated to the job.

!!! important "Partial node allocation and security"
    Division of nodes means that if two users allocate a portion of the same node, they can see each other's running processes.
    If this solution is inconvenient for you, consider allocating a whole node.
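
A whole node on a partially allocated partition can be requested with Slurm's `--exclusive` option, for example as a jobscript directive (a minimal sketch):

```console
#SBATCH --exclusive
```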


IT4I clusters are monitored for resource utilization.
One of the monitoring daemons uses registers to collect performance
monitoring counters (PMC), which the user may need when analysing the performance
of the executed application (perf or [Score-P][10] profiling tools).
To deactivate the daemon and release the respective registers, set the job feature
during allocation, as specified [here][9].

## Using CPU Queues

Access [standard compute nodes][4].
Whole nodes are allocated. Use the `--nodes` option to specify the number of requested nodes.
There is no need to specify the number of cores and memory size.

```console
#!/usr/bin/bash
#SBATCH --job-name MyJobName
#SBATCH --account PROJECT-ID
#SBATCH --partition qcpu
#SBATCH --time 12:00:00
#SBATCH --nodes 8
...
```
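
The script is then submitted with `sbatch` (the file name is illustrative):

```console
$ sbatch myjob.sh
```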

## Using GPU Queues

Access [GPU accelerated nodes][5].
Every GPU accelerated node is divided into eight parts, each part contains one GPU, 16 CPU cores and corresponding memory.
By default, only one part, i.e. 1/8 of the node - one GPU and corresponding CPU cores and memory, is allocated.
There is no need to specify the number of cores or the memory size; on the contrary, doing so is undesirable.
Some restrictions are in place to ensure a fair division and efficient use of node resources.

```console
#!/usr/bin/bash
#SBATCH --job-name MyJobName
#SBATCH --account PROJECT-ID
#SBATCH --partition qgpu
#SBATCH --time 12:00:00
...
```

To allocate more GPUs, use the `--gpus` option.
The default behavior is to allocate enough nodes to satisfy the resources requested by the `--gpus` option, without delaying the start of the job.

The following code requests one GPU. One GPU and 16 CPU cores will be allocated to the job. Up to eight such jobs can run on a single GPU node.

```console
#SBATCH --gpus 1
```

The following code requests four GPUs; the scheduler can allocate from one up to four nodes, depending on the actual cluster state (i.e. GPU availability), to fulfil the request.

```console
#SBATCH --gpus 4
```

The following code requests 16 GPUs; the scheduler can allocate from two up to sixteen nodes, depending on the actual cluster state (i.e. GPU availability), to fulfil the request.

```console
#SBATCH --gpus 16
```

To allocate GPUs within one node, you have to specify the `--nodes` option.

The following code requests four GPUs on exactly one node.

```console
#SBATCH --gpus 4
#SBATCH --nodes 1
```

The following code requests 16 GPUs on exactly two nodes.

```console
#SBATCH --gpus 16
#SBATCH --nodes 2
```

Alternatively, you can use the `--gpus-per-node` option.
Only the value 8 is allowed for multi-node allocations, to prevent fragmenting nodes.

The following code requests 16 GPUs on exactly two nodes.

```console
#SBATCH --gpus-per-node 8
#SBATCH --nodes 2
```

For large jobs that require more than 16 GPU nodes (i.e. at least 128 GPUs), the `qgpu_big` queue is designated; it allows up to 64 GPU nodes (up to 512 GPUs) per job.
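
Such a job might be requested, for instance, like this (an illustrative sketch; 32 nodes × 8 GPUs = 256 GPUs):

```console
#SBATCH --partition qgpu_big
#SBATCH --nodes 32
#SBATCH --gpus-per-node 8
```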


## Using Fat Queue

Access the [data analytics (fat) node][6].
The fat node is divided into 32 parts; each part contains one socket/processor (24 cores) and the corresponding memory.
By default, only one part, i.e. 1/32 of the node (one processor and the corresponding memory), is allocated.

To allocate the requested memory, use the `--mem` option.
The corresponding CPUs will be allocated.
The fat node has about 22.5 TB of memory available for jobs.

```console
#!/usr/bin/bash
#SBATCH --job-name MyJobName
#SBATCH --account PROJECT-ID
#SBATCH --partition qfat
#SBATCH --time 2:00:00
#SBATCH --mem 6TB
...
```

You can also specify CPU-oriented options (like `--cpus-per-task`); the appropriate amount of memory will then be allocated to the job.
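
For example (an illustrative sketch; 48 cores correspond to two of the 32 parts, and the matching share of memory is allocated):

```console
#SBATCH --partition qfat
#SBATCH --cpus-per-task 48
```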

To allocate a whole fat node, use the `--exclusive` option:

```console
#SBATCH --exclusive
```

## Using Viz Queue

Access [visualization nodes][7].
Every visualization node is divided into eight parts.
By default, only one part, i.e. 1/8 of the node, is allocated.

```console
$ salloc -A PROJECT-ID -p qviz
```

To allocate a whole visualization node, use the `--exclusive` option:

```console
$ salloc -A PROJECT-ID -p qviz --exclusive
```

[1]: https://slurm.schedmd.com/
[2]: /general/slurm-job-submission-and-execution
[3]: https://extranet.it4i.cz/rsweb/karolina
[4]: /karolina/compute-nodes/#compute-nodes-without-accelerators
[5]: /karolina/compute-nodes/#compute-nodes-with-a-gpu-accelerator
[6]: /karolina/compute-nodes/#data-analytics-compute-node
[7]: /karolina/visualization/
[8]: ./karolina-partitions.md
[9]: /job-features/#cluster-monitoring
[10]: /software/debuggers/score-p/
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -5,7 +5,16 @@


If you are not eligible for an e-INFRA CZ account, contact the [IT4I support][a] (email: [support\[at\]it4i.cz][b]) and provide the following information:
If you are not eligible for an e-INFRA CZ account, contact the [IT4I support][a] (email: [support\[at\]it4i.cz][b]) and provide the following information:


1. Full name, country/countries of citizenship, academic affiliation, and country of affiliation
1. Personal information (**required**, note that without this information, you cannot use IT4I resources):
    1. **Full name**
    1. **Gender**
    1. **Citizenship**
    1. **Country of residence**
    1. **Organization/affiliation**
    1. **Organization/affiliation country**
    1. **Organization/affiliation type** (university, company, R&D institution, private/public sector (hospital, police), academy of sciences, etc.)
    1. **Job title** (student, PhD student, researcher, research assistant, employee, etc.)
1. Project name and/or primary investigator's (PI) name. Project name consists of project type (OPEN|DD|EU|ATR|FTA|ICA) and number in -XX-XX format, for example OPEN-33-12.
1. Statement that you have read and accepted the [Acceptable use policy document][c] (AUP)
1. Statement that you have read and accepted the [Acceptable use policy document][c] (AUP)
1. Attach the AUP file
1. Attach the AUP file
1. Your preferred username (length is limited between 4 and 7 letters)<br>The preferred username must associate with your first and last name or be otherwise derived from it. Note that the system will automatically add the `it4i-` prefix to your username.
1. Your preferred username (length is limited between 4 and 7 letters)<br>The preferred username must associate with your first and last name or be otherwise derived from it. Note that the system will automatically add the `it4i-` prefix to your username.
@@ -19,9 +28,9 @@ Subject: Access to IT4Innovations


Dear support,
Dear support,


Please open the user account for me and attach the account to OPEN-0-0
Please open the user account for me and attach the account to PROJECTNAME-XX-XX.
Personal information: John Smith, USA, Department of Chemistry, MIT, MA, US
Personal information: John Smith, USA, Department of Chemistry, MIT, MA, US.
I have read and accept the Acceptable use policy document (attached)
I have read and accept the Acceptable use policy document (attached).


Preferred username: johnsm
Preferred username: johnsm


@@ -64,7 +73,7 @@ e.g. providing sensitive information such as ID scan or user login/password.
The following example is for Actalis free S/MIME certificate, but you can choose your preferred CA.
The following example is for Actalis free S/MIME certificate, but you can choose your preferred CA.


1. Go to the [Actalis Free Email Certificate][l] request form.
1. Go to the [Actalis Free Email Certificate][l] request form.
1. Follow the instructions: fill out the form, accept the terms and conditions, and submit the request.
1. Select the free version - Mailbox Validated - and remove the €6.00 renewal item from your cart before proceeding with the order.
1. You will receive an email with the certificate.
1. You will receive an email with the certificate.
1. Import the certificate to one of the supported email clients.
1. Import the certificate to one of the supported email clients.
1. Attach a scan of photo ID (personal ID, passport, or driver license) to your email request for IT4I account.
1. Attach a scan of photo ID (personal ID, passport, or driver license) to your email request for IT4I account.
@@ -84,7 +93,7 @@ The following example is for Actalis free S/MIME certificate, but you can choose


[a]: https://support.it4i.cz/rt/
[a]: https://support.it4i.cz/rt/
[b]: mailto:support@it4i.cz
[b]: mailto:support@it4i.cz
[c]: https://www.it4i.cz/file/281883408ded04bd0961113ea33b8118/7450/AUP2022-v4-CZE-ENG.final.signed[94].pdf
[c]: https://docs.it4i.cz/general/aup/
[d]: http://support.it4i.cz/
[d]: http://support.it4i.cz/
[e]: https://scs.it4i.cz
[e]: https://scs.it4i.cz
[f]: http://www.igtf.net/
[f]: http://www.igtf.net/
@@ -93,7 +102,7 @@ The following example is for Actalis free S/MIME certificate, but you can choose
[i]: http://www.postsignum.cz/
[i]: http://www.postsignum.cz/
[j]: http://www.ica.cz/Kvalifikovany-certifikat.aspx
[j]: http://www.ica.cz/Kvalifikovany-certifikat.aspx
[k]: http://idoc.vsb.cz/xwiki/wiki/infra/view/uzivatel/moz-cert-gen
[k]: http://idoc.vsb.cz/xwiki/wiki/infra/view/uzivatel/moz-cert-gen
[l]: https://extrassl.actalis.it/portal/uapub/freemail?lang=en
[l]: https://www.actalis.com/s-mime-certificates
[r]: https://www.it4i.cz/computing-resources-allocation/?lang=en
[r]: https://www.it4i.cz/computing-resources-allocation/?lang=en
[s]: https://extranet.it4i.cz/ssp/?action=changesshkey
[s]: https://extranet.it4i.cz/ssp/?action=changesshkey
[u]: https://www.eduid.cz/
[u]: https://www.eduid.cz/
Original line number Original line Diff line number Diff line
!!!warning
    This page has not been updated yet. The page does not reflect the transition from PBS to Slurm.

# Job Submission and Execution

## Job Submission

When allocating computational resources for the job, specify:

1. a suitable queue for your job (the default is qprod)
1. the number of computational nodes (required)
1. the number of cores per node (not required)
1. the maximum wall time allocated to your calculation, note that jobs exceeding the maximum wall time will be killed
1. your Project ID
1. a Jobscript or interactive switch

Submit the job using the `qsub` command:

```console
$ qsub -A Project_ID -q queue -l select=x:ncpus=y,walltime=[[hh:]mm:]ss[.ms] jobscript
```

The `qsub` command submits the job to the queue, i.e. it creates a request to the PBS Job manager for allocation of specified resources. The resources will be allocated when available, subject to the above described policies and constraints. **After the resources are allocated, the jobscript or interactive shell is executed on the first of the allocated nodes.**

!!! note
    `ncpus=y` is usually not required, because the smallest allocation unit is an entire node. The exception are corner cases for `qviz` and `qfat` on Karolina.

### Job Submission Examples

```console
$ qsub -A OPEN-0-0 -q qprod -l select=64,walltime=03:00:00 ./myjob
```

In this example, we allocate 64 nodes, 36 cores per node, for 3 hours. We allocate these resources via the `qprod` queue, consumed resources will be accounted to the project identified by Project ID `OPEN-0-0`. The jobscript `myjob` will be executed on the first node in the allocation.

```console
$ qsub -q qexp -l select=4 -I
```

In this example, we allocate 4 nodes, 36 cores per node, for 1 hour. We allocate these resources via the `qexp` queue. The resources will be available interactively.

```console
$ qsub -A OPEN-0-0 -q qnvidia -l select=10 ./myjob
```

In this example, we allocate 10 NVIDIA accelerated nodes, 24 cores per node, for 24 hours. We allocate these resources via the `qnvidia` queue. The jobscript `myjob` will be executed on the first node in the allocation.

```console
$ qsub -A OPEN-0-0 -q qfree -l select=10 ./myjob
```

In this example, we allocate 10 nodes, 24 cores per node, for 12 hours. We allocate these resources via the `qfree` queue. It is not required that the project `OPEN-0-0` has any available resources left. Consumed resources are still accounted for. The jobscript `myjob` will be executed on the first node in the allocation.

All `qsub` options may be [saved directly into the jobscript][1]. In such cases, it is not necessary to specify any options for `qsub`.

```console
$ qsub ./myjob
```

By default, the PBS batch system sends an email only when the job is aborted. Disabling mail events completely can be done as follows:

```console
$ qsub -m n
```

#### Dependency Job Submission

To submit dependent jobs in sequence, use the `depend` function of `qsub`.

First submit the first job in a standard manner:

```console
$ qsub -A OPEN-0-0 -q qprod -l select=64,walltime=02:00:00 ./firstjob
123456[].isrv1
```

Then submit the second job using the `depend` function:

```console
$ qsub -W depend=afterok:123456 ./secondjob
```

Both jobs will be queued, but the second job won't start until the first job has finished successfully.

Below is the list of arguments that can be used with `-W depend=dependency:jobid`:

| Argument    | Description                                                     |
| ----------- | --------------------------------------------------------------- |
| after       | This job is scheduled after `jobid` begins execution.       |
| afterok     | This job is scheduled after `jobid` finishes successfully.  |
| afternotok  | This job is scheduled after `jobid` finishes unsuccessfully. |
| afterany    | This job is scheduled after `jobid` finishes in any state.  |
| before      | This job must begin execution before `jobid` is scheduled.  |
| beforeok    | This job must finish successfully before `jobid` begins.        |
| beforenotok | This job must finish unsuccessfully before `jobid` begins.      |
| beforeany   | This job must finish in any state before `jobid` begins.        |

### Useful Tricks

All `qsub` options may be [saved directly into the jobscript][1]. In such a case, no options to `qsub` are needed.

```console
$ qsub ./myjob
```

By default, the PBS batch system sends an email only when the job is aborted. Disabling mail events completely can be done like this:

```console
$ qsub -m n
```

<!--- NOT IMPLEMENTED ON KAROLINA YET

## Advanced Job Placement

### Salomon - Placement by Network Location

The network location of allocated nodes in the [InfiniBand network][3] influences efficiency of network communication between nodes of job. Nodes on the same InfiniBand switch communicate faster with lower latency than distant nodes. To improve communication efficiency of jobs, PBS scheduler on Salomon is configured to allocate nodes (from currently available resources), which are as close as possible in the network topology.

For communication intensive jobs, it is possible to set stricter requirement - to require nodes directly connected to the same InfiniBand switch or to require nodes located in the same dimension group of the InfiniBand network.

### Salomon - Placement by InfiniBand Switch

Nodes directly connected to the same InfiniBand switch can communicate most efficiently. Using the same switch prevents hops in the network and provides for unbiased, most efficient network communication. There are 9 nodes directly connected to every InfiniBand switch.

!!! note
    We recommend allocating compute nodes of a single switch when the best possible computational network performance is required to run job efficiently.

Nodes directly connected to the one InfiniBand switch can be allocated using node grouping on the PBS resource attribute `switch`.

In this example, we request all 9 nodes directly connected to the same switch using node grouping placement.

```console
$ qsub -A OPEN-0-0 -q qprod -l select=9 -l place=group=switch ./myjob
```

-->

## Advanced Job Handling

### Selecting Turbo Boost Off

!!! note
    For Barbora only.

Intel Turbo Boost Technology is on by default. We strongly recommend keeping the default.

If necessary (such as in the case of benchmarking), you can disable Turbo for all nodes of the job by using the PBS resource attribute `cpu_turbo_boost`:

```console
$ qsub -A OPEN-0-0 -q qprod -l select=4 -l cpu_turbo_boost=0 -I
```

More information about the Intel Turbo Boost can be found in the TurboBoost section.

### Advanced Examples

In the following example, we select an allocation for benchmarking a very special and demanding MPI program. We request Turbo off, and 2 full chassis of compute nodes (nodes sharing the same IB switches) for 30 minutes:

```console
$ qsub -A OPEN-0-0 -q qprod
    -l select=18:ibswitch=isw10:mpiprocs=1:ompthreads=16+18:ibswitch=isw20:mpiprocs=16:ompthreads=1
    -l cpu_turbo_boost=0,walltime=00:30:00
    -N Benchmark ./mybenchmark
```

The MPI processes will be distributed differently on the nodes connected to the two switches. On the isw10 nodes, we will run 1 MPI process per node with 16 threads per process, on isw20 nodes we will run 16 plain MPI processes.

Although this example is somewhat artificial, it demonstrates the flexibility of the qsub command options.

## Job Management

!!! note
    Check the status of your jobs using the `qstat` and `check-pbs-jobs` commands

```console
$ qstat -a
$ qstat -a -u username
$ qstat -an -u username
$ qstat -f 12345.srv11
```

Example:

```console
$ qstat -a

srv11:
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- --  |---|---| ------ --- --- ------ ----- - -----
16287.srv11 user1    qlong    job1         6183   4 64   --  144:0 R 38:25
16468.srv11 user1    qlong    job2         8060   4 64   --  144:0 R 17:44
16547.srv11 user2    qprod    job3x       13516   2 32   --  48:00 R 00:58
```

In this example user1 and user2 are running jobs named `job1`, `job2`, and `job3x`. `job1` and `job2` are using 4 nodes, 128 cores per node each. `job1` has already run for 38 hours and 25 minutes, and `job2` for 17 hours 44 minutes. So `job1`, for example, has already consumed `64 x 38.41 = 2,458.6` core-hours. `job3x` has already consumed `32 x 0.96 = 30.93` core-hours. These consumed core-hours will be [converted to node-hours][10] and accounted for on the respective project accounts, regardless of whether the allocated cores were actually used for computations.

The following commands allow you to check the status of your jobs using the `check-pbs-jobs` command, check for the presence of user's PBS jobs' processes on execution hosts, display load and processes, display job standard and error output, and continuously display (`tail -f`) job standard or error output.

```console
$ check-pbs-jobs --check-all
$ check-pbs-jobs --print-load --print-processes
$ check-pbs-jobs --print-job-out --print-job-err
$ check-pbs-jobs --jobid JOBID --check-all --print-all
$ check-pbs-jobs --jobid JOBID --tailf-job-out
```

Examples:

```console
$ check-pbs-jobs --check-all
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Check session id: OK
Check processes
cn164: OK
cn165: No process
```

In this example we see that job `35141.dm2` is not currently running any processes on the allocated node cn165, which may indicate an execution error:

```console
$ check-pbs-jobs --print-load --print-processes
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Print load
cn164: LOAD: 16.01, 16.01, 16.00
cn165: LOAD:  0.01,  0.00,  0.01
Print processes
       %CPU CMD
cn164:  0.0 -bash
cn164:  0.0 /bin/bash /var/spool/PBS/mom_priv/jobs/35141.dm2.SC
cn164: 99.7 run-task
...
```

In this example, we see that job `35141.dm2` is currently running a process run-task on node `cn164`, using one thread only, while node `cn165` is empty, which may indicate an execution error.

```console
$ check-pbs-jobs --jobid 35141.dm2 --print-job-out
JOB 35141.dm2, session_id 71995, user user2, nodes cn164,cn165
Print job standard output:
======================== Job start  ==========================
Started at    : Fri Aug 30 02:47:53 CEST 2013
Script name   : script
Run loop 1
Run loop 2
Run loop 3
```

In this example, we see the actual output (some iteration loops) of the job `35141.dm2`.

!!! note
    Manage your queued or running jobs, using the `qhold`, `qrls`, `qdel`, `qsig`, or `qalter` commands

You may release your allocation at any time, using the `qdel` command

```console
$ qdel 12345.srv11
```

You may kill a running job by force, using the `qsig` command

```console
$ qsig -s 9 12345.srv11
```

Learn more by reading the PBS man page

```console
$ man pbs_professional
```

## Job Execution

### Jobscript

!!! note
    Prepare the jobscript to run batch jobs in the PBS queue system

The Jobscript is a user made script controlling a sequence of commands for executing the calculation. It is often written in bash, though other scripts may be used as well. The jobscript is supplied to the PBS `qsub` command as an argument, and is executed by the PBS Professional workload manager.

!!! note
    The jobscript or interactive shell is executed on first of the allocated nodes.

```console
$ qsub -q qexp -l select=4 -N Name0 ./myjob
$ qstat -n -u username

srv11:
                                                            Req'd Req'd   Elap
Job ID          Username Queue    Jobname    SessID NDS TSK Memory Time S Time
--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----
15209.srv11     username qexp     Name0        5530   4 128    --  01:00 R 00:00
   cn17/0*32+cn108/0*32+cn109/0*32+cn110/0*32
```

In this example, the nodes `cn17`, `cn108`, `cn109`, and `cn110` were allocated for 1 hour via the `qexp` queue. The `myjob` jobscript will be executed on the node `cn17`, while the nodes `cn108`, `cn109`, and `cn110` are available for use as well.

The jobscript or interactive shell is by default executed in the `/home` directory:

```console
$ qsub -q qexp -l select=4 -I
qsub: waiting for job 15210.srv11 to start
qsub: job 15210.srv11 ready

$ pwd
/home/username
```

In this example, 4 nodes were allocated interactively for 1 hour via the `qexp` queue. The interactive shell is executed in the `/home` directory.

!!! note
    All nodes within the allocation may be accessed via SSH. Unallocated nodes are not accessible to the user.

The allocated nodes are accessible via SSH from login nodes. The nodes may access each other via SSH as well.
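
For example (using the node names from the allocation above), you can log in to any allocated node directly:

```console
$ ssh cn108
```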

Calculations on allocated nodes may be executed remotely via MPI, SSH, pdsh, or clush. You may find out which nodes belong to the allocation by reading the `$PBS_NODEFILE` file:

```console
$ qsub -q qexp -l select=4 -I
qsub: waiting for job 15210.srv11 to start
qsub: job 15210.srv11 ready

$ pwd
/home/username

$ sort -u $PBS_NODEFILE
cn17.bullx
cn108.bullx
cn109.bullx
cn110.bullx

$ pdsh -w cn17,cn[108-110] hostname
cn17: cn17
cn108: cn108
cn109: cn109
cn110: cn110
```

In this example, the `hostname` program is executed via `pdsh` from the interactive shell. The execution runs on all four allocated nodes. The same result would be achieved if `pdsh` were called from any of the allocated nodes or from the login nodes.
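
The same remote execution can be sketched with `clush`, which is also mentioned above; the node names are those of the example allocation:

```console
$ clush -w cn17,cn[108-110] hostname
```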

### Example Jobscript for MPI Calculation

!!! note
    Production jobs must use the `/scratch` directory for I/O

The recommended way to run production jobs is to change to the `/scratch` directory early in the jobscript, copy all inputs to `/scratch`, execute the calculations, and copy outputs to the `/home` directory.

```bash
#!/bin/bash

cd $PBS_O_WORKDIR

SCRDIR=/scratch/project/open-00-00/${USER}/myjob
mkdir -p $SCRDIR

# change to scratch directory, exit on failure
cd $SCRDIR || exit

# copy input file to scratch
cp $PBS_O_WORKDIR/input .
cp $PBS_O_WORKDIR/mympiprog.x .

# load the MPI module
# (Always specify the module's name and version in your script;
# for the reason, see https://docs.it4i.cz/software/modules/lmod/#loading-modules.)
ml OpenMPI/4.1.1-GCC-10.2.0-Java-1.8.0_221

# execute the calculation
mpirun -pernode ./mympiprog.x

# copy output file to home
cp output $PBS_O_WORKDIR/.

#exit
exit
```

In this example, a directory in `/home` holds the input file `input` and the `mympiprog.x` executable. We create the `myjob` directory on the `/scratch` filesystem, copy the input and executable files from the `/home` directory where the `qsub` was invoked (`$PBS_O_WORKDIR`) to `/scratch`, execute the MPI program `mympiprog.x`, and copy the output file back to the `/home` directory. `mympiprog.x` is executed as one process per node, on all allocated nodes.
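
Assuming the jobscript above is saved as `myjob` in the submission directory, it could be submitted along these lines; the project ID, queue, and node count are placeholders:

```console
$ qsub -A OPEN-00-00 -q qprod -l select=4:ncpus=128 ./myjob
```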

!!! note
    Consider preloading inputs and executables onto the [shared scratch][6] storage before the calculation starts.

In some cases, it may be impractical to copy the inputs to the `/scratch` storage and the outputs to the `/home` directory. This is especially true when very large input and output files are expected, or when the files should be reused by a subsequent calculation. In such cases, it is the user's responsibility to preload the input files on the shared `/scratch` storage before the job submission, and to retrieve the outputs manually after all calculations are finished.

!!! note
    Store the `qsub` options within the jobscript. Use the `mpiprocs` and `ompthreads` qsub options to control the MPI job execution.

### Example Jobscript for MPI Calculation With Preloaded Inputs

Example jobscript for an MPI job with preloaded inputs and executables, options for `qsub` are stored within the script:

```bash
#!/bin/bash
#PBS -q qprod
#PBS -N MYJOB
#PBS -l select=100:mpiprocs=1:ompthreads=16
#PBS -A OPEN-00-00

# job is run using project resources; here ${PBS_ACCOUNT,,} translates to "open-00-00"
SCRDIR=/scratch/project/${PBS_ACCOUNT,,}/${USER}/myjob

# change to scratch directory, exit on failure
cd $SCRDIR || exit

# load the MPI module
# (Always specify the module's name and version in your script;
# for the reason, see https://docs.it4i.cz/software/modules/lmod/#loading-modules.)
ml OpenMPI/4.1.1-GCC-10.2.0-Java-1.8.0_221

# execute the calculation
mpirun ./mympiprog.x

#exit
exit
```

In this example, input and executable files are assumed to be preloaded manually in the `/scratch/project/open-00-00/$USER/myjob` directory. Because we used the `qprod` queue, we had to specify which project's resources we want to use, and our `PBS_ACCOUNT` variable will be set accordingly (OPEN-00-00). `${PBS_ACCOUNT,,}` uses bash's built-in parameter expansion to convert the value to lowercase.
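
The lowercasing itself is ordinary bash parameter expansion; this minimal sketch (with a made-up account value) shows the effect:

```bash
#!/bin/bash
# In a real job, PBS_ACCOUNT is set by PBS Professional; the value here is illustrative.
PBS_ACCOUNT="OPEN-00-00"
echo "${PBS_ACCOUNT,,}"   # prints: open-00-00
```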

Note the `mpiprocs` and `ompthreads` qsub options controlling the behavior of the MPI execution. `mympiprog.x` is executed as one process per node, on all 100 allocated nodes. If `mympiprog.x` implements OpenMP threads, it will run 16 threads per node.
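
For illustration, the same mechanism can request a pure-MPI layout on the same nodes by changing only the select line; the values are hypothetical:

```bash
#PBS -l select=100:mpiprocs=128:ompthreads=1
```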

### Example Jobscript for Single Node Calculation

!!! note
    The local scratch directory is often useful for single-node jobs. It is deleted immediately after the job ends.

Example jobscript for a single-node calculation, using the [local scratch][6] directory on the node:

```bash
#!/bin/bash

# change to local scratch directory
cd /lscratch/$PBS_JOBID || exit

# copy input file to scratch
cp $PBS_O_WORKDIR/input .
cp $PBS_O_WORKDIR/myprog.x .

# execute the calculation
./myprog.x

# copy output file to home
cp output $PBS_O_WORKDIR/.

#exit
exit
```

In this example, a directory in `/home` holds the input file `input` and the executable `myprog.x`. We copy the input and executable files from the `/home` directory where the `qsub` was invoked (`$PBS_O_WORKDIR`) to the local scratch directory `/lscratch/$PBS_JOBID`, execute `myprog.x`, and copy the output file back to the `/home` directory. `myprog.x` runs on one node only and may use threads.
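
For instance, such a single-node jobscript could be submitted via the `qexp` queue (the script name is the example's `myjob`):

```console
$ qsub -q qexp -l select=1 ./myjob
```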

### Other Jobscript Examples

Further jobscript examples may be found in the software section and the [Capacity computing][9] section.

[1]: #example-jobscript-for-mpi-calculation-with-preloaded-inputs
[2]: resources-allocation-policy.md
[3]: ../salomon/network.md
[5]: ../salomon/7d-enhanced-hypercube.md
[6]: ../salomon/storage.md
[9]: capacity-computing.md
[10]: resources-allocation-policy.md#resource-accounting-policy
# Resource Accounting Policy

Starting with the 24<sup>th</sup> open access grant competition,
the accounting policy has been changed from [normalized core hours (NCH)][2a] to **node-hours (NH)**.
This means that it is now required to apply for node hours of the specific cluster and node type:

1. [Barbora CPU][3a]
1. [Barbora GPU][4a]
1. [Karolina GPU][8a]
1. [Karolina FAT][9a]

The accounting runs whenever the nodes are allocated via the Slurm workload manager (the `sbatch` or `salloc` command),
regardless of whether the nodes are actually used for any calculation.
The same rule applies for unspent [reservations][10a].


## Resource Accounting Formula

| Resources                       | NH Consumed                  |
| ------------------------------- | ---------------------------- |
| Barbora All types, Karolina CPU | allocated nodes \* time      |
| Karolina GPU                    | allocated gpus \* time / 8   |
| Karolina FAT                    | allocated cpus \* time / 768 |
| Karolina VIZ                    | allocated cpus \* time / 64  |

time: duration of the Slurm job in hours
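
As an illustrative calculation (the job sizes here are made up, not part of the policy text): a job occupying 4 Karolina GPUs for 10 hours is accounted as `4 * 10 / 8 = 5` NH, while a 2-node Barbora CPU job running for 10 hours consumes `2 * 10 = 20` NH.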


!!! important "CPU/GPU resources granularity"
    Minimal granularity of all Barbora's partitions and Karolina's CPU partition is 1 node.
    This means that if you request, for example, 32 cores on Karolina's CPU partition,
    your job will still consume 1 NH \* time.

    All other Karolina's partitions (GPU, FAT, VIZ) provide partial node allocation;
    i.e.: if you request 4 GPUs on Karolina, you will consume only 0.5 NH \* time.

## Conversion Table

| Resources    | Conversion for 1 node-hour |
| ------------ | -------------------------- |
| Barbora CPU  | 36 core-hours              |
| Barbora GPU  | 4 GPU hours                |
| Barbora FAT  | 128 core-hours             |
| DGX-2        | 16 GPU hours               |
| Karolina CPU | 128 core-hours             |
| Karolina GPU | 8 GPU hours                |
| Karolina FAT | 768 core-hours             |

## Original Resource Accounting Policy

The original policy, as stated below, is still applied to projects from previous grant competitions.

### Wall-Clock Core-Hours WCH

The wall-clock core-hours (WCH) are the basic metric of computer utilization time.
1 wall-clock core-hour is defined as 1 processor core allocated for 1 hour of wall-clock time. For example, allocating a full node (i.e. 36 cores) on Barbora for 1 hour amounts to 36 wall-clock core-hours.

### Normalized Core-Hours NCH

The resources subject to accounting are the normalized core-hours (NCH).
The normalized core-hours are obtained from WCH by applying a normalization factor:

$$
NCH = F*WCH
$$

All jobs are accounted in normalized core-hours, using factor F valid at the time of the execution:

| System        | F    |
| --------------| ---: |
| Karolina      | 1.00 |
| Barbora CPU   | 1.40 |
| Barbora GPU   | 4.50 |
| DGX-2         | 5.50 |

Factors are valid as of July 9, 2022.
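
As a worked illustration of the formula above, a job that accumulates 1000 wall-clock core-hours on Barbora CPU is accounted as:

$$
NCH = 1.40 \times 1000 = 1400
$$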

The normalized core-hours were introduced to treat systems of different age on equal footing.
Normalized core-hour is an accounting tool to discount the legacy systems.

See examples in the [Job submission and execution][1a] section.

### Consumed Resources

Check how many core-hours have been consumed. The command `it4ifree` is available on cluster login nodes.

```console
$ it4ifree

Projects I am participating in
==============================
PID         Days left      Total    Used WCHs    Used NCHs    WCHs by me    NCHs by me     Free
----------  -----------  -------  -----------  -----------  ------------  ------------  -------
OPEN-XX-XX  323                0      5169947      5169947         50001         50001  1292555


Projects I am Primarily Investigating
=====================================
PID        Login         Used WCHs    Used NCHs
---------- ----------  -----------  -----------
OPEN-XX-XX user1            376670       376670
           user2           4793277      4793277

Legend
======
WCH   =    Wall-clock Core Hour
NCH   =    Normalized Core Hour
```

The `it4ifree` command is a part of the `it4i.portal.clients` package, located [here][pypi].


[1a]: job-submission-and-execution.md
[2a]: #normalized-core-hours-nch
[8a]: ../../karolina/compute-nodes/#compute-nodes-with-a-gpu-accelerator
[9a]: ../../karolina/compute-nodes/#data-analytics-compute-node
[10a]: resource_allocation_and_job_execution.md#resource-reservation

[pypi]: https://pypi.python.org/pypi/it4i.portal.clients
# How to Run Jobs

!!! important "Barbora migrating to Slurm"
    Starting July 19, 9 AM, we are migrating Barbora's workload manager **from PBS to Slurm**.
    For more information on how to submit jobs in Slurm, see the [Slurm Job Submission and Execution][8] section.

## Job Submission and Execution

To run a [job][1], computational resources for this particular job must be allocated. This is done via the [Slurm][a] job workload manager software, which distributes workloads across the supercomputer.

The `sbatch` or `salloc` command creates a request to the Slurm job manager for allocation of specified resources.
The resources will be allocated when available, subject to allocation policies and constraints.
**After the resources are allocated, the jobscript or interactive shell is executed on the first of the allocated nodes.**

Read more on the [Job Submission and Execution][5] page.
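
A minimal sketch of the two submission modes follows; the project ID, partition name, and node count are placeholders:

```console
$ sbatch -A OPEN-00-00 -p qcpu ./myjob.sh
$ salloc -A OPEN-00-00 -p qcpu -N 2
```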


## Resource Allocation Policy

Resources are allocated to the job in a fair-share fashion, subject to constraints set by the queue and resources available to the Project. [The Fair-share][3] ensures that individual users may consume approximately equal amounts of resources per week. The resources are accessible via queues for queueing the jobs. The queues provide prioritized and exclusive access to the computational resources.

!!! note
    See the queue status for [Karolina][d] or [Barbora][e].

Read more on the [Resource Allocation Policy][4] page.

## Resource Reservation

You can request a reservation of a specific number, range, or type of computational resources at [support@it4i.cz][c].
Note that unspent reserved node-hours count towards the total computational resources used.

## Capacity Computing

!!! note
    Use Job arrays when running a huge number of jobs.

Use GNU Parallel and/or Job arrays when running (many) single-core jobs.

In many cases, it is useful to submit a huge (100+) number of computational jobs into the PBS queue system. A huge number of (small) jobs is one of the most effective ways to execute parallel calculations, achieving the best runtime, throughput, and computer utilization. In this chapter, we discuss the recommended way to run huge numbers of jobs, including **ways to run huge numbers of single-core jobs**.

Read more on the [Capacity Computing][6] page.

## Vnode Allocation

The `qgpu` queue on Karolina takes advantage of the division of nodes into vnodes. An accelerated node equipped with two 64-core processors and eight GPU cards is treated as eight vnodes, each containing 16 CPU cores and 1 GPU card. Vnodes can be allocated to jobs individually: through precise definition of the resource list at job submission, you may allocate a varying number of resources/GPU cards according to your needs.

Read more on the [Vnode Allocation][7] page.


[1]: ../index.md#terminology-frequently-used-on-these-pages
[2]: https://slurm.schedmd.com/documentation.html
[3]: job-priority.md#fair-share-priority
[4]: resources-allocation-policy.md
[5]: job-submission-and-execution.md
[6]: capacity-computing.md
[7]: vnode-allocation.md
[8]: slurm-job-submission-and-execution.md

[a]: https://slurm.schedmd.com/
[b]: https://slurm.schedmd.com/documentation.html
[c]: mailto:support@it4i.cz
[d]: https://extranet.it4i.cz/rsweb/karolina/queues
[e]: https://extranet.it4i.cz/rsweb/barbora/queues
Computational resources are subject to [accounting policy][7].


!!! important
    Queues are divided based on a resource type: `qcpu_` for non-accelerated nodes and `qgpu_` for accelerated nodes. <br><br>
    EuroHPC queues are no longer available. If you are an EuroHPC user, use standard queues based on allocated/required type of resources.


### Queues


| <div style="width:86px">Queue</div>| Description |
| -------------------------------- | ----------- |
| `qcpu`                           | Production queue for non-accelerated nodes intended for standard production runs. Requires an active project with nonzero remaining resources. Full nodes are allocated. Identical to `qprod`. |
| `qgpu`                           | Dedicated queue for accessing the NVIDIA accelerated nodes. Requires an active project with nonzero remaining resources. It utilizes 8x NVIDIA A100 with 320GB HBM2 memory per node. The PI needs to explicitly ask support for authorization to enter the queue for all users associated with their project. **On Karolina, you can allocate 1/8 of the node - 1 GPU and 16 cores**. For more information, see [Karolina qgpu allocation][4]. |
| `qgpu_big`                       | Intended for big jobs (>16 nodes), queue priority is lower than production queue priority, **priority is temporarily increased every even weekend**. |
| `qcpu_biz`<br>`qgpu_biz`         | Commercial queues, slightly higher priority. |
| `qcpu_exp`<br>`qgpu_exp`         | Express queues for testing and running very small jobs. There are 2 nodes always reserved (w/o accelerators), max 8 nodes available per user. The nodes may be allocated on a per core basis. It is configured to run one job and accept five jobs in a queue per user. |
| `qcpu_free`<br>`qgpu_free`       | Intended for utilization of free resources, after a project has exhausted all its allocated resources. Note that the queue is **not free of charge**. [Normal accounting][2] applies. Consumed resources will be accounted to the Project. Access to the queue is removed if consumed resources exceed 150% of the allocation. Full nodes are allocated. |
| `qcpu_long`                      | Queues for long production runs. Require an active project with nonzero remaining resources. Only 200 nodes without acceleration may be accessed. Full nodes are allocated. |
| `qcpu_preempt`<br>`qgpu_preempt` | Free queues with the lowest priority (LP). The queues require a project with allocation of the respective resource type. There is no limit on resource overdraft. Jobs are killed if other jobs with a higher priority (HP) request the nodes and there are no other nodes available. LP jobs are automatically re-queued once HP jobs finish, so **make sure your jobs are re-runnable**. |
| `qdgx`                           | Queue for DGX-2, accessible from Barbora. |
| `qfat`                           | Queue for fat node, PI must request authorization to enter the queue for all users associated with their project. |
| `qviz`                           | Visualization queue intended for pre-/post-processing using OpenGL accelerated graphics. Each user gets 8 cores of a CPU allocated (approx. 64 GB of RAM and 1/8 of the GPU capacity (default "chunk")). If more GPU power or RAM is required, it is recommended to allocate more chunks (with 8 cores each) up to one whole node per user. This is currently also the maximum allowed allocation per one user. One hour of work is allocated by default; the user may ask for 2 hours maximum. |


### Legacy Queues

Legacy queues stay in production until early 2023.

| Legacy queue | Replaced by               |
| ------------ | ------------------------- |
| `qexp`       | `qcpu_exp` & `qgpu_exp`   |
| `qprod`      | `qcpu`                    |
| `qlong`      | `qcpu_long`               |
| `nvidia`     | `qgpu` Note that unlike in new queues, only full nodes can be allocated. |
| `qfree`      | `qcpu_free` & `qgpu_free` |

See the following subsections for the list of queues:

* [Karolina queues][5]


## Queue Notes

The job time limit defaults to **half the maximum time**, see the table above.
Longer time limits can be [set manually, see examples][3].

Jobs that exceed the reserved time limit get killed automatically.
The time limit can be changed for queuing jobs (state Q) using the `scontrol update` command;
however, it cannot be changed for a running job.
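
For example (the job ID and new limit are illustrative):

```console
$ scontrol update JobId=123456 TimeLimit=24:00:00
```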


## Queue Status

!!! tip
    Check the status of jobs, queues and compute nodes [here][c].

![rsweb interface](../img/barbora_cluster_usage.png)


Display the queue status:

```console
$ sinfo -s
```


The Slurm allocation overview may also be obtained using the `rsslurm` command:


```console
$ rsslurm
Usage: rsslurm [options]

Options:
  --version             show program's version number and exit
  ...
```

[1]: job-priority.md
[2]: #resource-accounting-policy
[3]: job-submission-and-execution.md
[4]: karolina-slurm.md
[5]: ./karolina-partitions.md
[6]: ./barbora-partitions.md
[7]: ./resource-accounting.md

[a]: https://support.it4i.cz/rt/
---
hide:

- toc

---

# Slurm Batch Jobs Examples

Below is an excerpt from the [2024 e-INFRA CZ conference][1]
describing best practices for Slurm batch calculations and data management, including examples, by Ondrej Meca.

![PDF presentation on Slurm Batch Jobs Examples](../src/srun_karolina.pdf){ type=application/pdf style="min-height:100vh;width:100%" }

[1]: https://www.e-infra.cz/en/e-infra-cz-conference

Additional files in this comparison have their previews collapsed due to the size limit:

* `docs.it4i/pbspro.md`: deleted (+0 −11)
* `scripts/maketitle.py`: added (+81 −0)
* `scripts/meta-json.sh`: added (+11 −0)
* `scripts/movefiles.sh`: added (+12 −0)
* `scripts/movepublic.sh`: added (+10 −0)
* `scripts/url_test.py`: added (+37 −0)
* several further changed or added files whose names are not shown in the collapsed preview (+23 −0, +83 −24, +35 −47)