Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (908): 1244 additions and 132 deletions.
......@@ -12,7 +12,7 @@ docs:
image: it4innovations/docker-mdcheck:latest
allow_failure: true
script:
- mdl -r ~MD013,~MD010,~MD014,~MD024,~MD026,~MD029,~MD033,~MD036,~MD037,~MD046 *.md docs.it4i # BUGS
- find content/docs -name "*.mdx" | xargs mdl -r ~MD002,~MD007,~MD013,~MD010,~MD014,~MD024,~MD026,~MD029,~MD033,~MD036,~MD037,~MD046
pylint:
stage: test
......@@ -22,20 +22,16 @@ pylint:
script:
- pylint $(find . -name "*.py" -not -name "feslicescript.py")
pysafety:
capitalize:
stage: test
image: it4innovations/docker-pycheck:latest
image: it4innovations/docker-mkdocscheck:latest
allow_failure: true
before_script:
- source /opt/.venv3/bin/activate
- python -V # debug
- pip list | grep titlecase
script:
- cat requirements.txt | safety check --stdin --full-report
capitalize:
stage: test
image: it4innovations/docker-mkdocscheck:latest
script:
- find mkdocs.yml docs.it4i/ \( -name '*.md' -o -name '*.yml' \) -print0 | xargs -0 -n1 scripts/titlemd.py --test
- find content/docs/ \( -name '*.mdx' -o -name '*.yml' \) ! -path '*einfracz*' -print0 | xargs -0 -n1 scripts/titlemd.py --test
ext_links:
stage: after_test
......@@ -45,7 +41,7 @@ ext_links:
# remove JSON results
- rm *.json
script:
- find docs.it4i/ -name '*.md' -exec grep --color -l http {} + | xargs awesome_bot -t 10 --allow-dupe --allow-redirect
- find content/docs -name '*.mdx' -exec grep --color -l http {} + | xargs awesome_bot -t 10 --allow-dupe --allow-redirect
only:
- master
......@@ -55,8 +51,8 @@ ext_links:
before_script:
- echo "192.168.101.10 docs.it4i.cz" >> /etc/hosts
- wget -V
- echo https://docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
- wget --spider -e robots=off -o wget.log -r -p https://docs.it4i.cz/devel/$CI_BUILD_REF_NAME/ || true
- echo https://docs.it4i.cz/devel/$CI_COMMIT_REF_NAME/
- wget --spider -e robots=off -o wget.log -r -p https://docs.it4i.cz/devel/$CI_COMMIT_REF_NAME/ || true
script:
- cat wget.log | awk '/^Found [0-9]+ broken link[s]?.$/,/FINISHED/ { rc=-1; print $0 }; END { exit rc }'
......@@ -66,6 +62,7 @@ mkdocs:
before_script:
- source /opt/.venv3/bin/activate
- python -V # debug
- pip install -r requirements.txt
- pip freeze # debug
- mkdocs -V # debug
script:
......@@ -74,9 +71,9 @@ mkdocs:
# get modules list from clusters
- bash scripts/get_modules.sh
# generate site_url
- (if [ "${CI_BUILD_REF_NAME}" != 'master' ]; then sed -i "s/\(site_url.*$\)/\1devel\/$CI_BUILD_REF_NAME\//" mkdocs.yml;fi);
- (if [ "${CI_COMMIT_REF_NAME}" != 'master' ]; then sed -i "s/\(site_url.*$\)/\1devel\/$CI_COMMIT_REF_NAME\//" mkdocs.yml;fi);
# generate URL for code link
- sed -i "s/master/$CI_BUILD_REF_NAME/g" material/partials/toc.html
# - sed -i "s/master/$CI_BUILD_REF_NAME/g" material/partials/toc.html
# regenerate modules matrix
- python scripts/modules_matrix.py > docs.it4i/modules-matrix.md
- python scripts/modules_matrix.py --json > docs.it4i/modules-matrix.json
......@@ -112,7 +109,7 @@ deploy to stage:
- echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
script:
- chown nginx:nginx site -R
- rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/devel/$CI_BUILD_REF_NAME/
- rsync -a --delete site/ root@"$SSH_HOST_STAGE":/srv/docs.it4i.cz/devel/$CI_COMMIT_REF_NAME/
only:
- branches@sccs/docs.it4i.cz
......
Quantum Scalar I6
JAN
LUMI
AI
CI/CD
AWS
CLI
FAQ
s3cmd
GUI
EESSI
hipBlas
hipSolver
LUMI
apptainer
ROCm
HIP
NVIDIA DGX-2
nvidia
smi
......@@ -816,3 +833,19 @@ e-INFRA CZ
DICE
qgpu
qcpu
it4i-portal-clients
it4icheckaccess
it4idedicatedtime
it4ifree
it4ifsusage
it4iuserfsusage
it4iprojectfsusage
it4imotd
e-INFRA
it4i-portal-clients
s3cmd
s5cmd
title:
e-INFRA CZ Cloud Ostrava
e-INFRA CZ Account
# User documentation
test
# IT4Innovations Documentation
This project contains IT4Innovations user documentation source.
## Development
### Install
```console
$ sudo apt install libpython-dev
$ virtualenv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
```
### Package upgrade with pip
```console
$ pip list -o
$ pip install --upgrade package
$ pip freeze | sed '/pkg-resources==/d' > requirements.txt
```
## Environments
* [https://docs.it4i.cz](https://docs.it4i.cz) - master branch
* [https://docs.it4i.cz/devel/$BRANCH_NAME](https://docs.it4i.cz/devel/$BRANCH_NAME) - maps the branches, available only with VPN access
## URLs
* [http://facelessuser.github.io/pymdown-extensions/](http://facelessuser.github.io/pymdown-extensions/)
* [http://squidfunk.github.io/mkdocs-material/](http://squidfunk.github.io/mkdocs-material/)
```
fair-share
InfiniBand
RedHat
CentOS
Mellanox
```
## Mathematical Formulae
### Formulas are made with:
* [https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/](https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/)
* [https://www.mathjax.org/](https://www.mathjax.org/)
You can add a formula to a page like this:
```
$$
MAX\_FAIRSHARE * ( 1 - \frac{usage_{Project}}{usage_{Total}} )
$$
```
## Migration
To enable MathJax on a page, add the line ```---8<--- "mathjax.md"``` at the end of the file.
* [fumadocs](https://fumadocs.vercel.app/)
\ No newline at end of file
# SCS API v2
## Info
- **OpenAPI:** 3.1.0
- **Title:** scs-api-2
- **Version:** 0.1.0
- **Server URL:** `https://scs.it4i.cz/api/v2`
## Paths
### `/dedicated-time`
**GET**
- **Summary:** Get dedicated times
- **Description:** Retrieves dedicated time entries, optionally filtered by cluster name or period preset
- **OperationId:** `dedicated_time_handler`
**Parameters:**
- `cluster` (query): Filter by cluster name; Available values: karolina, barbora, dgx *(optional)*
- `period` (query): Filter by time period preset; Available values: planned, active *(optional)*
**Responses:**
- `200`: List of dedicated time entries
- `400`: Failed to deserialize query, Invalid cluster, Invalid period
Example:
```json
{
"message": "Invalid cluster: el_gordo"
}
```
- `500`: Failed to retrieve dedicated time due to a server error
Example:
```json
{
"message": "Failed to retreive dedicated time"
}
```
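For a quick check from the command line, this endpoint can be queried with a plain HTTP client; a minimal sketch using `curl`, assuming no additional authentication is required (the `cluster` and `period` values are the optional filters described above):
```console
$ curl -G "https://scs.it4i.cz/api/v2/dedicated-time" \
       --data-urlencode "cluster=karolina" \
       --data-urlencode "period=planned"
```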
### `/dedicated-time-calendar`
**GET**
- **Summary:** Get dedicated times
- **Description:** Retrieves dedicated time entries and generates a VCalendar response.
- **OperationId:** `dedicated_time_calendar`
**Responses:**
- `200`: Dedicated time VCalendar
Example:
```
BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//SUTD Timetable Calendar//randName//EN
CALSCALE:GREGORIAN
BEGIN:VEVENT
UID:1234@example.com
DTSTAMP:20230101T000000Z
DTSTART:20230101T000000Z
DTEND:20230102T000000Z
SUMMARY:Sample Dedicated Time - Cluster Outage
DESCRIPTION:Sample Dedicated Time - Cluster Outage
END:VEVENT
END:VCALENDAR
```
- `500`: Failed to retrieve dedicated time calendar
Example:
```json
{
"message": "Failed to retreive dedicated time calendar"
}
```
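The calendar can be saved to an `.ics` file and imported into a calendar client; a minimal `curl` sketch:
```console
$ curl -o dedicated-time.ics "https://scs.it4i.cz/api/v2/dedicated-time-calendar"
```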
### `/motd`
**GET**
- **Summary:** Get messages of the day
- **Description:** Retrieves messages of the day, optionally filtered by category
- **OperationId:** `motd`
**Parameters:**
- `category` (query): *(optional)*
**Responses:**
- `200`: List of motd entries
- `400`: Failed to deserialize query, Invalid motd category
- `500`: Failed to retrieve motd entries due to a server error
Example:
```json
{
"message": "Failed to retrieve motd"
}
```
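A minimal `curl` sketch for this endpoint; the `category` value below is only an illustration taken from the Motd schema example further down, since the spec does not list the allowed categories:
```console
$ curl -G "https://scs.it4i.cz/api/v2/motd" \
       --data-urlencode "category=public-service-announcement"
```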
## Components
### Schemas
#### DedicatedTime
```yaml
type: object
required:
- updated_at
properties:
cluster_type:
type: [string, 'null']
date_efficiency:
type: [string, 'null']
format: date-time
date_expiration:
type: [string, 'null']
format: date-time
updated_at:
type: string
format: date-time
```
#### Motd
```yaml
type: object
required:
- id
- author
- category
- created_at
- updated_at
- date_modification
- title
- message_body
- systems
properties:
id:
type: integer
format: int32
examples: [1]
author:
type: string
examples: [Admin]
category:
type: string
examples: [public-service-announcement]
created_at:
type: string
format: date-time
updated_at:
type: string
format: date-time
date_modification:
type: string
format: date-time
date_efficiency:
type: [string, 'null']
format: date-time
date_expiration:
type: [string, 'null']
format: date-time
date_outage_efficiency:
type: [string, 'null']
format: date-time
date_outage_expiration:
type: [string, 'null']
format: date-time
title:
type: string
examples: [Important Update]
message_body:
type: string
examples: [We are experiencing some service disruptions.]
systems:
type: array
items:
type: string
examples: [Karolina]
```
#### MsgResponse
```yaml
type: object
description: |
Common struct for DTO-less responses
eg. ```200 {"message":"Operation succeeded"}```
required:
- message
properties:
message:
type: string
examples: [API response]
```
# Introduction
This section contains documentation of decommissioned IT4Innovations' supercomputers.
This section contains documentation of decommissioned IT4Innovations' supercomputers and services.
## Salomon
......@@ -42,4 +42,9 @@ At the end of January 2021, after more than seven years, its operation permanent
| Corehours used | 134,130,309 |
| Power consumption | 77 kW |
## PRACE
Partnership for Advanced Computing in Europe aims to facilitate access to a research infrastructure that enables high-impact scientific discovery and engineering research and development across all disciplines to enhance European competitiveness for the benefit of society. For more information, see the [official website][b].
[a]: https://www.dolnivitkovice.cz/en/science-and-technology-centre/exhibitions/
[b]: https://prace-ri.eu/
# Hardware Overview
!!!important Work in progress
Barbora NG documentation is a WIP.
The documentation is still being developed (reflecting changes in technical specifications) and may be updated frequently.
The launch of Barbora NG is planned for October/November.
In the meantime, the first computational resources have already been allocated in the latest Open Access Grant Competition.
Barbora NG consists of 141 non-accelerated compute nodes named **cn[?-???]**.
Each node is a powerful x86-64 computer equipped with 192 cores
(2x Intel Xeon 6952P with 96 CPU cores) and 768 GB RAM.
User access to the Barbora NG cluster is provided by two login nodes **login[1-2]**.
The nodes are interlinked through high speed InfiniBand NDR and Ethernet networks.
The parameters are summarized in the following tables:
| **In general** | |
| ------------------------------------ | --------------------- |
| Architecture of compute nodes | x86-64 |
| Operating system | Linux |
| [**Compute nodes**][1] | |
| Total | 141 |
| Processor Type | [Intel Xeon 6952P][b] |
| Architecture | Granite Rapids |
| Processor cores | 96 |
| Processors per node | 2 |
| RAM | 768 GB |
| Local disk drive | no |
| Compute network | InfiniBand NDR |
| non-accelerated | 141, cn[?-???] |
| **In total** | |
| Theoretical peak performance (Rpeak) | ??? TFLOP/s |
| Cores | 27072 |
| RAM | 108.288 TB |
[1]: compute-nodes.md
[2]: ../general/resources-allocation-policy.md
[3]: network.md
[4]: storage.md
[5]: ../general/shell-and-data-access.md
[6]: visualization.md
[a]: https://support.it4i.cz/rt
[b]: https://www.intel.com/content/www/us/en/products/sku/241643/intel-xeon-6952p-processor-480m-cache-2-10-ghz/specifications.html
\ No newline at end of file
# Introduction
!!!important Work in progress
Barbora NG documentation is a WIP.
The documentation is still being developed (reflecting changes in technical specifications) and may be updated frequently.
The launch of Barbora NG is planned for October/November.
In the meantime, the first computational resources have already been allocated in the latest Open Access Grant Competition.
Welcome to Barbora Next Gen (NG) supercomputer cluster.
Barbora NG is our latest supercomputer which consists of 141 compute nodes,
totaling 27072 compute cores with 108288 GB RAM, giving over ??? TFLOP/s theoretical peak performance.
Nodes are interconnected through a fully non-blocking fat-tree InfiniBand NDR network
and are equipped with Intel Granite Rapids processors.
Read more in [Hardware Overview][1].
The cluster runs with an operating system compatible with the Red Hat [Linux family][a]. We have installed a wide range of software packages targeted at different scientific domains.
These packages are accessible via the [modules environment][2].
The user data shared file system and job data shared file system are available to users.
The [Slurm][b] workload manager provides [computing resources allocations and job execution][3].
Read more on how to [apply for resources][4], [obtain login credentials][5] and [access the cluster][6].
[1]: hardware-overview.md
[2]: ../environment-and-modules.md
[3]: ../general/resources-allocation-policy.md
[4]: ../general/applying-for-resources.md
[5]: ../general/obtaining-login-credentials/obtaining-login-credentials.md
[6]: ../general/shell-and-data-access.md
[a]: http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg
[b]: https://slurm.schedmd.com/
# Compute Nodes
Barbora is a cluster of x86-64 Intel-based nodes built with the BullSequana Computing technology. The cluster contains three types of compute nodes.
Barbora is a cluster of x86-64 Intel-based nodes built with the BullSequana Computing technology.
The cluster contains three types of compute nodes.
## Compute Nodes Without Accelerators
* 192 nodes
* 6912 cores in total
* 2x Intel Cascade Lake 6240, 18-core, 2.6 GHz processors per node
* 192 GB DDR4 2933MT/s of physical memory per node (12x 16 GB)
* 192 GB DDR4 2933 MT/s of physical memory per node (12x16 GB)
* BullSequana X1120 blade servers
* 2995,2 GFLOP/s per compute node
* 2995.2 GFLOP/s per compute node
* 1x 1 GB Ethernet
* 1x HDR100 IB port
* 3 computes nodes per X1120 blade server
* 3 compute nodes per X1120 blade server
* cn[1-192]
![](img/BullSequanaX1120.png)
......@@ -22,11 +23,11 @@ Barbora is a cluster of x86-64 Intel-based nodes built with the BullSequana Comp
* 8 nodes
* 192 cores in total
* two Intel Skylake Gold 6126, 12-core, 2.6 GHz processors per node
* 192 GB DDR4 2933MT/s with ECC of physical memory per node (12x 16 GB)
* 192 GB DDR4 2933MT/s with ECC of physical memory per node (12x16 GB)
* 4x GPU accelerator NVIDIA Tesla V100-SXM2 per node
* Bullsequana X410-E5 NVLink-V blade servers
* 1996,8 GFLOP/s per compute nodes
* GPU-tp-GPU All-to-All NVLINK 2.0, GPU-Direct
* 1996.8 GFLOP/s per compute node
* GPU-to-GPU All-to-All NVLINK 2.0, GPU-Direct
* 1 GB Ethernet
* 2x HDR100 IB ports
* cn[193-200]
......@@ -37,8 +38,8 @@ Barbora is a cluster of x86-64 Intel-based nodes built with the BullSequana Comp
* 1x BullSequana X808 server
* 128 cores in total
* 8 Intel Skylake 8153, 16-core, 2.0 GHz, 125W
* 6144 GiB DDR4 2667MT/s of physical memory per node (92x 64 GB)
* 8 Intel Skylake 8153, 16-core, 2.0 GHz, 125 W
* 6144 GiB DDR4 2667 MT/s of physical memory per node (96x64 GB)
* 2x HDR100 IB port
* 8192 GFLOP/s
* cn[201]
......@@ -47,19 +48,21 @@ Barbora is a cluster of x86-64 Intel-based nodes built with the BullSequana Comp
## Compute Node Summary
| Node type | Count | Range | Memory | Cores | Queues |
| ---------------------------- | ----- | ----------- | ------ | ----------- | -------------------------- |
| Nodes without an accelerator | 189 | cn[1-189] | 192GB | 36 @ 2.6 GHz | qexp, qprod, qlong, qfree |
| Nodes with a GPU accelerator | 8 | cn[190-197] | 192GB | 24 @ 2.6 GHz | qnvidia |
| Fat compute nodes | 1 | cn[198] | 6144GiB | 128 @ 2.0 GHz | qfat |
| Node type | Count | Range | Memory | Cores |
| ---------------------------- | ----- | ----------- | -------- | ------------- |
| Nodes without an accelerator | 192 | cn[1-192] | 192 GB | 36 @ 2.6 GHz |
| Nodes with a GPU accelerator | 8 | cn[193-200] | 192 GB | 24 @ 2.6 GHz |
| Fat compute nodes | 1 | cn[201] | 6144 GiB | 128 @ 2.0 GHz |
## Processor Architecture
Barbora is equipped with Intel Cascade Lake processors Intel Xeon 6240 (nodes without accelerators), Intel Skylake Gold 6126 (nodes with accelerators) and Intel Skylake Platinum 8153.
Barbora is equipped with Intel Cascade Lake processors Intel Xeon 6240 (nodes without accelerators),
Intel Skylake Gold 6126 (nodes with accelerators) and Intel Skylake Platinum 8153.
### Intel [Cascade Lake 6240][d]
Cascade Lake core is largely identical to that of [Skylake's][a]. For in-depth detail of the Skylake core/pipeline see [Skylake (client) § Pipeline][b].
The Cascade Lake core is largely identical to that of [Skylake][a].
For in-depth detail of the Skylake core/pipeline, see [Skylake (client) § Pipeline][b].
Xeon Gold 6240 is a 64-bit 18-core x86 multi-socket high-performance server microprocessor introduced by Intel in late 2018. This chip supports up to 4-way multiprocessing. The Gold 6240, which is based on the Cascade Lake microarchitecture and is manufactured on a 14 nm process, sports 2 AVX-512 FMA units as well as three Ultra Path Interconnect links. This microprocessor, which operates at 2.6 GHz with a TDP of 150 W and a turbo boost frequency of up to 3.9 GHz, supports up to 1 TB of hexa-channel DDR4-2933 ECC memory.
......@@ -116,23 +119,23 @@ Barbora is equipped with an [NVIDIA Tesla V100-SXM2][g] accelerator.
![](img/gpu-v100.png)
|NVIDIA Tesla V100-SXM2||
| --- | --- |
| GPU Architecture | NVIDIA Volta |
| NVIDIA Tensor| Cores: 640 |
| NVIDIA CUDA® Cores | 5 120 |
| Double-Precision Performance | 7.8 TFLOP/s |
| Single-Precision Performance | 15.7 TFLOP/s |
| Tensor Performance | 125 TFLOP/s |
| GPU Memory | 16 GB HBM2 |
| Memory Bandwidth | 900 GB/sec |
| ECC | Yes |
| Interconnect Bandwidth | 300 GB/sec |
| System Interface | NVIDIA NVLink |
| Form Factor | SXM2 |
| Max Power Consumption | 300 W |
| Thermal Solution | Passive |
| Compute APIs | CUDA, DirectCompute,OpenCLTM, OpenACC |
| NVIDIA Tesla V100-SXM2 | |
| ---------------------------- | -------------------------------------- |
| GPU Architecture | NVIDIA Volta |
| NVIDIA Tensor Cores | 640 |
| NVIDIA CUDA® Cores | 5120 |
| Double-Precision Performance | 7.8 TFLOP/s |
| Single-Precision Performance | 15.7 TFLOP/s |
| Tensor Performance | 125 TFLOP/s |
| GPU Memory | 16 GB HBM2 |
| Memory Bandwidth | 900 GB/sec |
| ECC | Yes |
| Interconnect Bandwidth | 300 GB/sec |
| System Interface | NVIDIA NVLink |
| Form Factor | SXM2 |
| Max Power Consumption | 300 W |
| Thermal Solution | Passive |
| Compute APIs                 | CUDA, DirectCompute, OpenCL™, OpenACC  |
[a]: https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(server)#Core
[b]: https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Pipeline
......
# Hardware Overview
The Barbora cluster consists of 201 computational nodes named **cn[1-201]** of which 192 are regular compute nodes, 8 are GPU Tesla V100 accelerated nodes and 1 is a fat node. Each node is a powerful x86-64 computer, equipped with 36/24/128 cores (18-core Intel Cascade Lake 6240 / 12-core Intel Skylake Gold 6126 / 16-core Intel Skylake 8153), at least 192 GB of RAM. User access to the Barbora cluster is provided by two login nodes **login[1,2]**. The nodes are interlinked through high speed InfiniBand and Ethernet networks.
The Barbora cluster consists of 201 computational nodes named **cn[001-201]**
of which 192 are regular compute nodes, 8 are GPU Tesla V100 accelerated nodes and 1 is a fat node.
Each node is a powerful x86-64 computer, equipped with 36/24/128 cores
(18-core Intel Cascade Lake 6240 / 12-core Intel Skylake Gold 6126 / 16-core Intel Skylake 8153), at least 192 GB of RAM.
User access to the Barbora cluster is provided by two login nodes **login[1,2]**.
The nodes are interlinked through high speed InfiniBand and Ethernet networks.
The Fat node is equipped with a large amount (6144 GB) of memory. Virtualization infrastructure provides resources to run long-term servers and services in virtual mode. The Accelerated nodes, Fat node, and Virtualization infrastructure are available [upon request][a] from a PI.
The fat node is equipped with 6144 GB of memory.
Virtualization infrastructure provides resources for running long-term servers and services in virtual mode.
The Accelerated nodes, fat node, and virtualization infrastructure are available [upon request][a] from a PI.
**There are three types of compute nodes:**
......@@ -10,14 +17,17 @@ The Fat node is equipped with a large amount (6144 GB) of memory. Virtualization
* 8 compute nodes with a GPU accelerator - 4x NVIDIA Tesla V100-SXM2
* 1 fat node - equipped with 6144 GB of RAM
[More about Compute nodes][1].
[More about compute nodes][1].
GPU and accelerated nodes are available upon request, see the [Resources Allocation Policy][2].
All of these nodes are interconnected through fast InfiniBand and Ethernet networks. [More about the Network][3].
Every chassis provides an InfiniBand switch, marked **isw**, connecting all nodes in the chassis, as well as connecting the chassis to the upper level switches.
All of these nodes are interconnected through fast InfiniBand and Ethernet networks.
[More about the computing network][3].
Every chassis provides an InfiniBand switch, marked **isw**, connecting all nodes in the chassis,
as well as connecting the chassis to the upper level switches.
User access to Barbora is provided by two login nodes: login1 and login2. [More about accessing the cluster][5].
User access to Barbora is provided by two login nodes: login1 and login2.
[More about accessing the cluster][5].
The parameters are summarized in the following tables:
......@@ -32,20 +42,20 @@ The parameters are summarized in the following tables:
| RAM | min. 192 GB |
| Local disk drive | no |
| Compute network | InfiniBand HDR |
| w/o accelerator | 192, cn[1-192] |
| GPU accelerated | 8, cn[194-200] |
| w/o accelerator | 192, cn[001-192] |
| GPU accelerated | 8, cn[193-200] |
| Fat compute nodes | 1, cn[201] |
| **In total** | |
| **In total** | |
| Total theoretical peak performance (Rpeak) | 848.8448 TFLOP/s |
| Total amount of RAM | 44.544 TB |
| Node | Processor | Memory | Accelerator |
| ---------------- | --------------------------------------- | ------ | ---------------------- |
| w/o accelerator | 2 x Intel Cascade Lake 6240, 2.6 GHz | 192 GB | - |
| GPU accelerated | 2 x Intel Skylake Gold 6126, 2.6 GHz | 192 GB | NVIDIA Tesla V100-SXM2 |
| Fat compute node | 2 x Intel Skylake Platinum 8153, 2.0 GHz | 6144 GB | - |
| Node | Processor | Memory | Accelerator |
| ---------------- | --------------------------------------- | ------ | ---------------------- |
| Regular node     | 2x Intel Cascade Lake 6240, 2.6 GHz     | 192 GB  | -                      |
| GPU accelerated  | 2x Intel Skylake Gold 6126, 2.6 GHz     | 192 GB  | NVIDIA Tesla V100-SXM2 |
| Fat compute node | 2x Intel Skylake Platinum 8153, 2.0 GHz | 6144 GB | -                      |
For more details refer to [Compute nodes][1], [Storage][4], [Visualization servers][6], and [Network][3].
For more details refer to the sections [Compute Nodes][1], [Storage][4], [Visualization Servers][6], and [Network][3].
[1]: compute-nodes.md
[2]: ../general/resources-allocation-policy.md
......
......@@ -2,13 +2,13 @@
Welcome to Barbora supercomputer cluster. The Barbora cluster consists of 201 compute nodes, totaling 7232 compute cores with 44544 GB RAM, giving over 848 TFLOP/s theoretical peak performance.
Nodes are interconnected through a fully non-blocking fat-tree InfiniBand network, and are equipped with Intel Cascade Lake processors. A few nodes are also equipped with NVIDIA Tesla V100-SXM2 Read more in [Hardware Overview][1].
Nodes are interconnected through a fully non-blocking fat-tree InfiniBand network, and are equipped with Intel Cascade Lake processors. A few nodes are also equipped with NVIDIA Tesla V100-SXM2. Read more in [Hardware Overview][1].
The cluster runs with an operating system compatible with the Red Hat [Linux family][a]. We have installed a wide range of software packages targeted at different scientific domains. These packages are accessible via the [modules environment][2].
The user data shared file-system and job data shared file-system are available to users.
The user data shared file system and job data shared file system are available to users.
The [PBS Professional Open Source Project][b] workload manager provides [computing resources allocations and job execution][3].
The [Slurm][b] workload manager provides [computing resources allocations and job execution][3].
Read more on how to [apply for resources][4], [obtain login credentials][5] and [access the cluster][6].
......@@ -22,4 +22,4 @@ Read more on how to [apply for resources][4], [obtain login credentials][5] and
[6]: ../general/shell-and-data-access.md
[a]: http://upload.wikimedia.org/wikipedia/commons/1/1b/Linux_Distribution_Timeline.svg
[b]: https://www.pbspro.org/
[b]: https://slurm.schedmd.com/
......@@ -2,9 +2,11 @@
All of the compute and login nodes of Barbora are interconnected through an [InfiniBand][a] HDR 200 Gbps network and a Gigabit Ethernet network.
Compute nodes and the service infrastructure is connected by the HDR100 technology that allows one 200Gbps HDR port (aggregation 4x 50Gbps) to be divided into two HDR100 ports with 100Gbps (2x 50Gbps) bandwidth.
Compute nodes and the service infrastructure are connected by the HDR100 technology
that allows one 200 Gbps HDR port (aggregation 4x 50 Gbps) to be divided into two HDR100 ports with 100 Gbps (2x 50 Gbps) bandwidth.
The cabling between the L1 and L2 layer is realized by HDR cabling, connecting the end devices is realized by so called Y or splitter cable (1x HRD200 - 2x HDR100).
The cabling between the L1 and L2 layers is realized by HDR cabling;
the end devices are connected by a so-called Y or splitter cable (1x HDR200 - 2x HDR100).
![](img/hdr.jpg)
......@@ -21,9 +23,9 @@ The cabling between the L1 and L2 layer is realized by HDR cabling, connecting t
**Performance**
* 40x HDR 200Gb/s ports in a 1U switch
* 80x HDR100 100Gb/s ports in a 1U switch
* 16Tb/s aggregate switch throughput
* 40x HDR 200 Gb/s ports in a 1U switch
* 80x HDR100 100 Gb/s ports in a 1U switch
* 16 Tb/s aggregate switch throughput
* Up to 15.8 billion messages-per-second
* 90 ns switch latency
......
......@@ -120,7 +120,7 @@ The filesystem is backed up, so that it can be restored in case of a catastrophi
The SCRATCH is realized as Lustre parallel file system and is available from all login and computational nodes. There are 5 OSTs dedicated for the SCRATCH file system.
The SCRATCH filesystem is mounted in directory /scratch. Users may freely create subdirectories and files on the filesystem. Accessible capacity is 310TB, shared among all users. Individual users are restricted by filesystem usage quotas, set to 10TB per user. The purpose of this quota is to prevent runaway programs from filling the entire filesystem and deny service to other users. Should 10TB prove insufficient, contact [support][d], the quota may be lifted upon request.
The SCRATCH filesystem is mounted in the `/scratch/project/PROJECT_ID` directory created automatically with the `PROJECT_ID` project. Accessible capacity is 310 TB, shared among all users. Individual users are restricted by filesystem usage quotas, set to 10 TB per user. The purpose of this quota is to prevent runaway programs from filling the entire filesystem and denying service to other users. Should 10 TB prove insufficient, contact [support][d]; the quota may be lifted upon request.
!!! note
The Scratch filesystem is intended for temporary scratch data generated during the calculation as well as for high-performance access to input and output files. All I/O intensive jobs must use the SCRATCH filesystem as their working directory.
......
......@@ -4,8 +4,8 @@ Remote visualization with [VirtualGL][3] is available on two nodes.
* 2 nodes
* 32 cores in total
* 2x Intel Skylake Gold 6130 – 16core@2,1GHz processors per node
* 192 GB DDR4 2667MT/s of physical memory per node (12x 16 GB)
* 2x Intel Skylake Gold 6130 – 16-core@2.1 GHz processors per node
* 192 GB DDR4 2667 MT/s of physical memory per node (12x 16 GB)
* BullSequana X450-E5 blade servers
* 2150.4 GFLOP/s per compute node
* 1x 1 GB Ethernet and 2x 10 GB Ethernet
......
# e-INFRA CZ Cloud Ostrava
Ostrava cloud consists of 22 nodes from the [Karolina][a] supercomputer.
The cloud site is built on top of OpenStack,
which is a free open standard cloud computing platform.
## Access
To access the cloud, you must:
* have an [e-Infra CZ account][3],
* be a member of an [active project][b].
The dashboard is available at [https://ostrava.openstack.cloud.e-infra.cz/][6].
You can specify resources/quotas for your project.
For more information, see the [Quota Limits][5] section.
## Creating First Instance
To create your first VM instance, follow the [e-INFRA CZ guide][4].
Note that the guide is similar for clouds in Brno and Ostrava,
so make sure that you follow steps for Ostrava cloud where applicable.
### Process Automation
You can automate the process using Terraform or OpenStack.
#### Terraform
Prerequisites:
* Linux/Mac/WSL terminal BASH shell
* installed Terraform and sshuttle
* downloaded [application credentials][9] from OpenStack Horizon dashboard and saved as a `project_openrc.sh.inc` text file
Follow the guide: [https://code.it4i.cz/terraform][8]
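A rough sketch of the typical workflow, assuming the Terraform configuration from the guide above has been cloned into the current directory and the application credentials saved as `project_openrc.sh.inc`:
```console
$ source project_openrc.sh.inc   # load the OpenStack application credentials
$ terraform init                 # download the required providers
$ terraform plan                 # review the resources to be created
$ terraform apply                # create the infrastructure
```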
#### OpenStack
Prerequisites:
* Linux/Mac/WSL terminal BASH shell
* installed [OpenStack client][7]
Follow the guide: [https://code.it4i.cz/commandline][10]
Run commands:
```console
source project_openrc.sh.inc
```
```console
./cmdline-demo.sh basic-infrastructure-1
```
## Technical Reference
For the list of deployed OpenStack services, see the [list of components][1].
More information can be found on the [e-INFRA CZ website][2].
[1]: https://docs.platforms.cloud.e-infra.cz/en/docs/technical-reference/ostrava-g2-site/openstack-components
[2]: https://docs.platforms.cloud.e-infra.cz/en/docs/technical-reference/ostrava-g2-site
[3]: https://docs.account.e-infra.cz/en/docs/access/account#how-to-apply-for-the-first-time
[4]: https://docs.platforms.cloud.e-infra.cz/en/docs/getting-started/creating-first-infrastructure
[5]: https://docs.platforms.cloud.e-infra.cz/en/docs/technical-reference/ostrava-g2-site/quota-limits
[6]: https://ostrava.openstack.cloud.e-infra.cz/
[7]: https://cyso.cloud/docs/cloud/extra/how-to-use-the-openstack-cli-tools-on-linux/
[8]: https://code.it4i.cz/dvo0012/infrastructure-by-script/-/tree/main/openstack-infrastructure-as-code-automation/clouds/g2/ostrava/general/terraform
[9]: https://docs.platforms.cloud.e-infra.cz/en/docs/how-to-guides/obtaining-api-key
[10]: https://code.it4i.cz/dvo0012/infrastructure-by-script/-/tree/main/openstack-infrastructure-as-code-automation/clouds/g2/ostrava/general/commandline
[a]: ../karolina/introduction.md
[b]: ../general/access/project-access.md
# IT4I Cloud
IT4I cloud consists of 14 nodes from the [Karolina][a] supercomputer.
The cloud site is built on top of OpenStack,
which is a free open standard cloud computing platform.
!!! Note
The guide describes steps for personal projects.<br>
Some steps may differ for large projects.<br>
For large projects, apply for resources to the [Allocation Committee][11].
## Access
To access the cloud you must be a member of an active EUROHPC project,
or fall into the **Access Category B**, i.e. [Access For Thematic HPC Resource Utilisation][11].
A personal OpenStack project is required. Request one by contacting [IT4I Support][12].
The dashboard is available at [https://cloud.it4i.cz][6].
You can see quotas set for the IT4I Cloud in the [Quota Limits][f] section.
## Creating First Instance
To create your first VM instance, follow the steps below:
### Log In
Go to [https://cloud.it4i.cz][6], enter your LDAP username and password and choose the `IT4I_LDAP` domain. After you sign in, you will be redirected to the dashboard.
![](../img/login.png)
### Create Key Pair
SSH key is required for remote access to your instance.
1. Go to **Project > Compute > Key Pairs** and click the **Create Key Pair** button.
![](../img/keypairs.png)
1. In the Create Key Pair window, name your key pair, select `SSH Key` for key type and confirm by clicking Create Key Pair.
![](../img/keypairs1.png)
1. Download and manage the private key according to your operating system.
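For example, on Linux you would typically restrict the permissions of the downloaded private key before using it (the file name below is only a placeholder for the key pair you created):
```console
$ chmod 600 ~/Downloads/my-keypair.pem   # placeholder file name; SSH refuses keys readable by others
```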
### Update Security Group
To be able to remotely access your VM instance, you have to allow access in the security group.
1. Go to **Project > Network > Security Groups** and click on **Manage Rules** for the default security group.
![](../img/securityg.png)
1. Click on **Add Rule**, choose **SSH**, and leave the remaining fields unchanged.
![](../img/securityg1.png)
### Create VM Instance
1. In **Compute > Instances**, click **Launch Instance**.
![](../img/instance.png)
1. Choose Instance Name, Description, and number of instances. Click **Next**.
![](../img/instance1.png)
1. Choose an image from which to boot the instance. Choose to delete the volume after the instance is deleted. Click **Next**.
![](../img/instance2.png)
1. Choose the hardware resources of the instance by selecting a flavor. Additional volumes for data can be attached later on. Click **Next**.
![](../img/instance3.png)
1. Select the network and continue to **Security Groups**.
![](../img/instance4.png)
1. Allocate the security group with SSH rule that you added in the [Update Security Group](it4i-cloud.md#update-security-group) step. Then click **Next** to go to the **Key Pair**.
![](../img/securityg2.png)
1. Select the key that you created in the [Create Key Pair][g] section and launch the instance.
![](../img/instance5.png)
### Associate Floating IP
1. Click on the **Associate** button next to the floating IP.
![](../img/floatingip.png)
1. Select Port to be associated with the instance, then click the **Associate** button.
Now you can join the VM using your preferred SSH client.
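For example, from a Linux terminal (the key file, login user, and address below are placeholders; the default user depends on the image you selected):
```console
$ ssh -i ~/Downloads/my-keypair.pem ubuntu@FLOATING_IP
```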
## Process Automation
You can automate the process using OpenStack.
### OpenStack
Prerequisites:
* Linux/Mac/WSL terminal BASH shell
* installed [OpenStack client][7]
Follow the guide: [https://code.it4i.cz/commandline][10]
Run commands:
```console
source project_openrc.sh.inc
```
```console
./cmdline-demo.sh basic-infrastructure-1
```
[1]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-site/openstack-components/
[2]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-site/
[3]: https://docs.e-infra.cz/account/
[4]: https://docs.e-infra.cz/compute/openstack/getting-started/creating-first-infrastructure/
[5]: https://docs.e-infra.cz/compute/openstack/technical-reference/ostrava-g2-site/quota-limits/
[6]: https://cloud.it4i.cz
[7]: https://docs.fuga.cloud/how-to-use-the-openstack-cli-tools-on-linux
[8]: https://code.it4i.cz/dvo0012/infrastructure-by-script/-/tree/main/openstack-infrastructure-as-code-automation/clouds/g2/ostrava/general/terraform
[9]: https://docs.e-infra.cz/compute/openstack/how-to-guides/obtaining-api-key/
[10]: https://code.it4i.cz/dvo0012/infrastructure-by-script/-/tree/main/openstack-infrastructure-as-code-automation/clouds/g2/ostrava/general/commandline
[11]: https://www.it4i.cz/en/for-users/computing-resources-allocation
[12]: mailto:support@it4i.cz
[a]: ../karolina/introduction.md
[b]: ../general/access/project-access.md
[c]: einfracz-cloud.md
[d]: ../general/accessing-the-clusters/vpn-access.md
[e]: ../general/obtaining-login-credentials/obtaining-login-credentials.md
[f]: it4i-quotas.md
[g]: it4i-cloud.md#create-key-pair
# IT4I Cloud Quotas
| Resource | Quota |
|---------------------------------------|-------|
| Instances | 10 |
| VCPUs | 20 |
| RAM | 32GB |
| Volumes | 20 |
| Volume Snapshots | 12 |
| Volume Storage | 500 |
| Floating-IPs | 1 |
| Security Groups | 10 |
| Security Group Rules | 100 |
| Networks | 1 |
| Ports | 10 |
| Routers | 1 |
| Backups | 12 |
| Groups | 10 |
| rbac_policies | 10 |
| Subnets | 1 |
| Subnet_pools | -1 |
| Fixed-ips | -1 |
| Injected-file-size | 10240 |
| Injected-path-size | 255 |
| Injected-files | 5 |
| Key-pairs | 100 |
| Properties | 128 |
| Server-groups | 10 |
| Server-group-members | 10 |
| Backup-gigabytes | 1002 |
| Per-volume-gigabytes | -1 |
# Accessing Complementary Systems
Complementary systems can be accessed at `login.cs.it4i.cz`
by any user with an active account assigned to an active project.
**SSH is required** to access Complementary systems.
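A minimal login example, with a placeholder username (use your IT4I login and credentials):
```console
$ ssh your_username@login.cs.it4i.cz
```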
## Data Storage
### Home
The `/home` file system is shared across all Complementary systems. Note that this file system is **not** shared with the file system on IT4I clusters.
### Scratch
Local `/lscratch` storage is available on individual nodes.
### PROJECT
Complementary systems are connected to the [PROJECT storage][1].
[1]: ../storage/project-storage.md
# Using AMD Partition
For testing your application on the AMD partition,
you need to prepare a job script for that partition or use the interactive job:
```console
salloc -N 1 -c 64 -A PROJECT-ID -p p03-amd --gres=gpu:4 --time=08:00:00
```
where:
- `-N 1` means allocating one server,
- `-c 64` means allocating 64 cores,
- `-A` is your project,
- `-p p03-amd` is AMD partition,
- `--gres=gpu:4` means allocating all 4 GPUs of the node,
- `--time=08:00:00` means allocation for 8 hours.
You also have the option to allocate only a subset of the resources,
by reducing the `-c` and `--gres=gpu` values.
```console
salloc -N 1 -c 48 -A PROJECT-ID -p p03-amd --gres=gpu:3 --time=08:00:00
salloc -N 1 -c 32 -A PROJECT-ID -p p03-amd --gres=gpu:2 --time=08:00:00
salloc -N 1 -c 16 -A PROJECT-ID -p p03-amd --gres=gpu:1 --time=08:00:00
```
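If you prefer a batch job over an interactive allocation, a minimal job script sketch for the same partition could look like the following (job name and application are placeholders); submit it with `sbatch`:
```console
#!/usr/bin/env bash
#SBATCH --job-name=amd-test        # placeholder job name
#SBATCH --account=PROJECT-ID       # your project
#SBATCH --partition=p03-amd        # AMD partition
#SBATCH --nodes=1                  # one server
#SBATCH --cpus-per-task=64         # 64 cores
#SBATCH --gres=gpu:4               # all 4 GPUs of the node
#SBATCH --time=08:00:00            # 8 hours
./your_application                 # placeholder for your application
```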
!!! Note
p03-amd01 server has hyperthreading **enabled**, therefore `htop` shows 128 cores.<br>
p03-amd02 server has hyperthreading **disabled**, therefore `htop` shows 64 cores.
## Using AMD MI100 GPUs
The AMD GPUs can be programmed using the [ROCm open-source platform](https://docs.amd.com/).
ROCm and related libraries are installed directly in the system.
You can find them here:
```console
/opt/rocm/
```
The actual version can be found here:
```console
[user@p03-amd02.cs]$ cat /opt/rocm/.info/version
5.5.1-74
```
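To check that the node's MI100 GPUs are visible, you can use the `rocm-smi` utility shipped with ROCm (output omitted here):
```console
[user@p03-amd02.cs ~]$ /opt/rocm/bin/rocm-smi
```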
## Basic HIP Code
The first way to program AMD GPUs is to use HIP.
The basic vector addition code in HIP looks like this.
This is a complete code example that you can copy and paste into a file.
For this example we use `vector_add.hip.cpp`.
```cpp
#include <cstdio>
#include <hip/hip_runtime.h>
__global__ void add_vectors(float * x, float * y, float alpha, int count)
{
long long idx = blockIdx.x * blockDim.x + threadIdx.x;
if(idx < count)
y[idx] += alpha * x[idx];
}
int main()
{
// number of elements in the vectors
long long count = 10;
// allocation and initialization of data on the host (CPU memory)
float * h_x = new float[count];
float * h_y = new float[count];
for(long long i = 0; i < count; i++)
{
h_x[i] = i;
h_y[i] = 10 * i;
}
// print the input data
printf("X:");
for(long long i = 0; i < count; i++)
printf(" %7.2f", h_x[i]);
printf("\n");
printf("Y:");
for(long long i = 0; i < count; i++)
printf(" %7.2f", h_y[i]);
printf("\n");
// allocation of memory on the GPU device
float * d_x;
float * d_y;
hipMalloc(&d_x, count * sizeof(float));
hipMalloc(&d_y, count * sizeof(float));
// copy the data from host memory to the device
hipMemcpy(d_x, h_x, count * sizeof(float), hipMemcpyHostToDevice);
hipMemcpy(d_y, h_y, count * sizeof(float), hipMemcpyHostToDevice);
int tpb = 256;
int bpg = (count - 1) / tpb + 1;
// launch the kernel on the GPU
add_vectors<<< bpg, tpb >>>(d_x, d_y, 100, count);
// hipLaunchKernelGGL(add_vectors, bpg, tpb, 0, 0, d_x, d_y, 100, count);
// copy the result back to CPU memory
hipMemcpy(h_y, d_y, count * sizeof(float), hipMemcpyDeviceToHost);
// print the results
printf("Y:");
for(long long i = 0; i < count; i++)
printf(" %7.2f", h_y[i]);
printf("\n");
// free the allocated memory
hipFree(d_x);
hipFree(d_y);
delete[] h_x;
delete[] h_y;
return 0;
}
```
To compile the code, we use the `hipcc` compiler.
For compiler information, use `hipcc --version`:
```console
[user@p03-amd02.cs ~]$ hipcc --version
HIP version: 5.5.30202-eaf00c0b
AMD clang version 16.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.5.1 23194 69ef12a7c3cc5b0ccf820bc007bd87e8b3ac3037)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/rocm-5.5.1/llvm/bin
```
The code is compiled as follows:
```console
hipcc vector_add.hip.cpp -o vector_add.x
```
The correct output of the code is:
```console
[user@p03-amd02.cs ~]$ ./vector_add.x
X: 0.00 1.00 2.00 3.00 4.00 5.00 6.00 7.00 8.00 9.00
Y: 0.00 10.00 20.00 30.00 40.00 50.00 60.00 70.00 80.00 90.00
Y: 0.00 110.00 220.00 330.00 440.00 550.00 660.00 770.00 880.00 990.00
```
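If you allocated fewer than all four GPUs, or want to pin the example to a particular device, you can use the standard `HIP_VISIBLE_DEVICES` environment variable (here selecting the first visible GPU):
```console
[user@p03-amd02.cs ~]$ HIP_VISIBLE_DEVICES=0 ./vector_add.x
```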
More details on HIP programming can be found in the [HIP Programming Guide](https://docs.amd.com/bundle/HIP-Programming-Guide-v5.5/page/Introduction_to_HIP_Programming_Guide.html).
## HIP and ROCm Libraries
The list of official AMD libraries can be found [here](https://docs.amd.com/category/libraries).
The libraries are installed in the same directory as ROCm:
```console
/opt/rocm/
```
The following libraries are installed:
```console
drwxr-xr-x 4 root root 44 Jun 7 14:09 hipblas
drwxr-xr-x 3 root root 17 Jun 7 14:09 hipblas-clients
drwxr-xr-x 3 root root 29 Jun 7 14:09 hipcub
drwxr-xr-x 4 root root 44 Jun 7 14:09 hipfft
drwxr-xr-x 3 root root 25 Jun 7 14:09 hipfort
drwxr-xr-x 4 root root 32 Jun 7 14:09 hiprand
drwxr-xr-x 4 root root 44 Jun 7 14:09 hipsolver
drwxr-xr-x 4 root root 44 Jun 7 14:09 hipsparse
```
and
```console
drwxr-xr-x 4 root root 32 Jun 7 14:09 rocalution
drwxr-xr-x 4 root root 44 Jun 7 14:09 rocblas
drwxr-xr-x 4 root root 44 Jun 7 14:09 rocfft
drwxr-xr-x 4 root root 32 Jun 7 14:09 rocprim
drwxr-xr-x 4 root root 32 Jun 7 14:09 rocrand
drwxr-xr-x 4 root root 44 Jun 7 14:09 rocsolver
drwxr-xr-x 4 root root 44 Jun 7 14:09 rocsparse
drwxr-xr-x 3 root root 29 Jun 7 14:09 rocthrust
```
## Using HipBlas Library
The basic code in HIP that uses hipBLAS looks like this.
This is a complete code example that you can copy and paste into a file.
For this example we use `hipblas.hip.cpp`.
```cpp
#include <cstdio>
#include <vector>
#include <cstdlib>
#include <hip/hip_runtime.h>
#include <hipblas/hipblas.h>
int main()
{
srand(9600);
int width = 10;
int height = 7;
int elem_count = width * height;
// initialization of data in CPU memory
float * h_A;
hipHostMalloc(&h_A, elem_count * sizeof(*h_A));
for(int i = 0; i < elem_count; i++)
h_A[i] = (100.0f * rand()) / (float)RAND_MAX;
printf("Matrix A:\n");
for(int r = 0; r < height; r++)
{
for(int c = 0; c < width; c++)
printf("%6.3f ", h_A[r + height * c]);
printf("\n");
}
float * h_x;
hipHostMalloc(&h_x, width * sizeof(*h_x));
for(int i = 0; i < width; i++)
h_x[i] = (100.0f * rand()) / (float)RAND_MAX;
printf("vector x:\n");
for(int i = 0; i < width; i++)
printf("%6.3f ", h_x[i]);
printf("\n");
float * h_y;
hipHostMalloc(&h_y, height * sizeof(*h_y));
for(int i = 0; i < height; i++)
h_y[i] = 100.0f + i;
printf("vector y:\n");
for(int i = 0; i < height; i++)
printf("%6.3f ", h_x[i]);
printf("\n");
// initialization of data in GPU memory
float * d_A;
size_t pitch_A;
hipMallocPitch((void**)&d_A, &pitch_A, height * sizeof(*d_A), width);
hipMemcpy2D(d_A, pitch_A, h_A, height * sizeof(*d_A), height * sizeof(*d_A), width, hipMemcpyHostToDevice);
int lda = pitch_A / sizeof(float);
float * d_x;
hipMalloc(&d_x, width * sizeof(*d_x));
hipMemcpy(d_x, h_x, width * sizeof(*d_x), hipMemcpyHostToDevice);
float * d_y;
hipMalloc(&d_y, height * sizeof(*d_y));
hipMemcpy(d_y, h_y, height * sizeof(*d_y), hipMemcpyHostToDevice);
// basic calculation of the result on the CPU
float alpha=2.0f, beta=10.0f;
for(int i = 0; i < height; i++)
h_y[i] *= beta;
for(int r = 0; r < height; r++)
for(int c = 0; c < width; c++)
h_y[r] += alpha * h_x[c] * h_A[r + height * c];
printf("result y CPU:\n");
for(int i = 0; i < height; i++)
printf("%6.3f ", h_y[i]);
printf("\n");
// calculation of the result on the GPU using the hipBLAS library
hipblasHandle_t blas_handle;
hipblasCreate(&blas_handle);
hipblasSgemv(blas_handle, HIPBLAS_OP_N, height, width, &alpha, d_A, lda, d_x, 1, &beta, d_y, 1);
hipDeviceSynchronize();
hipblasDestroy(blas_handle);
// copy the GPU result to CPU memory and print it
hipMemcpy(h_y, d_y, height * sizeof(*d_y), hipMemcpyDeviceToHost);
printf("result y BLAS:\n");
for(int i = 0; i < height; i++)
printf("%6.3f ", h_y[i]);
printf("\n");
// free all the allocated memory
hipFree(d_A);
hipFree(d_x);
hipFree(d_y);
hipHostFree(h_A);
hipHostFree(h_x);
hipHostFree(h_y);
return 0;
}
```
The code compilation can be done as follows:
```console
hipcc hipblas.hip.cpp -o hipblas.x -lhipblas
```
## Using HipSolver Library
The basic code in HIP that uses hipSOLVER looks like this.
This is a complete code example that you can copy and paste into a file.
For this example we use `hipsolver.hip.cpp`.
```cpp
#include <cstdio>
#include <vector>
#include <cstdlib>
#include <algorithm>
#include <hipsolver/hipsolver.h>
#include <hipblas/hipblas.h>
int main()
{
srand(63456);
int size = 10;
// allocation and initialization of data on host. this time we use std::vector
int h_A_ld = size;
int h_A_pitch = h_A_ld * sizeof(float);
std::vector<float> h_A(size * h_A_ld);
for(int r = 0; r < size; r++)
for(int c = 0; c < size; c++)
h_A[r * h_A_ld + c] = (10.0 * rand()) / RAND_MAX;
printf("System matrix A:\n");
for(int r = 0; r < size; r++)
{
for(int c = 0; c < size; c++)
printf("%6.3f ", h_A[r * h_A_ld + c]);
printf("\n");
}
std::vector<float> h_b(size);
for(int i = 0; i < size; i++)
h_b[i] = (10.0 * rand()) / RAND_MAX;
printf("RHS vector b:\n");
for(int i = 0; i < size; i++)
printf("%6.3f ", h_b[i]);
printf("\n");
std::vector<float> h_x(size);
// memory allocation on the device and initialization
float * d_A;
size_t d_A_pitch;
hipMallocPitch((void**)&d_A, &d_A_pitch, size, size);
int d_A_ld = d_A_pitch / sizeof(float);
float * d_b;
hipMalloc(&d_b, size * sizeof(float));
float * d_x;
hipMalloc(&d_x, size * sizeof(float));
int * d_piv;
hipMalloc(&d_piv, size * sizeof(int));
int * info;
hipMallocManaged(&info, sizeof(int));
hipMemcpy2D(d_A, d_A_pitch, h_A.data(), h_A_pitch, size * sizeof(float), size, hipMemcpyHostToDevice);
hipMemcpy(d_b, h_b.data(), size * sizeof(float), hipMemcpyHostToDevice);
// solving the system using hipSOLVER
hipsolverHandle_t solverHandle;
hipsolverCreate(&solverHandle);
int wss_trf, wss_trs; // wss = WorkSpace Size
hipsolverSgetrf_bufferSize(solverHandle, size, size, d_A, d_A_ld, &wss_trf);
hipsolverSgetrs_bufferSize(solverHandle, HIPSOLVER_OP_N, size, 1, d_A, d_A_ld, d_piv, d_b, size, &wss_trs);
float * workspace;
int wss = std::max(wss_trf, wss_trs);
hipMalloc(&workspace, wss * sizeof(float));
hipsolverSgetrf(solverHandle, size, size, d_A, d_A_ld, workspace, wss, d_piv, info);
hipsolverSgetrs(solverHandle, HIPSOLVER_OP_N, size, 1, d_A, d_A_ld, d_piv, d_b, size, workspace, wss, info);
hipMemcpy(d_x, d_b, size * sizeof(float), hipMemcpyDeviceToDevice);
hipMemcpy(h_x.data(), d_x, size * sizeof(float), hipMemcpyDeviceToHost);
printf("Solution vector x:\n");
for(int i = 0; i < size; i++)
printf("%6.3f ", h_x[i]);
printf("\n");
hipFree(workspace);
hipsolverDestroy(solverHandle);
// perform matrix-vector multiplication A*x using hipBLAS to check if the solution is correct
hipblasHandle_t blasHandle;
hipblasCreate(&blasHandle);
float alpha = 1;
float beta = 0;
hipMemcpy2D(d_A, d_A_pitch, h_A.data(), h_A_pitch, size * sizeof(float), size, hipMemcpyHostToDevice);
hipblasSgemv(blasHandle, HIPBLAS_OP_N, size, size, &alpha, d_A, d_A_ld, d_x, 1, &beta, d_b, 1);
hipDeviceSynchronize();
hipblasDestroy(blasHandle);
for(int i = 0; i < size; i++)
h_b[i] = 0;
hipMemcpy(h_b.data(), d_b, size * sizeof(float), hipMemcpyDeviceToHost);
printf("Check multiplication vector Ax:\n");
for(int i = 0; i < size; i++)
printf("%6.3f ", h_b[i]);
printf("\n");
// free all the allocated memory
hipFree(info);
hipFree(d_piv);
hipFree(d_x);
hipFree(d_b);
hipFree(d_A);
return 0;
}
```
The code compilation can be done as follows:
```console
hipcc hipsolver.hip.cpp -o hipsolver.x -lhipblas -lhipsolver
```
## Using OpenMP Offload to Program AMD GPUs
The ROCm™ installation includes an LLVM-based implementation that fully supports the OpenMP 4.5 standard
and a subset of the OpenMP 5.0 standard.
Fortran, C/C++ compilers, and corresponding runtime libraries are included.
The OpenMP toolchain is automatically installed as part of the standard ROCm installation
and is available under `/opt/rocm/llvm`. The sub-directories are:
- `bin` : Compilers (flang and clang) and other binaries.
- `examples` : The usage section below shows how to compile and run these programs.
- `include` : Header files.
- `lib` : Libraries including those required for target offload.
- `lib-debug` : Debug versions of the above libraries.
More information can be found in the [AMD OpenMP Support Guide](https://docs.amd.com/bundle/OpenMP-Support-Guide-v5.5/page/Introduction_to_OpenMP_Support_Guide.html).
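You can confirm that the toolchain is present by querying the bundled compiler directly:
```console
[user@p03-amd02.cs ~]$ /opt/rocm/llvm/bin/clang++ --version
```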
## Compilation of OpenMP Code
A basic example that uses OpenMP offload is shown below.
Again, the code is complete and can be copied and pasted into a file.
Here we use `vadd.cpp`.
```cpp
#include <cstdio>
#include <cstdlib>
int main(int argc, char ** argv)
{
long long count = 1 << 20;
if(argc > 1)
count = atoll(argv[1]);
long long print_count = 16;
if(argc > 2)
print_count = atoll(argv[2]);
long long * a = new long long[count];
long long * b = new long long[count];
long long * c = new long long[count];
#pragma omp parallel for
for(long long i = 0; i < count; i++)
{
a[i] = i;
b[i] = 10 * i;
}
printf("A: ");
for(long long i = 0; i < print_count; i++)
printf("%3lld ", a[i]);
printf("\n");
printf("B: ");
for(long long i = 0; i < print_count; i++)
printf("%3lld ", b[i]);
printf("\n");
#pragma omp target map(to: a[0:count],b[0:count]) map(from: c[0:count])
#pragma omp teams distribute parallel for
for(long long i = 0; i < count; i++)
{
c[i] = a[i] + b[i];
}
printf("C: ");
for(long long i = 0; i < print_count; i++)
printf("%3lld ", c[i]);
printf("\n");
delete[] a;
delete[] b;
delete[] c;
return 0;
}
```
This code can be compiled like this:
```console
/opt/rocm/llvm/bin/clang++ -O3 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 vadd.cpp -o vadd.x
```
These options are required for target offload from an OpenMP program:
- `-target x86_64-pc-linux-gnu`
- `-fopenmp`
- `-fopenmp-targets=amdgcn-amd-amdhsa`
- `-Xopenmp-target=amdgcn-amd-amdhsa`
The `-march` flag specifies the architecture of the targeted GPU.
You need to change this when moving, for instance, to LUMI with its MI250X GPUs.
The MI100 GPUs present in CS have the code `gfx908`:
- `-march=gfx908`
Note: You also have to include the `-O0`, `-O2`, or `-O3` flag.
Without this flag, the execution of the compiled code fails.
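The compiled binary can then be run directly; as the code above shows, it optionally accepts the vector length and the number of printed elements as command-line arguments:
```console
[user@p03-amd02.cs ~]$ ./vadd.x            # default: 2^20 elements, print the first 16
[user@p03-amd02.cs ~]$ ./vadd.x 1000000 8  # 10^6 elements, print the first 8
```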