Skip to content
Snippets Groups Projects
Commit ae247fda authored by Milan Jaros
Browse files

fix srun in scripts, fix path to singularity

parent 566a35c7
No related branches found
No related tags found
No related merge requests found
......@@ -33,17 +33,10 @@ module load rocm/5.2.3.lua
## Pytorch simple test (calling of torch.cuda.device_count())
### Base environment
### Run an interactive job
```
module purge
module load CrayEnv
module load PrgEnv-cray/8.3.3
module load craype-accel-amd-gfx90a
module load cray-python
# Default ROCm – more recent versions are preferable (e.g. ROCm 5.6.0)
module load rocm/5.2.3.lua
salloc -A project_XXX --partition=standard-g -N 1 -n 1 --gres=gpu:8 -t 01:00:00
```
### Scripts
......
......@@ -14,7 +14,8 @@ wd=$(pwd)
set -x
# download from https://hub.docker.com/r/rocm/pytorch or use from Samuel Antao:
SIF=/pfs/lustrep2/projappl/project_462000125/samantao-public/containers/lumi-pytorch-rocm-5.5.1-python-3.10-pytorch-v2.0.1-dockerhash-4305da4654f4.sif
#SIF=/pfs/lustrep2/projappl/project_462000125/samantao-public/containers/lumi-pytorch-rocm-5.5.1-python-3.10-pytorch-v2.0.1-dockerhash-4305da4654f4.sif
SIF=/pfs/lustrep2/projappl/project_462000125/samantao-public/containers/lumi-pytorch-rocm-5.5.1-python-3.10-pytorch-v2.0.1-dockerhash-d55f9163ed80.sif
rm -rf $wd/run-me.sh
cat > $wd/run-me.sh << EOF
......
......@@ -28,5 +28,5 @@ if [ ! -d $wd/pip-installs ] ; then
fi
PYTHONPATH=$wd/pip-installs \
srun -n1 --gpus 8 \
srun -n 1 \
python -c 'import torch; print("I have this many devices:", torch.cuda.device_count())'
......@@ -29,5 +29,5 @@ else
source cray-python-virtualenv/bin/activate
fi
srun -n1 --gpus 8 \
srun -n 1 \
python -c 'import torch; print("I have this many devices:", torch.cuda.device_count())'
......@@ -28,5 +28,5 @@ else
source $wd/miniconda3/bin/activate pytorch
fi
srun -n1 --gpus 8 \
srun -n 1 \
python -c 'import torch; print("I have this many devices:", torch.cuda.device_count())'
......@@ -31,5 +31,5 @@ fi
# Make sure conda libs can be loaded.
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
srun -n1 --gpus 8 \
srun -n 1 \
python -c 'import torch; print("I have this many devices:", torch.cuda.device_count())'
......@@ -14,7 +14,8 @@ wd=$(pwd)
set -x
# download from https://hub.docker.com/r/rocm/pytorch or use from Samuel Antao:
SIF=/pfs/lustrep2/projappl/project_462000125/samantao-public/containers/lumi-pytorch-rocm-5.5.1-python-3.10-pytorch-v2.0.1-dockerhash-4305da4654f4.sif
#SIF=/pfs/lustrep2/projappl/project_462000125/samantao-public/containers/lumi-pytorch-rocm-5.5.1-python-3.10-pytorch-v2.0.1-dockerhash-4305da4654f4.sif
SIF=/pfs/lustrep2/projappl/project_462000125/samantao-public/containers/lumi-pytorch-rocm-5.5.1-python-3.10-pytorch-v2.0.1-dockerhash-d55f9163ed80.sif
# check script
if [ ! -f $wd/run-me.sh ] ; then
......@@ -22,7 +23,7 @@ if [ ! -f $wd/run-me.sh ] ; then
exit 1
fi
srun -n1 --gpus 8 \
srun -n 1 \
singularity exec \
-B /var/spool/slurmd:/var/spool/slurmd \
-B /opt/cray:/opt/cray \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment