#!/bin/bash -e
# Source: Tools in action by Samuel Antao (AMD)
# https://docs.lumi-supercomputer.eu/software/packages/pytorch/
# Contribution: M. Jaros (IT4Innovations)
wd=$(pwd)
#
# Example assumes an allocation has been created, e.g.:
# N=1 ; salloc -p standard-g --threads-per-core 1 --exclusive -N $N --gpus $((N*8)) -t 4:00:00 --mem 0
#
set -x
# download from https://hub.docker.com/r/rocm/pytorch or use the image provided by Samuel Antao:
#SIF=/pfs/lustrep2/projappl/project_462000125/samantao-public/containers/lumi-pytorch-rocm-5.5.1-python-3.10-pytorch-v2.0.1-dockerhash-4305da4654f4.sif
SIF=/pfs/lustrep2/projappl/project_462000125/samantao-public/containers/lumi-pytorch-rocm-5.5.1-python-3.10-pytorch-v2.0.1-dockerhash-d55f9163ed80.sif
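# If the shared image above is not accessible, a local image can be pulled from
# Docker Hub with singularity pull (the tag and output filename below are
# illustrative only -- pick a ROCm/PyTorch tag from the Docker Hub page above), e.g.:
#   singularity pull lumi-pytorch.sif docker://rocm/pytorch:latest
#   SIF=$wd/lumi-pytorch.sif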
rm -f "$wd/run-me.sh"
cat > "$wd/run-me.sh" << EOF
#!/bin/bash -e
# Start conda environment inside the container
\$WITH_CONDA
# Run application
python -c 'import torch; print("I have this many devices:", torch.cuda.device_count())'
EOF
chmod +x "$wd/run-me.sh"
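#
# A minimal sketch of launching the wrapper inside the container on the
# allocation above (exact bind mounts for Slurm/RCCL are site-specific;
# see the LUMI docs linked at the top for the full list), e.g.:
#   srun -N 1 -n 1 --gpus 8 singularity exec -B "$wd" "$SIF" "$wd/run-me.sh"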