From 29c9e3cab05f0e81defd5e1b4d4808061a04bb14 Mon Sep 17 00:00:00 2001 From: Branislav Jansik <branislav.jansik@vsb.cz> Date: Fri, 3 May 2024 15:46:05 +0200 Subject: [PATCH] Update shell-and-data-access.md --- docs.it4i/general/shell-and-data-access.md | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/docs.it4i/general/shell-and-data-access.md b/docs.it4i/general/shell-and-data-access.md index 6687ba5ed..69db41f6f 100644 --- a/docs.it4i/general/shell-and-data-access.md +++ b/docs.it4i/general/shell-and-data-access.md @@ -206,8 +206,64 @@ local $ hq submit --log=/dev/null --progress --array --each-line jobfile \ bash -c 'rsync -R $HQ_ENTRY username@cluster-name.it4i.cz:mydir' ``` +Again, the **-n** argument determines the number of files to transfer in one rsync call. Set it according to the file size and count (large for many small files). + #### Single Very Large File +To transfer a single very large file efficiently, we need to transfer many blocks of the file in parallel, utilizing multiple cores to accelerate SSH encryption and multiple TCP streams for enhanced bandwidth. + +First, set up ssh-agent single sign-on as [described above][10]. +Second, start the HyperQueue server and HyperQueue worker: + +```console +local $ hq server start & +local $ hq worker start & +``` + +Once set up, run the hqtransfer script listed below: + +```console +local $ ./hqtransfer mybigfile username@cluster-name.it4i.cz outputpath/outputfile +``` + +The hqtransfer script is listed below: + +```console +#!/bin/bash +#Read input +if [ -z $1 ]; then echo Usage: $0 'input_file ssh_destination [output_path/output_file]'; exit; fi +INFILE=$1 + +if [ -z $2 ]; then echo Usage: $0 'input_file ssh_destination [output_path/output_file]'; exit; fi +DEST=$2 + +OUTFILE=$INFILE +if [ ! 
-z $3 ]; then OUTFILE=$3; fi + +#Calculate transfer blocks +SIZE=$(($(stat --printf %s $INFILE)/1024/1024/1024)) +echo Transfering $(($SIZE+1)) x 1GB blocks + +#Execute +SECONDS=0 +hq submit --log=/dev/null --progress --array 0-$SIZE /bin/bash -c \ + "dd if=$INFILE bs=1G count=1 skip=\$HQ_TASK_ID | \ + ssh -c aes256-gcm@openssh.com $DEST \ + dd of=$OUTFILE bs=1G conv=notrunc seek=\$HQ_TASK_ID" + +#Stats +echo "Transfered: $(($SIZE+1))GB in $SECONDS s" +echo "Transfer speed: $((($SIZE+1)/$SECONDS)) GB/s" + +exit +``` + +Copy and paste the script into the `hqtransfer` file and set the executable flag: + +```console +local $ chmod u+x hqtransfer +``` + ### Data Transfer From Windows Clients On Windows, use the [WinSCP client][c] to transfer data. The [win-sshfs client][d] provides a way to mount the cluster filesystems directly as an external disc. @@ -300,6 +356,7 @@ Now, configure the applications proxy settings to `localhost:6000`. Use port for [7]: ../general/accessing-the-clusters/graphical-user-interface/vnc.md [8]: ../general/accessing-the-clusters/vpn-access.md [9]: #port-forwarding-from-compute-nodes +[10]: [b]: http://linux.die.net/man/1/sshfs [c]: http://winscp.net/eng/download.php -- GitLab