Forked from
SCS / docs.it4i.cz
1533 commits behind, 670 commits ahead of the upstream repository.
-
Lukáš Krupčík authored
html_md.sh 9.37 KiB
#!/bin/bash
### DOWNLOAD AND CONVERT DOCUMENTATION
# author: kru0052
# version: 1.00
###
# Remove one generated output directory.
# Arguments: $1 - path of the directory
# A path that is not an existing directory is left alone and is not an error.
remove_generated_dir() {
  if [ -d "$1" ]; then
    rm -rf -- "$1"
  fi
}

# Mode -d <pdf|epub|md|all>: delete previously generated output.
#   pdf   removes ./pdf
#   epub  removes ./epub
#   md    removes ./converted
#   all   removes ./converted, ./docs.it4i, ./epub, ./pdf, ./info and
#         ./docs.it4i.cz
# Any other (or missing) second argument deletes nothing; the elapsed time is
# reported in every case.
if [ "$1" = "-d" ]; then
  STARTTIME=$(date +%s)
  case "$2" in
    pdf)
      echo "$(tput setaf 9)*.pdf deleted$(tput setaf 15)"
      remove_generated_dir ./pdf
      ;;
    epub)
      echo "$(tput setaf 9)*.epub deleted$(tput setaf 15)"
      remove_generated_dir ./epub
      ;;
    md)
      echo "$(tput setaf 9)*.md deleted$(tput setaf 15)"
      remove_generated_dir ./converted
      ;;
    all)
      echo "$(tput setaf 9)all files deleted$(tput setaf 15)"
      # ./converted and ./docs.it4i are handled independently of each other:
      # testing for one directory and then removing the other leaves
      # ./converted behind whenever ./docs.it4i is absent, and never removes
      # ./docs.it4i (the folder that mode -c creates) at all.
      for generated in ./converted ./docs.it4i ./epub ./pdf ./info ./docs.it4i.cz; do
        remove_generated_dir "$generated"
      done
      ;;
  esac
  ENDTIME=$(date +%s)
  echo "It takes $((ENDTIME - STARTTIME)) seconds to complete this task..."
fi
# Mode -w: download the HTML pages of https://docs.it4i.cz with wget and then
# fetch a fixed list of additional files into ./docs.it4i.cz/.
# Any existing ./docs.it4i.cz is removed first. The elapsed time is reported
# at the end.
if [ "$1" = "-w" ]; then
  # download html pages
  STARTTIME=$(date +%s)
  rm -rf docs.it4i.cz
  # The directory-exclude (-X) and reject (-R) lists are quoted so that the
  # wildcards in them (*js, *xml, *zip, *rar, @@*) always reach wget verbatim
  # instead of being subject to pathname expansion by the shell.
  wget \
    -X 'pbspro-documentation,changelog,whats-new,portal_css,portal_javascripts,++resource++jquery-ui-themes,anselm-cluster-documentation/icon.jpg' \
    -R 'favicon.ico,pdf.png,logo.png,background.png,application.png,search_icon.png,png.png,sh.png,touch_icon.png,anselm-cluster-documentation/icon.jpg,*js,robots.txt,*xml,RSS,download_icon.png,pdf,*zip,*rar,@@*,anselm-cluster-documentation/icon.jpg.1' \
    --mirror --convert-links --adjust-extension --page-requisites --no-parent \
    https://docs.it4i.cz
  # download images
  wget --directory-prefix=./docs.it4i.cz/ http://verif.cs.vsb.cz/aislinn/doc/report.png
  wget --directory-prefix=./docs.it4i.cz/ https://docs.it4i.cz/anselm-cluster-documentation/software/virtualization/virtualization-job-workflow
  # the omics-master figures share one URL prefix; the order of the downloads
  # is fig1 .. fig7, fig7x, fig8, fig9
  for figure in fig1 fig2 fig3 fig4 fig5 fig6 fig7 fig7x fig8 fig9; do
    wget --directory-prefix=./docs.it4i.cz/ "https://docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/images/${figure}.png"
  done
  ENDTIME=$(date +%s)
  echo "It takes $((ENDTIME - STARTTIME)) seconds to complete this task..."
fi
# Mode -c: convert the downloaded HTML pages under ./docs.it4i.cz to Markdown
# and, when the expected number of Markdown files was produced, assemble the
# result in ./docs.it4i.
# Reads:  ./source/list_rm, ./source/replace, ./source/formatting,
#         ./source/lastFilter, ./source/list_folder, ./source/list_md_mv,
#         ./source/list_image_mv
# Writes: one .md next to every converted page, ./info/list_*, ./docs.it4i
# Needs:  pandoc; grep -m and tr --delete are used as provided by GNU tools.
if [ "$1" = "-c" ]; then
  ### convert html to md
  STARTTIME=$(date +%s)
  if [ -d ./docs.it4i.cz ]; then
    # erasing the previous transfer
    if [ -d ./docs.it4i ]; then
      rm -rf ./docs.it4i
    fi
    if [ -d ./info ]; then
      rm -rf ./info
    fi
    # erasing duplicate files and unwanted files (one path per line in list_rm)
    (while read i; do
      if [ -f "$i" ]; then
        echo "$(tput setaf 9)$i deleted"
        rm "$i"
      fi
    done) < ./source/list_rm
    # counter for html and md files; the total uses the same name pattern as
    # the loop below so that the "(n/total)" progress display is consistent
    counter=1
    count=$(find ./docs.it4i.cz -name "*.ht*" | wc -l)
    # Only the download folder is searched: every later step (the file lists
    # and the final count) looks at ./docs.it4i.cz exclusively.
    find ./docs.it4i.cz -name "*.ht*" |
      while IFS= read -r i; do
        # filtering html files
        echo "$(tput setaf 12)($counter/$count)$(tput setaf 11)$i"
        counter=$((counter+1))
        printf "$(tput setaf 15)\t\tFiltering html files...\n"
        # HEAD: line number of the first "<h1"
        # END:  line number of the first '<!-- <div tal:content=' marker
        # LAST: number of lines in the page
        HEAD=$(grep -n -m1 '<h1' "$i" |cut -f1 -d: | tr --delete '\n')
        END=$(grep -n -m1 '<!-- <div tal:content=' "$i" |cut -f1 -d: | tr --delete '\n')
        LAST=$(wc -l "$i" | cut -f1 -d' ')
        DOWN=$((LAST-END+2))
        # The first sed drops everything above the <h1 line; the second sed
        # drops the last DOWN lines of what remains, i.e. everything from the
        # line before the marker to the end of the page.
        sed '1,'"$((HEAD-1))"'d' "$i" | sed -n -e :a -e '1,'"$DOWN"'!{P;N;D;};N;ba' > "${i%.*}TMP.html"
        # converted .html to .md
        printf "\t\t.html => $(tput setaf 13).md\n$(tput setaf 15)"
        pandoc -f html -t markdown+pipe_tables-grid_tables "${i%.*}TMP.html" -o "${i%.*}.md"
        rm "${i%.*}TMP.html"
        # filtering html and css elements...
        printf "\t\tFiltering html and css elements in md files...\n"
        sed -e 's/``` /```/' "${i%.*}.md" |
          sed -e 's/<\/div>//g' |
          sed '/^<div/d' |
          sed -e 's/<\/span>//' |
          sed -e 's/^\*\*//' |
          sed -e 's/\\//g' |
          sed -e 's/^: //g' |
          sed -e 's/^Obsah//g' > "${i%.*}TMP.md"
        # Each rule line has the form "search&replacement". Both halves are
        # escaped for use inside s/.../.../ and applied to the whole file.
        # NOTE: the escaping commands are deliberately left in backticks.
        # Inside backticks the shell itself consumes one level of backslashes
        # before "\", "$" and "`", so the same text inside $( ) would hand sed
        # a different script.
        # NOTE(review): these loops use plain "read" (no -r), so backslashes
        # in the rule files are processed by read; kept as is because the rule
        # files may depend on it.
        while read x ; do
          arg1=`echo "$x" | cut -d"&" -f1 | sed 's:[]\[\^\$\.\*\/\"]:\\\\&:g'`;
          arg2=`echo "$x" | cut -d"&" -f2 | sed 's:[]\[\^\$\.\*\/\"]:\\\\&:g'`;
          sed -e 's/'"$arg1"'/'"$arg2"'/' "${i%.*}TMP.md" > "${i%.*}TMP.TEST.md"
          # cat -s also squeezes runs of blank lines
          cat -s "${i%.*}TMP.TEST.md" > "${i%.*}TMP.md"
        done < ./source/replace
        # repair formatting...
        printf "\t\tFix formatting text...\n"
        while read x ; do
          arg1=`echo "$x" | cut -d"&" -f1 | sed 's:[]\[\^\$\.\*\/\"]:\\\\&:g'`;
          arg2=`echo "$x" | cut -d"&" -f2 | sed 's:[]\[\^\$\.\*\/\"]:\\\\&:g'`;
          sed -e 's/'"$arg1"'/'"$arg2"'/' "${i%.*}TMP.md" | sed -e 's/^``//g' > "${i%.*}TMP.TEST.md"
          cat -s "${i%.*}TMP.TEST.md" > "${i%.*}TMP.md"
        done < ./source/formatting
        # last repair formatting...
        printf "\t\tLatest fix formatting text...\n"
        while read x ; do
          arg1=`echo "$x" | cut -d"&" -f1 | sed 's:[]\[\^\$\.\*\/\"]:\\\\&:g'`;
          arg2=`echo "$x" | cut -d"&" -f2 | sed 's:[]\[\^\$\.\*\/\"]:\\\\&:g'`;
          sed -e 's/'"$arg1"'/'"$arg2"'/' "${i%.*}TMP.md" > "${i%.*}TMP.TEST.md"
          cat -s "${i%.*}TMP.TEST.md" > "${i%.*}TMP.md"
        done < ./source/lastFilter
        cat "${i%.*}TMP.md" > "${i%.*}.md"
        # delete temporary files
        rm "${i%.*}TMP.md"
        rm "${i%.*}TMP.TEST.md"
      done
    # delete empty files (anything smaller than 10 bytes). The search is
    # limited to the download folder so that small files elsewhere in the
    # working directory, such as the rule files in ./source, are never removed.
    find ./docs.it4i.cz -type f -size -10c |
      while IFS= read -r i; do
        rm "$i"
        echo "$(tput setaf 9)$i deleted"
      done
    ### create new folder and move converted files
    # create folder info and list all files and folders
    mkdir info
    echo "$(tput setaf 11)Create folder info and lists od files..."
    find ./docs.it4i.cz -name "*.png" -type f > ./info/list_image
    find ./docs.it4i.cz -name "*.jpg" -type f >> ./info/list_image
    find ./docs.it4i.cz -name "*.jpeg" -type f >> ./info/list_image
    find ./docs.it4i.cz -name "*.md" -type f > ./info/list_md
    find ./docs.it4i.cz -type d | sort > ./info/list_folder
    count=$(find ./docs.it4i.cz -name "*.md" -type f | wc -l)
    echo "$count"
    # The move lists in ./source are paired line by line with the generated
    # lists, so the result is only assembled when the number of Markdown files
    # is exactly the expected one.
    expected_md_count=150
    if [ "$count" -eq "$expected_md_count" ]; then
      mkdir docs.it4i
      # -p: a nested folder may be listed before its parent
      (while read i; do
        mkdir -p "./docs.it4i/$i"
      done) < ./source/list_folder
      # move md files to folder converted
      echo "$(tput setaf 11)Moved md files..."
      while read a b ; do
        mv "$a" "./docs.it4i/$b"
      done < <(paste ./info/list_md ./source/list_md_mv)
      # copy jpg, jpeg and png to folder converted
      echo "$(tput setaf 11)Copy image files..."
      while read a b ; do
        cp "$a" "./docs.it4i/$b"
      done < <(paste ./info/list_image ./source/list_image_mv)
      cp ./docs.it4i.cz/salomon/salomon ./docs.it4i/salomon/salomon
      cp ./docs.it4i.cz/salomon/salomon-2 ./docs.it4i/salomon/salomon-2
      cp ./docs.it4i/salomon/resource-allocation-and-job-execution/fairshare_formula.png ./docs.it4i/anselm-cluster-documentation/resource-allocation-and-job-execution/fairshare_formula.png
      cp ./docs.it4i/salomon/resource-allocation-and-job-execution/job_sort_formula.png ./docs.it4i/anselm-cluster-documentation/resource-allocation-and-job-execution/job_sort_formula.png
      cp ./docs.it4i/salomon/software/debuggers/vtune-amplifier.png ./docs.it4i/anselm-cluster-documentation/software/debuggers/vtune-amplifier.png
      cp ./docs.it4i/salomon/software/debuggers/Snmekobrazovky20160708v12.33.35.png ./docs.it4i/anselm-cluster-documentation/software/debuggers/Snmekobrazovky20160708v12.33.35.png
      cp ./docs.it4i.cz/virtualization-job-workflow ./docs.it4i/anselm-cluster-documentation/software/
      cp ./docs.it4i.cz/anselm-cluster-documentation/anyconnecticon.jpg ./docs.it4i/salomon/accessing-the-cluster/anyconnecticon.jpg
      cp ./docs.it4i.cz/anselm-cluster-documentation/anyconnectcontextmenu.jpg ./docs.it4i/salomon/accessing-the-cluster/anyconnectcontextmenu.jpg
      cp ./docs.it4i.cz/get-started-with-it4innovations/accessing-the-clusters/graphical-user-interface/vnc/TightVNC_login.png ./docs.it4i/salomon/software/debuggers/TightVNC_login.png
      # list all files and folder converted
      find ./docs.it4i -name "*.png" -type f > ./info/list_image_converted
      find ./docs.it4i -name "*.jpg" -type f >> ./info/list_image_converted
      find ./docs.it4i -name "*.jpeg" -type f >> ./info/list_image_converted
      find ./docs.it4i -name "*.md" -type f > ./info/list_md_converted
      find ./docs.it4i -type d | sort > ./info/list_folder_converted
      echo "$(tput setaf 13)COMPLETED...$(tput setaf 15)"
    else
      printf "\n\n$(tput setaf 9)Can not create a folder docs.it4i, because the number of MD files disagrees. The converted files remain in the folder docs.it4i.cz !!!!...$(tput setaf 15)\n\n"
    fi
  else
    printf "\n\n$(tput setaf 9)Folder docs.it4i.cz not exists!!!!...$(tput setaf 15)\n\nRun html_md.sh -w\n\n"
  fi
  ENDTIME=$(date +%s)
  echo "It takes $((ENDTIME - STARTTIME)) seconds to complete this task..."
fi