#!/bin/bash
###
# DOWNLOAD AND CONVERT DOCUMENTATION
# author:  kru0052
# version: 0.4
# change:  bug fixes and optimizations
# bugs:    badly formatted tables, broken links to other files,
#          a few leftover html elements, formatting bugs...
#
# Usage (first positional argument selects the mode; anything else is a no-op):
#   -t   placeholder for testing new functions (currently does nothing)
#   -w   mirror https://docs.it4i.cz into ./docs.it4i.cz with wget
#   -c   convert the mirrored html pages to markdown with pandoc and
#        assemble the result under ./converted
#
# The -c mode reads these helper files from ./source:
#   list_rm            paths to delete before converting
#   replace, repairIMG substitution rules, one "search&replacement" per line
#   list_folder        directories to create under ./converted
#   list_md_mv         destination (relative to ./converted) for each line
#                      of ./info/list_md.txt
#   list_image_mv.txt  destination (relative to ./converted) for each line
#                      of ./info/list_image.txt
###

# Join all arguments into one comma-separated string (written to stdout).
join_by_comma() {
  local IFS=,
  printf '%s' "$*"
}

# Filter: backslash-escape the characters ] \ [ ^ $ . * / " on stdin so the
# text can be embedded literally in a sed "s/.../.../" expression.
escape_sed_literal() {
  sed 's:[]\[\^$\.\*\/\"]:\\&:g'
}

#######################################
# Apply every substitution rule of a rules file to a document, in place.
# Each rule line has the form "search&replacement"; only the first match on
# each line of the document is replaced (no "g" flag), one rule after another.
# Arguments: $1 - document to rewrite
#            $2 - rules file
# Returns:   1 if the rules file is unreadable, 0 otherwise
#######################################
apply_rules() {
  local target=$1 rules=$2
  local scratch="${target}.rules-tmp"
  local rule pattern replacement

  if [[ ! -r "$rules" ]]; then
    printf 'cannot read rules file: %s\n' "$rules" >&2
    return 1
  fi

  # "read" is deliberately used without -r / IFS= here: the rule files were
  # written for a reader that trims surrounding whitespace and processes
  # backslash escapes, and changing that would alter how existing rules parse.
  # shellcheck disable=SC2162
  while read rule || [[ -n "$rule" ]]; do
    # A blank line would produce "s///", which sed rejects.
    [[ -n "$rule" ]] || continue

    # A line without "&" makes cut return the whole line for both fields,
    # which turns the rule into a harmless self-replacement.
    pattern=$(printf '%s\n' "$rule" | cut -d'&' -f1 | escape_sed_literal)
    replacement=$(printf '%s\n' "$rule" | cut -d'&' -f2 | escape_sed_literal)

    # Only replace the document when sed succeeded, so one broken rule cannot
    # truncate the document to an empty file.
    if sed -e "s/${pattern}/${replacement}/" "$target" > "$scratch"; then
      mv -f -- "$scratch" "$target"
    else
      printf 'rule skipped (sed failed): %s\n' "$rule" >&2
      rm -f -- "$scratch"
    fi
  done < "$rules"

  return 0
}

# Mode -w: mirror the documentation site and fetch a few files that the
# mirror run does not bring in.
download_pages() {
  local -a excluded_dirs=(
    pbspro-documentation
    changelog
    whats-new
    portal_css
    portal_javascripts
    ++resource++jquery-ui-themes
    anselm-cluster-documentation/icon.jpg
  )
  # Patterns are quoted so the shell never expands them against local files.
  local -a rejected_files=(
    favicon.ico
    pdf.png
    logo.png
    background.png
    application.png
    search_icon.png
    png.png
    sh.png
    touch_icon.png
    anselm-cluster-documentation/icon.jpg
    '*js'
    robots.txt
    '*xml'
    RSS
    download_icon.png
    pdf
    '*zip'
    '*rar'
    '@@*'
    anselm-cluster-documentation/icon.jpg.1
  )
  local software_url=https://docs.it4i.cz/anselm-cluster-documentation/software
  local fig

  wget -X "$(join_by_comma "${excluded_dirs[@]}")" \
    -R "$(join_by_comma "${rejected_files[@]}")" \
    --mirror --convert-links --adjust-extension --page-requisites --no-parent \
    https://docs.it4i.cz

  wget --directory-prefix=./docs.it4i.cz/ \
    http://verif.cs.vsb.cz/aislinn/doc/report.png
  wget --directory-prefix=./docs.it4i.cz/ \
    "${software_url}/virtualization/virtualization-job-workflow"
  for fig in 1 2 3 4 5 6; do
    wget --directory-prefix=./docs.it4i.cz/ \
      "${software_url}/omics-master-1/images/fig${fig}.png"
  done
}

#######################################
# Convert one html page to markdown next to it (same name, .md extension).
# The page is cut down to the part starting at the first "<h1" line and
# ending before the "<!-- <div tal:content=" marker, converted with pandoc,
# stripped of leftover html/css fragments and finally run through the rule
# files ./source/replace and ./source/repairIMG.
# Arguments: $1 - path of the html page
# Returns:   0 (pages without both markers are skipped)
#######################################
convert_page() {
  local page=$1
  local stem=${page%.*}
  local first_line end_marker line_count trailing

  printf '%s\t\tfirst filtering html files...\n' "$(tput setaf 15)"
  first_line=$(grep -n -m1 '<h1' "$page" | cut -f1 -d:)
  end_marker=$(grep -n -m1 '<!-- <div tal:content=' "$page" | cut -f1 -d:)

  # Without both markers there is no content range to extract; the page
  # would only yield an empty markdown file that is deleted again later.
  if [[ ! "$first_line" =~ ^[0-9]+$ || ! "$end_marker" =~ ^[0-9]+$ ]]; then
    printf '\t\tcontent markers not found, skipping %s\n' "$page" >&2
    return 0
  fi

  line_count=$(wc -l < "$page")
  # Number of lines to drop from the end: everything from the line before
  # the end marker up to the last line of the file.
  trailing=$((line_count - end_marker + 2))

  # tail -n +N starts at line N (also correct for N=1);
  # head -n -K drops the last K lines (GNU coreutils).
  tail -n +"$first_line" "$page" \
    | head -n -"$trailing" > "${stem}TMP.html"

  # convert .html to .md
  printf '\t\t.html -> .md\n'
  pandoc -f html -t markdown+pipe_tables-grid_tables \
    "${stem}TMP.html" -o "${stem}.md"
  rm -- "${stem}TMP.html"

  # second filtering: html and css leftovers
  printf '\t\tsecond filtering html and css elements...\n'
  # NOTE(review): the second expression removes the first occurrence of a
  # single whitespace character exactly as found in the source; confirm
  # whether it was meant to be a non-breaking space rather than a plain space.
  sed -e 's/``` /```/' \
    -e 's/ //' \
    -e 's/<\/div>//g' \
    -e '/^<div/d' \
    -e 's/<\/span>//' \
    -e 's/^\*\*//' \
    -e 's/\\//g' \
    -e 's/^: //g' \
    -e 's/^Obsah//g' \
    "${stem}.md" > "${stem}TMP.md"
  apply_rules "${stem}TMP.md" ./source/replace

  # repair images
  printf '\t\trepair images...\n'
  apply_rules "${stem}TMP.md" ./source/repairIMG

  mv -f -- "${stem}TMP.md" "${stem}.md"
  return 0
}

# Mode -c: convert all mirrored html pages to markdown and assemble the
# converted tree under ./converted (file lists are written to ./info).
convert_docs() {
  local -a pages=()
  local page victim tiny folder src dest shared
  local total index=0
  local salomon_out=./converted/docs.it4i.cz/salomon
  local anselm_out=./converted/docs.it4i.cz/anselm-cluster-documentation

  # erase the results of the previous run
  rm -rf converted
  rm -rf info

  # erase duplicate and unwanted files
  while read -r victim || [[ -n "$victim" ]]; do
    if [[ -f "$victim" ]]; then
      echo "$(tput setaf 9)$victim deleted"
      rm -- "$victim"
    fi
  done < ./source/list_rm

  # Snapshot the page list before converting: the conversion creates
  # temporary *.html files in the same tree, which a still-running find
  # could otherwise pick up. The same list provides the progress total.
  while IFS= read -r -d '' page; do
    pages+=("$page")
  done < <(find . -name "*.ht*" -type f -print0)
  total=${#pages[@]}

  for page in "${pages[@]}"; do
    index=$((index + 1))
    echo "$(tput setaf 12)($index/$total)$(tput setaf 11)$page"
    convert_page "$page"
  done

  # delete (nearly) empty files: everything below the current directory
  # that is smaller than 10 bytes
  while IFS= read -r -d '' tiny; do
    rm -- "$tiny"
    echo "$(tput setaf 9)$tiny deleted"
  done < <(find . -type f -size -10c -print0)

  ### create new folders and move converted files
  # The image list keeps the order png, jpg, jpeg because it is matched line
  # by line against ./source/list_image_mv.txt below.
  mkdir info
  find ./docs.it4i.cz -name "*.png" -type f > ./info/list_image.txt
  find ./docs.it4i.cz -name "*.jpg" -type f >> ./info/list_image.txt
  find ./docs.it4i.cz -name "*.jpeg" -type f >> ./info/list_image.txt
  find ./docs.it4i.cz -name "*.md" -type f > ./info/list_md.txt
  find ./docs.it4i.cz -type d | sort > ./info/list_folder.txt

  rm -rf ./converted
  mkdir converted
  while read -r folder || [[ -n "$folder" ]]; do
    mkdir -p "./converted/$folder"
  done < ./source/list_folder

  # copy md files to the new folders; paste joins source and destination
  # with a tab, so split on tab only to keep paths containing spaces intact
  while IFS=$'\t' read -r src dest; do
    cp -- "$src" "./converted/$dest"
  done < <(paste ./info/list_md.txt ./source/list_md_mv)

  # copy images to the new folders
  while IFS=$'\t' read -r src dest; do
    cp -- "$src" "./converted/$dest"
  done < <(paste ./info/list_image.txt ./source/list_image_mv.txt)

  cp ./docs.it4i.cz/salomon/salomon "${salomon_out}/salomon"
  cp ./docs.it4i.cz/salomon/salomon-2 "${salomon_out}/salomon-2"

  # images of the salomon section that are copied to the same relative
  # place in the anselm section
  for shared in \
    resource-allocation-and-job-execution/fairshare_formula.png \
    resource-allocation-and-job-execution/job_sort_formula.png \
    software/debuggers/vtune-amplifier.png \
    software/debuggers/Snmekobrazovky20160708v12.33.35.png; do
    cp "${salomon_out}/${shared}" "${anselm_out}/${shared}"
  done

  cp ./docs.it4i.cz/virtualization-job-workflow "${anselm_out}/software/"
}

# Dispatch on the first argument; unknown or missing modes do nothing.
main() {
  case "${1:-}" in
    -t)
      # testing new function (a body is required, hence the no-op)
      :
      ;;
    -w)
      download_pages
      ;;
    -c)
      convert_docs
      ;;
  esac
}

main "$@"