Skip to content
Snippets Groups Projects
html_md.sh 8.91 KiB
Newer Older
  • Learn to ignore specific revisions
  • Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    #!/bin/bash
    
    Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    
    
    Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    ### DOWNLOAD AND CONVERT DOCUMENTATION
    # author: kru0052
    
    Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    # version: 0.46
    # change: repair tables, optimization
    # bugs: bad links for other files, formatting bugs, bad bash and other code (formatting)...
    
    Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    ###
    
    Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    
    
    Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    #######################################
    # Escape one rule field so sed treats it as literal text in s/.../.../.
    # Backslash-prefixes each of: ] \ [ ^ $ . * / "
    # Inputs:  the raw field on stdin
    # Outputs: the escaped field on stdout
    #######################################
    html_md_test_escape_field() {
      # This is the sed program the former backtick form ended up running:
      # inside backticks the shell collapses "\\\\" to "\\" and "\$" to "$"
      # before sed ever sees the text.
      sed 's:[]\[\^$\.\*\/\"]:\\&:g'
    }

    #######################################
    # Apply every "pattern&replacement" line of a rule file to a document.
    # After each rule, lines ending in a backslash are joined with the line
    # that follows them.
    # Arguments:
    #   $1 - document, rewritten in place
    #   $2 - rule file, one "pattern&replacement" per line
    #   $3 - scratch file (left on disk, as before)
    #######################################
    html_md_test_repair_tables() {
      local doc=$1 rules=$2 scratch=$3
      local rule rest pattern replacement

      # NOTE(review): read is deliberately used without -r and with the default
      # IFS so rule lines are parsed exactly as before; the rule files are not
      # visible here and may rely on that. The "|| [[ -n ]]" part keeps a last
      # line that has no trailing newline.
      # shellcheck disable=SC2162
      while read rule || [[ -n "${rule}" ]]; do
        # Field 1 = text before the first '&'; field 2 = text between the
        # first and second '&'. A line without '&' yields the whole line for
        # both fields, matching what `cut -d'&'` printed.
        pattern=$(printf '%s\n' "${rule%%&*}" | html_md_test_escape_field)
        # An empty pattern would make sed fail and truncate the document.
        [[ -n "${pattern}" ]] || continue
        rest=${rule#*&}
        replacement=$(printf '%s\n' "${rest%%&*}" | html_md_test_escape_field)

        sed -e "s/${pattern}/${replacement}/" "${doc}" > "${scratch}"
        # Join "...\<newline>next" into one line.
        sed -e :a -e '/\\$/N; s/\\\n//; ta' "${scratch}" > "${doc}"
      done < "${rules}"
    }

    # -t: try the table rules from ./source/tab on a copy of ./test.
    if [[ "${1:-}" == "-t" ]]; then
      echo "Testing..."
      printf "\t\tformatting tables...\n"
      cat ./test > ./test.md
      html_md_test_repair_tables ./test.md ./source/tab ./test.tmp
    fi
    # -t1: timing self-test. Sleeps for five seconds and prints how many
    # seconds elapsed between the two `date +%s` readings.
    if [[ "${1:-}" == "-t1" ]]; then
      STARTTIME=$(date +%s)

      sleep 5

      ENDTIME=$(date +%s)
      echo "It takes $((ENDTIME - STARTTIME)) seconds to complete this task..."
    fi
    
    Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    # -w: download the html pages of https://docs.it4i.cz into ./docs.it4i.cz
    # and then fetch a fixed list of additional files into the same directory.
    if [[ "${1:-}" == "-w" ]]; then
      STARTTIME=$(date +%s)

      # Start from a clean mirror directory.
      rm -rf docs.it4i.cz

      # Both lists are quoted: the reject list contains '*' and '@@*', which
      # the shell would otherwise try to expand as pathname patterns.
      excluded_dirs='pbspro-documentation,changelog,whats-new,portal_css,portal_javascripts,++resource++jquery-ui-themes,anselm-cluster-documentation/icon.jpg'
      rejected_files='favicon.ico,pdf.png,logo.png,background.png,application.png,search_icon.png,png.png,sh.png,touch_icon.png,anselm-cluster-documentation/icon.jpg,*js,robots.txt,*xml,RSS,download_icon.png,pdf,*zip,*rar,@@*,anselm-cluster-documentation/icon.jpg.1'

      wget -X "${excluded_dirs}" -R "${rejected_files}" \
        --mirror --convert-links --adjust-extension --page-requisites \
        --no-parent https://docs.it4i.cz

      # download images
      omics_images='https://docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/images'
      extra_urls=(
        'http://verif.cs.vsb.cz/aislinn/doc/report.png'
        'https://docs.it4i.cz/anselm-cluster-documentation/software/virtualization/virtualization-job-workflow'
      )
      for fig_no in 1 2 3 4 5 6; do
        extra_urls+=("${omics_images}/fig${fig_no}.png")
      done

      # One wget call per URL, in the same order as before.
      for url in "${extra_urls[@]}"; do
        wget --directory-prefix=./docs.it4i.cz/ "${url}"
      done

      ENDTIME=$(date +%s)
      echo "It takes $((ENDTIME - STARTTIME)) seconds to complete this task..."
    fi
    #######################################
    # Escape one rule field so sed treats it as literal text in s/.../.../.
    # Backslash-prefixes each of: ] \ [ ^ $ . * / "
    # Inputs:  the raw field on stdin
    # Outputs: the escaped field on stdout
    #######################################
    html_md_escape_rule_field() {
      # This is the sed program the former backtick form ended up running:
      # inside backticks the shell collapses "\\\\" to "\\" and "\$" to "$"
      # before sed ever sees the text.
      sed 's:[]\[\^$\.\*\/\"]:\\&:g'
    }

    #######################################
    # Apply every "pattern&replacement" line of a rule file to a document.
    # Each rule replaces the first literal occurrence of the pattern on every
    # line; a follow-up pass selected by $4 runs after each rule.
    # Arguments:
    #   $1 - document, rewritten in place
    #   $2 - rule file, one "pattern&replacement" per line
    #   $3 - scratch file (left on disk for the caller to remove)
    #   $4 - follow-up pass: copy (default, plain copy back),
    #        squeeze (cat -s: collapse repeated blank lines),
    #        join (merge lines ending in a backslash with the next line)
    #######################################
    html_md_apply_rules() {
      local doc=$1 rules=$2 scratch=$3 mode=${4:-copy}
      local rule rest pattern replacement

      # NOTE(review): read is deliberately used without -r and with the default
      # IFS so rule lines are parsed exactly as before; the rule files are not
      # visible here and may rely on that. The "|| [[ -n ]]" part keeps a last
      # line that has no trailing newline.
      # shellcheck disable=SC2162
      while read rule || [[ -n "${rule}" ]]; do
        # Field 1 = text before the first '&'; field 2 = text between the
        # first and second '&'. A line without '&' yields the whole line for
        # both fields, matching what `cut -d'&'` printed.
        pattern=$(printf '%s\n' "${rule%%&*}" | html_md_escape_rule_field)
        # An empty pattern would make sed fail and truncate the document.
        [[ -n "${pattern}" ]] || continue
        rest=${rule#*&}
        replacement=$(printf '%s\n' "${rest%%&*}" | html_md_escape_rule_field)

        sed -e "s/${pattern}/${replacement}/" "${doc}" > "${scratch}"
        case "${mode}" in
          squeeze) cat -s "${scratch}" > "${doc}" ;;
          join) sed -e :a -e '/\\$/N; s/\\\n//; ta' "${scratch}" > "${doc}" ;;
          *) cat "${scratch}" > "${doc}" ;;
        esac
      done < "${rules}"
    }

    #######################################
    # Strip leftover html/css markup from a pandoc-generated markdown file.
    # One sed process runs the same commands, in the same order, that used to
    # be chained through eight separate sed processes.
    # Arguments: $1 - markdown file to read
    # Outputs:   the filtered text on stdout
    #######################################
    html_md_strip_markup() {
      sed \
        -e 's/``` /```/' \
        -e 's/<\/div>//g' \
        -e '/^<div/d' \
        -e 's/<\/span>//' \
        -e 's/^\*\*//' \
        -e 's/\\//g' \
        -e 's/^: //g' \
        -e 's/^Obsah//g' \
        "$1"
    }

    #######################################
    # Convert one downloaded html page into a markdown file next to it.
    # For "<stem>.<ext>" the result is "<stem>.md"; "<stem>TMP.html",
    # "<stem>TMP.md" and "<stem>TMP.TEST.md" are used as temporary files.
    # Arguments: $1 - path of the html page
    # Outputs:   progress messages on stdout, warnings on stderr
    # Returns:   0, also when the page is skipped
    #######################################
    html_md_convert_page() {
      local page=$1
      local stem=${page%.*}
      local raw_html="${stem}TMP.html"
      local work_md="${stem}TMP.md"
      local scratch_md="${stem}TMP.TEST.md"
      local final_md="${stem}.md"
      local head_line end_line total_lines tail_cut

      # Line number of the first '<h1' and of the first end-marker comment.
      head_line=$(grep -n -m1 '<h1' "${page}" | cut -f1 -d:)
      end_line=$(grep -n -m1 '<!-- <div tal:content=' "${page}" | cut -f1 -d:)

      # Without both markers the old pipeline only produced sed errors and an
      # empty .md that the clean-up step deleted again; skip the page instead.
      if [[ -z "${head_line}" || -z "${end_line}" ]]; then
        printf 'warning: %s: content markers not found, page skipped\n' \
          "${page}" >&2
        return 0
      fi

      total_lines=$(wc -l < "${page}")
      tail_cut=$((total_lines - end_line + 2))

      # Keep the text from the '<h1' line on (tail -n +N also works for N=1,
      # where the former "sed 1,0d" removed the first line), then drop the
      # last tail_cut lines, i.e. from the line before the end marker onward.
      tail -n "+${head_line}" "${page}" \
        | sed -n -e :a -e '1,'"${tail_cut}"'!{P;N;D;};N;ba' > "${raw_html}"

      # converted .html to .md
      printf "\t\t.html -> .md\n"
      pandoc -f html -t markdown+pipe_tables-grid_tables "${raw_html}" \
        -o "${final_md}"
      rm -f "${raw_html}"

      printf "\t\tfiltering html and css elements...\n"
      html_md_strip_markup "${final_md}" > "${work_md}"
      html_md_apply_rules "${work_md}" ./source/replace "${scratch_md}" copy

      printf "\t\trepair formatting...\n"
      html_md_apply_rules "${work_md}" ./source/formatting "${scratch_md}" copy

      printf "\t\trepair images...\n"
      html_md_apply_rules "${work_md}" ./source/repairIMG "${scratch_md}" squeeze

      printf "\t\trepair tables...\n"
      html_md_apply_rules "${work_md}" ./source/tab "${scratch_md}" join

      printf "\t\treplace tables...\n"
      html_md_apply_rules "${work_md}" ./source/tabREPLACE "${scratch_md}" squeeze

      # Publish the result and delete temporary files.
      mv -f "${work_md}" "${final_md}"
      rm -f "${scratch_md}"
    }

    # -c: convert the html pages under the current directory to markdown and,
    # when the expected number of .md files exists, build ./converted from the
    # name lists kept in ./source.
    if [[ "${1:-}" == "-c" ]]; then
      STARTTIME=$(date +%s)

      # Terminal colours, looked up once instead of once per message.
      red=$(tput setaf 9)
      yellow=$(tput setaf 11)
      blue=$(tput setaf 12)
      white=$(tput setaf 15)

      if [[ -d ./docs.it4i.cz ]]; then
        # erasing the previous transfer
        rm -rf converted info

        # erasing duplicate files and unwanted files
        # NOTE(review): read kept without -r so ./source/list_rm is parsed as
        # before; confirm whether that list relies on it.
        # shellcheck disable=SC2162
        while read unwanted || [[ -n "${unwanted}" ]]; do
          if [[ -f "${unwanted}" ]]; then
            printf '%s\n' "${red}${unwanted} deleted"
            rm "${unwanted}"
          fi
        done < ./source/list_rm

        # Snapshot the page list before converting: the conversion creates
        # "*TMP.html" files in the tree that a still-running find could list.
        html_pages=()
        while IFS= read -r page; do
          html_pages+=("${page}")
        done < <(find . -name "*.ht*")

        counter=1
        count=${#html_pages[@]}
        for page in "${html_pages[@]}"; do
          # first filtering html
          printf '%s\n' "${blue}(${counter}/${count})${yellow}${page}"
          counter=$((counter + 1))
          printf '%s\t\tfiltering html files...\n' "${white}"
          html_md_convert_page "${page}"
        done

        # delete empty files
        # NOTE(review): this removes every file smaller than 10 bytes below
        # the current directory, not only below ./docs.it4i.cz.
        while IFS= read -r tiny; do
          rm "${tiny}"
          printf '%s\n' "${red}${tiny} deleted"
        done < <(find . -type f -size -10c)

        ### create new folder and move converted files
        # create folder info and list all files and folder
        mkdir info
        printf '%s\n' "${yellow}create folder info and file lists"
        # The order png, jpg, jpeg is kept: list_image is paired line by line
        # with ./source/list_image_mv further down.
        for ext in png jpg jpeg; do
          find ./docs.it4i.cz -name "*.${ext}" -type f
        done > ./info/list_image
        find ./docs.it4i.cz -name "*.md" -type f > ./info/list_md
        find ./docs.it4i.cz -type d | sort > ./info/list_folder

        # Expected number of .md files; 150 unless overridden from the
        # environment via HTML_MD_EXPECTED_MD_COUNT.
        expected_md=${HTML_MD_EXPECTED_MD_COUNT:-150}
        count=$(find . -name "*.md" -type f | wc -l)
        if [[ "${count}" -eq "${expected_md}" ]]; then
          rm -rf ./converted
          mkdir converted
          # shellcheck disable=SC2162
          while read folder || [[ -n "${folder}" ]]; do
            mkdir -p "./converted/${folder}"
          done < ./source/list_folder

          # move md files to new folders
          # paste joins the two lists with a TAB, so split on TAB only; that
          # keeps paths containing spaces in one piece.
          printf '%s\n' "${yellow}moved md files"
          while IFS=$'\t' read -r src dest; do
            mv "${src}" "./converted/${dest}"
          done < <(paste ./info/list_md ./source/list_md_mv)

          # copy jpg and jpeg to new folders
          printf '%s\n' "${yellow}copy image files"
          while IFS=$'\t' read -r src dest; do
            cp "${src}" "./converted/${dest}"
          done < <(paste ./info/list_image ./source/list_image_mv)

          cp ./docs.it4i.cz/salomon/salomon ./converted/docs.it4i.cz/salomon/salomon
          cp ./docs.it4i.cz/salomon/salomon-2 ./converted/docs.it4i.cz/salomon/salomon-2

          # Files copied from the converted salomon tree to the same relative
          # path in the converted anselm tree.
          salomon_root=./converted/docs.it4i.cz/salomon
          anselm_root=./converted/docs.it4i.cz/anselm-cluster-documentation
          for shared in \
            resource-allocation-and-job-execution/fairshare_formula.png \
            resource-allocation-and-job-execution/job_sort_formula.png \
            software/debuggers/vtune-amplifier.png \
            software/debuggers/Snmekobrazovky20160708v12.33.35.png; do
            cp "${salomon_root}/${shared}" "${anselm_root}/${shared}"
          done
          cp ./docs.it4i.cz/virtualization-job-workflow "${anselm_root}/software/"

          # list all files and folders in ./converted
          for ext in png jpg jpeg; do
            find ./converted -name "*.${ext}" -type f
          done > ./info/list_image_converted
          find ./converted -name "*.md" -type f > ./info/list_md_converted
          find ./converted -type d | sort > ./info/list_folder_converted

          printf '%s\n' "${yellow}COMPLETED...${white}"
        else
          printf '\n\n%sCan not create a folder converted, because the number of MD files disagrees. The converted files remain in the folder docs.it4i.cz !!!!...%s\n\n' \
            "${red}" "${white}"
        fi
      else
        printf '\n\n%sfolder docs.it4i.cz not exists!!!!...%s\n\nRun html_md.sh -w\n\n' \
          "${red}" "${white}"
      fi

      ENDTIME=$(date +%s)
      echo "It takes $((ENDTIME - STARTTIME)) seconds to complete this task..."
    fi