#!/bin/bash

    ### DOWNLOAD AND CONVERT DOCUMENTATION
    # author: kru0052
    # version: 0.36
    # change: fix image bugs and add new filtering of html and css elements
    # bugs: badly formatted tables, broken links to other files, a few leftover html elements, formatting bugs...
    ###

    # Mode "-t": rebuild ./converted from the lists already present in ./info
    # and ./source, then fetch a few assets that the list-driven copy does not
    # cover. Requires ./info/list_md.txt; otherwise only a message is printed.
    if [ "$1" = "-t" ]; then
    	# testing new function
    	echo "Testing 1..."

    	rm -rf ./converted

    	# exists file for move?
    	if [ -f ./info/list_md.txt ]; then
    		mkdir converted
    		# -p: a nested folder no longer depends on its parent being listed first
    		while read -r folder; do
    			mkdir -p "./converted/$folder"
    		done < ./source/list_folder.txt

    		# move md files to new folders
    		# paste pairs line N of the source list with line N of the target list,
    		# so both files must be in the same order.
    		while read -r src dst; do
    			cp "$src" "./converted/$dst"
    		done < <(paste ./info/list_md.txt ./source/list_md_mv.txt)

    		# copy jpg and jpeg to new folders
    		while read -r src dst; do
    			cp "$src" "./converted/$dst"
    		done < <(paste ./info/list_image.txt ./source/list_image_mv.txt)

    		cp ./docs.it4i.cz/salomon/salomon ./converted/docs.it4i.cz/salomon/salomon
    		cp ./docs.it4i.cz/salomon/salomon-2 ./converted/docs.it4i.cz/salomon/salomon-2

    		# images that exist only under salomon/ are duplicated into the
    		# matching anselm-cluster-documentation/ folders
    		salomon_root=./converted/docs.it4i.cz/salomon
    		anselm_root=./converted/docs.it4i.cz/anselm-cluster-documentation
    		for shared_png in \
    			resource-allocation-and-job-execution/fairshare_formula.png \
    			resource-allocation-and-job-execution/job_sort_formula.png \
    			software/debuggers/vtune-amplifier.png \
    			software/debuggers/Snmekobrazovky20160708v12.33.35.png
    		do
    			cp "$salomon_root/$shared_png" "$anselm_root/$shared_png"
    		done

    		wget https://docs.it4i.cz/anselm-cluster-documentation/software/virtualization/virtualization-job-workflow
    		mv ./virtualization-job-workflow "$anselm_root/software/"

    		# download the omics-master-1 figures (same order as before)
    		omics_url=https://docs.it4i.cz/anselm-cluster-documentation/software/omics-master-1/images
    		omics_dir="$anselm_root/software/omics-master-1"
    		for fig in fig6 fig5 fig1 fig2 fig4 fig3; do
    			wget "$omics_url/$fig.png"
    			mv "./$fig.png" "$omics_dir/$fig.png"
    		done
    	else
    		echo "list_md.txt not exists!!!!!"
    	fi
    fi
    # Print $1 with each of the characters ] \ [ ^ $ . * / " prefixed by a
    # backslash, so the text can be spliced into a sed 's/…/…/' expression.
    _html_md_sed_escape() {
    	printf '%s\n' "$1" | sed 's:[]\[\^$\.\*\/\"]:\\&:g'
    }

    # Post-process every markdown file named in ./source/list_md_mv.txt
    # (paths relative to ./converted):
    #   1. strip leftover HTML/span fragments with a fixed set of substitutions
    #      (first match per line only, as before);
    #   2. apply each "pattern&replacement" rule from ./source/replace.txt as a
    #      literal, first-match-per-line substitution.
    # The result is written to "<name without extension>.md" next to the input.
    _html_md_postprocess() {
    	local md_rel work_file swap_file rule pattern replacement

    	while read -r md_rel || [ -n "$md_rel" ]; do
    		echo "$md_rel"
    		work_file="./converted/${md_rel%.*}TMP.md"
    		swap_file="./converted/${md_rel%TMP.*}.TEST.md"

    		# One sed with ordered -e expressions is equivalent to the former
    		# chain of single-expression seds: every command works on one line.
    		sed \
    			-e 's/``` /```/' \
    			-e 's/  //' \
    			-e 's/<span class="pln">//' \
    			-e 's/<span//' \
    			-e 's/class="pln">//' \
    			-e 's/<\/span>//' \
    			-e 's/^\*\*//' \
    			-e 's/\^\[>\[1<span>\]<\/span>\]//' \
    			"./converted/$md_rel" > "$work_file"

    		# NOTE(review): plain `read` (no -r) is kept on purpose so that rule
    		# lines are interpreted exactly as before; confirm whether replace.txt
    		# relies on backslash processing before switching to `read -r`.
    		while read rule || [ -n "$rule" ]; do
    			# field 1 = text before the first '&', field 2 = text between the
    			# first and second '&'; a line without '&' yields itself for both
    			pattern=$(_html_md_sed_escape "${rule%%&*}")
    			replacement=${rule#*&}
    			replacement=$(_html_md_sed_escape "${replacement%%&*}")

    			# An empty pattern makes sed fail ("no previous regular
    			# expression") and would truncate the file, so skip such rules.
    			[ -n "$pattern" ] || continue

    			sed -e 's/'"$pattern"'/'"$replacement"'/' "$work_file" > "$swap_file"
    			cat "$swap_file" > "$work_file"
    		done < ./source/replace.txt

    		cat "$work_file" > "./converted/${md_rel%.*}.md"
    		# -f: the swap file does not exist when no rule was applied
    		rm -f "$work_file" "$swap_file"
    	done < ./source/list_md_mv.txt
    }

    # Mode "-t1": clean up the already converted markdown files in ./converted.
    if [ "$1" = "-t1" ]; then
    	# testing new function
    	echo "Testing 1..."
    	_html_md_postprocess
    fi

    # Mode "-w": mirror https://docs.it4i.cz into the current directory and
    # fetch one extra image from verif.cs.vsb.cz.
    if [ "$1" = "-w" ]; then
    	# download html pages
    	# The exclude (-X) and reject (-R) lists are quoted so the shell never
    	# tries to glob-expand the wildcards (*js, *xml, @@*, ...) they contain;
    	# wget must receive them verbatim.
    	wget \
    		-X 'pbspro-documentation,changelog,whats-new,portal_css,portal_javascripts,++resource++jquery-ui-themes,anselm-cluster-documentation/icon.jpg' \
    		-R 'favicon.ico,pdf.png,logo.png,background.png,application.png,search_icon.png,png.png,sh.png,touch_icon.png,anselm-cluster-documentation/icon.jpg,*js,robots.txt,*xml,RSS,download_icon.png,pdf,*zip,*rar,@@*,anselm-cluster-documentation/icon.jpg.1' \
    		--mirror --convert-links --adjust-extension --page-requisites --no-parent \
    		https://docs.it4i.cz

    	wget http://verif.cs.vsb.cz/aislinn/doc/report.png
    	# NOTE(review): the other modes place files under
    	# ./converted/docs.it4i.cz/..., and "-c" deletes ./converted before
    	# rebuilding it, so this target path looks stale - confirm the intended
    	# destination. The command is left unchanged.
    	mv report.png ./converted/salomon/software/debuggers/
    fi
    # Mode "-c": convert every downloaded *.ht* page below the current directory
    # to Markdown with pandoc, strip leftover attribute blocks and HTML
    # fragments, delete unwanted files, and copy the results into ./converted
    # according to the hand-maintained lists in ./source.
    if [ "$1" = "-c" ]; then
    	### convert html to md

    	# erasing the previous transfer
    	rm -rf converted
    	rm -rf info

    	find . -name "*.ht*" |
    	while IFS= read -r page; do
    		stem="${page%.*}"

    		# first filtering html
    		echo "$page"
    		printf "\t\tfiltering html files...\n"

    		# line of the first <h1, line of the footer marker, total line count
    		head_line=$(grep -n -m1 '<h1' "$page" | cut -f1 -d:)
    		end_line=$(grep -n -m1 '<!-- <div tal:content=' "$page" | cut -f1 -d:)
    		last_line=$(wc -l < "$page")
    		# number of trailing lines to drop; an unset end_line counts as 0 here
    		tail_count=$((last_line-end_line+2))

    		# Keep everything from the <h1 line onwards (the former '1,HEAD-1 d'
    		# form also deleted line 1 when <h1 was on the first line), then drop
    		# the last $tail_count lines with the sliding-window sed idiom.
    		# A page without <h1 still makes the first sed fail and yields an
    		# empty file, which is removed by the small-file cleanup below.
    		sed -n "${head_line},\$p" "$page" |
    			sed -n -e :a -e '1,'"$tail_count"'!{P;N;D;};N;ba' > "${stem}TMP.html"

    		# converted .html to .md
    		printf "\t\t.html -> .md\n"
    		pandoc -f html -t markdown+pipe_tables-grid_tables "${stem}TMP.html" -o "${stem}.md"
    		rm "${stem}TMP.html"

    		# create filter_auto (css, html, ...)
    		# collect the text found between { and } on each line, skipping
    		# entries that themselves contain '{' or '$'
    		grep -o -P '(?<={).*(?=})' "${stem}.md" | sort -u | sed -e '/{/d' -e '/\$/d' >> filter_auto
    		sort -u filter_auto -o filter_auto

    		# exceptions filter_auto
    		# NOTE(review): plain `read` (no -r) is kept for both pattern lists so
    		# the lines reach sed exactly as before.
    		while read y; do
    			# search and delete according with filter_auto
    			sed -e 's/'"$y"'//g' filter_auto > filter_autoTMP
    			cat filter_autoTMP > filter_auto
    		done < ./source/exceptions_filter_auto

    		# text filtering of html, css, ...
    		printf "\t\tautomatic filter...\n"
    		while read y; do
    			# search and delete according with filter_auto and erase unwanted elements
    			# (one sed with ordered expressions replaces the former chain of
    			# single-expression seds; awk then collapses each run of five
    			# newlines into one)
    			sed \
    				-e 's/{'"$y"'}//g' \
    				-e 's/\\//g' \
    				-e 's/^: //g' \
    				-e 's/<\/div>//g' \
    				-e '/^<div/d' \
    				-e 's/^Obsah//g' \
    				-e 's/{#putty---before-we-start-//g' \
    				-e 's/ssh-connection style="text-alignstart; "}//g' \
    				"${stem}.md" | awk -v RS='\n\n\n\n\n' 1 > "${stem}TMP.md"
    			cat "${stem}TMP.md" > "${stem}.md"
    		done < filter_auto

    		# delete temporary files (-f: absent when filter_auto was empty)
    		rm -f "${stem}TMP.md"
    	done

    	# delete empty files, duplicate files and unwanted files
    	# NOTE(review): this removes every file smaller than 10 bytes anywhere
    	# below the current directory, not only below ./docs.it4i.cz.
    	find . -type f -size -10c |
    	while IFS= read -r tiny; do
    		rm "$tiny"
    		echo "$tiny deleted"
    	done

    	# -f: either file may be missing (never created, or just removed above)
    	rm -f filter_autoTMP
    	rm -f filter_auto

    	while read -r unwanted; do
    		if [ -f "$unwanted" ]; then
    			rm "$unwanted"
    		fi
    	done < ./source/list_rm.txt

    	### create new folder and move converted files
    	# create folder info and view all files and folder
    	mkdir info

    	# Three separate passes keep the png / jpg / jpeg order of the list,
    	# which is paired line by line with ./source/list_image_mv.txt below.
    	find ./docs.it4i.cz -name "*.png" -type f > ./info/list_image.txt
    	find ./docs.it4i.cz -name "*.jpg" -type f >> ./info/list_image.txt
    	find ./docs.it4i.cz -name "*.jpeg" -type f >> ./info/list_image.txt

    	find ./docs.it4i.cz -name "*.md" -type f > ./info/list_md.txt
    	find ./docs.it4i.cz -type d | sort > ./info/list_folder.txt

    	# exists file for move?
    	if [ -f ./info/list_md.txt ]; then
    		mkdir converted
    		# -p: a nested folder no longer depends on its parent being listed first
    		while read -r folder; do
    			mkdir -p "./converted/$folder"
    		done < ./source/list_folder.txt

    		# move md files to new folders
    		while read -r src dst; do
    			cp "$src" "./converted/$dst"
    		done < <(paste ./info/list_md.txt ./source/list_md_mv.txt)

    		# copy jpg and jpeg to new folders
    		while read -r src dst; do
    			cp "$src" "./converted/$dst"
    		done < <(paste ./info/list_image.txt ./source/list_image_mv.txt)
    	else
    		echo "list_md.txt not exists!!!!!"
    	fi
    fi