#!/bin/bash

### DOWNLOAD AND CONVERT DOCUMENTATION
# author: kru0052
# version: 0.35
# change: fixed image bugs, changed version number (-1, beta)
# known bugs: badly formatted tables, broken links to other files, a few leftover HTML elements, other formatting bugs...
###

if [ "$1" = "-t" ]; then
	# testing new function
	
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	echo "Testing 1..."
	
	rm -rf ./converted 
	
	# exists file for move?	
	if [ -f ./info/list_md.txt ];
	then  	
		mkdir converted;
		(while read i; 
		do		
			mkdir "./converted/$i";
		done) < ./source/list_folder.txt
	
		# move md files to new folders
		while read a b ; do 
			cp "$a" "./converted/$b"; 
			
		done < <(paste ./info/list_md.txt ./source/list_md_mv.txt)
		# copy jpg and jpeg to new folders
		
		#cat "${i%.*}TMP.md" > "${i%.*}.md";
		
		while read a b ; do cp "$a" "./converted/$b"; done < <(paste ./info/list_image.txt ./source/list_image_mv.txt)
		
		while read a ; do 
			
			echo "$a";
			 
			sed -e 's/``` /```/' "./converted/$a" | sed -e 's/  //' | sed -e 's/<span class="pln">//' | sed -e 's/<span//' | sed -e 's/class="pln">//' | sed -e 's/<\/span>//' | sed -e 's/^\*\*//' | sed -e 's/\^\[>\[1<span>\]<\/span>\]//' > "./converted/${a%.*}TMP.md";
		
			while read x ; do 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
				arg1=`echo "$x" | cut -d"&" -f1 | sed 's:[]\[\^\$\.\*\/\"]:\\\\&:g'`;	
				arg2=`echo $x | cut -d"&" -f2 | sed 's:[]\[\^\$\.\*\/\"]:\\\\&:g'`;
Lukáš Krupčík's avatar
Lukáš Krupčík committed
			#echo "$arg1"; 
			#echo ">$arg2";
		
				sed -e 's/'"$arg1"'/'"$arg2"'/' "./converted/${a%.*}TMP.md" > "./converted/${a%TMP.*}.TEST.md";
				cat "./converted/${a%TMP.*}.TEST.md" > "./converted/${a%.*}TMP.md";
			done < ./source/replace.txt 
			cat "./converted/${a%.*}TMP.md" > "./converted/${a%.*}.md";
			rm "./converted/${a%.*}TMP.md";
			rm "./converted/${a%TMP.*}.TEST.md";
		done <./source/list_md_mv.txt 
		
	else
	   echo "list_md.txt not exists!!!!!"
	fi

	
fi
# -w: mirror the documentation site with wget
if [ "$1" = "-w" ]; then
	# download html pages 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	wget -X pbspro-documentation,changelog,whats-new,portal_css,portal_javascripts,++resource++jquery-ui-themes,anselm-cluster-documentation/icon.jpg -R favicon.ico,pdf.png,logo.png,background.png,application.png,search_icon.png,png.png,sh.png,touch_icon.png,anselm-cluster-documentation/icon.jpg,*js,robots.txt,*xml,RSS,download_icon.png,pdf,*zip,*rar,@@*,anselm-cluster-documentation/icon.jpg.1 --mirror --convert-links --adjust-extension --page-requisites  --no-parent https://docs.it4i.cz;
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	
	wget http://verif.cs.vsb.cz/aislinn/doc/report.png
	mv report.png ./converted/salomon/software/debuggers/
	
Lukáš Krupčík's avatar
Lukáš Krupčík committed
fi
if [ "$1" = "-c" ]; then
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	### convert html to md
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	# erasing the previous transfer
	rm -rf converted;
	rm -rf info;
	
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	find . -name "*.ht*" | 
	while read i; 
	do 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		# first filtering html 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		echo "$i"; 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		printf "\t\tfiltering html files...\n";
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		HEAD=$(grep -n -m1 '<h1' "$i" |cut -f1 -d: | tr --delete '\n')
		END=$(grep -n -m1 '<!-- <div tal:content=' "$i" |cut -f1 -d: | tr --delete '\n')
		LAST=$(wc -l "$i" | cut -f1 -d' ')
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		DOWN=$((LAST-END+2))

Lukáš Krupčík's avatar
Lukáš Krupčík committed
		sed '1,'"$((HEAD-1))"'d' "$i" | sed -n -e :a -e '1,'"$DOWN"'!{P;N;D;};N;ba' > "${i%.*}TMP.html"	
Lukáš Krupčík's avatar
Lukáš Krupčík committed

		# converted .html to .md
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		printf "\t\t.html -> .md\n"
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		pandoc -f html -t markdown+pipe_tables-grid_tables "${i%.*}TMP.html" -o "${i%.*}.md"; 
		rm "${i%.*}TMP.html";

Lukáš Krupčík's avatar
Lukáš Krupčík committed
		# create filter_auto (css, html, ...)
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		grep -o -P '(?<={).*(?=})' "${i%.*}.md" | sort -u | sed '/{/d' | sed '/\$/d' >> filter_auto;
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		sort -u filter_auto -o filter_auto; 

Lukáš Krupčík's avatar
Lukáš Krupčík committed
		# exceptions filter_auto 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		(while read y; 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		do 
			# search and delete according with filter_auto
Lukáš Krupčík's avatar
Lukáš Krupčík committed
			sed -e 's/'"$y"'//g' filter_auto > filter_autoTMP;
Lukáš Krupčík's avatar
Lukáš Krupčík committed
			cat filter_autoTMP > filter_auto;
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		done) < ./source/exceptions_filter_auto
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		# text filtering of html, css, ...
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		printf "\t\tautomatic filter...\n"
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		(while read y; 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		do 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
			# search and delete according with filter_auto and erase unwanted elements
Lukáš Krupčík's avatar
Lukáš Krupčík committed
			sed -e 's/{'"$y"'}//g' "${i%.*}.md" | sed -e 's/\\//g' | sed -e 's/^: //g' | sed -e 's/<\/div>//g' | sed '/^<div/d' | sed -e 's/^Obsah//g' | sed -e 's/{#putty---before-we-start-//g' | sed -e 's/ssh-connection style="text-alignstart; "}//g'| awk -v RS='\n\n\n\n\n' 1 > "${i%.*}TMP.md";
Lukáš Krupčík's avatar
Lukáš Krupčík committed
			cat "${i%.*}TMP.md" > "${i%.*}.md";
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		done) < filter_auto
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		# delete temporary files
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		rm "${i%.*}TMP.md";		
	done
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	
	# delete empty files, duplicate files and unwanted files
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	find -type f -size -10c | 
	while read i; 
	do
		rm "$i"; 	
		echo "$i deleted"; 
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	done
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	rm filter_autoTMP
	rm filter_auto
Lukáš Krupčík's avatar
Lukáš Krupčík committed

	(while read i; 
	do		
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		if [ -f "$i" ];
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		then
		   rm "$i";
		fi
		
	done) < ./source/list_rm.txt
	
	### create new folder and move converted files
	# create folder info and view all files and folder
	mkdir info;
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	find ./docs.it4i.cz -name "*.png" -type f > ./info/list_image.txt;
	find ./docs.it4i.cz -name "*.jpg" -type f >> ./info/list_image.txt;
	find ./docs.it4i.cz -name "*.jpeg" -type f >> ./info/list_image.txt;
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	find ./docs.it4i.cz -name "*.md" -type f> ./info/list_md.txt; 
	find ./docs.it4i.cz -type d | sort > ./info/list_folder.txt
	
	# exists file for move?	
	if [ -f ./info/list_md.txt ];
	then  	
		mkdir converted;
		(while read i; 
		do		
			mkdir "./converted/$i";
		done) < ./source/list_folder.txt
	
		# move md files to new folders
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		while read a b ; do cp "$a" "./converted/$b"; done < <(paste ./info/list_md.txt ./source/list_md_mv.txt)
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		# copy jpg and jpeg to new folders
Lukáš Krupčík's avatar
Lukáš Krupčík committed
		while read a b ; do cp "$a" "./converted/$b"; done < <(paste ./info/list_image.txt ./source/list_image_mv.txt)
Lukáš Krupčík's avatar
Lukáš Krupčík committed
	else
	   echo "list_md.txt not exists!!!!!"
	fi
Lukáš Krupčík's avatar
Lukáš Krupčík committed
fi