Skip to content
Snippets Groups Projects
html_md.sh 3.01 KiB
Newer Older
  • Learn to ignore specific revisions
  • Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    #!/bin/sh

    # Delete every non-directory entry below the current directory whose name
    # matches the glob given as $1, printing "<path> deleted" for each entry that
    # was actually removed.
    #
    # find runs rm itself, so paths containing backslashes, blanks or newlines
    # are handled verbatim, and the message is only printed when rm succeeded
    # (the second -exec is evaluated only if the first one returned true).
    html_md_delete_matching() {
    	find . -name "$1" ! -type d -exec rm -- {} \; -exec printf '%s deleted\n' {} \;
    }

    # Usage: html_md.sh -d -html   delete all files matching *.ht*
    #        html_md.sh -d -md     delete all files matching *.md
    # Any other second argument deletes nothing.
    if [ "$1" = "-d" ]; then
    	case "$2" in
    		-html)
    			# delete all .html files
    			html_md_delete_matching "*.ht*"
    			;;
    		-md)
    			# delete all .md files
    			html_md_delete_matching "*.md"
    			;;
    	esac
    fi
    
    Lukáš Krupčík's avatar
    Lukáš Krupčík committed
    # Append the path of every entry below the current directory whose name
    # matches the glob $1 to the list file $2, one path per line.
    # The list file is only created when at least one entry matches.
    #
    # IFS= and -r keep each path exactly as find printed it; printf is used
    # instead of echo because some sh implementations expand backslash
    # sequences in echo arguments.
    html_md_list_matching() {
    	find . -name "$1" |
    	while IFS= read -r entry; do
    		printf '%s\n' "$entry" >> "$2"
    	done
    }

    # Usage: html_md.sh -i
    # Writes lists of all .png, .jpg and .md files into ./info. Entries are
    # appended, so existing lists from an earlier run are extended.
    if [ "$1" = "-i" ]; then
    	# create folder info (-p: no error when it already exists)
    	mkdir -p info

    	html_md_list_matching "*.png" ./info/list_png.txt
    	html_md_list_matching "*.jpg" ./info/list_jpg.txt
    	html_md_list_matching "*.md" ./info/list_md.txt
    fi
    # Usage: html_md.sh -w
    # Mirrors https://docs.it4i.cz with wget.
    if [ "$1" = "-w" ]; then
    	# download html pages

    	# Comma-separated directory list for wget -X (--exclude-directories).
    	wget_exclude_dirs='changelog,whats-new,portal_css,portal_javascripts,++resource++jquery-ui-themes,anselm-cluster-documentation/icon.jpg'

    	# Comma-separated suffix/pattern list for wget -R (--reject).
    	# Kept in single quotes: it contains glob characters (*js, *xml, @@*)
    	# that must reach wget unchanged instead of being expanded by the shell.
    	wget_reject='favicon.ico,pdf.png,logo.png,background.png,application.png,search_icon.png,png.png,sh.png,touch_icon.png,anselm-cluster-documentation/icon.jpg,*js,robots.txt,*xml,RSS,download_icon.png,*zip,*rar,@@*,anselm-cluster-documentation/icon.jpg.1'

    	wget -X "$wget_exclude_dirs" -R "$wget_reject" \
    		--mirror --convert-links --adjust-extension --page-requisites \
    		--no-parent https://docs.it4i.cz
    fi
    # Copy the article part of the HTML file $1 into the file $2.
    #
    # The article starts at the first line containing '<h1' and ends two lines
    # before the first line containing '<!-- <div tal:content='.
    # Returns non-zero, without creating $2, when either marker is missing.
    #
    # The body is a subshell, so the helper variables stay local and the
    # `exit` below only leaves this function.
    html_md_extract_body() (
    	src=$1
    	dst=$2

    	head_line=$(grep -n -m1 '<h1' "$src" | cut -f1 -d:)
    	end_line=$(grep -n -m1 '<!-- <div tal:content=' "$src" | cut -f1 -d:)
    	if [ -z "$head_line" ] || [ -z "$end_line" ]; then
    		exit 1
    	fi

    	last_line=$(wc -l < "$src")
    	# number of trailing lines to drop: from the line before the end
    	# marker through the end of the file
    	drop_tail=$((last_line - end_line + 2))

    	# tail skips everything before the heading (also valid when the heading
    	# is on line 1); the sed program is a sliding window that prints every
    	# line except the last $drop_tail ones.
    	tail -n "+$head_line" "$src" |
    		sed -n -e :a -e '1,'"$drop_tail"'!{P;N;D;};N;ba' > "$dst"
    )

    # Usage: html_md.sh -c
    # Converts every *.ht* file below the current directory to Markdown with
    # pandoc and strips pandoc attribute blocks ({...}) and leftover markup.
    # Reads ./exceptions_filter_auto (if present); uses ./filter_auto and
    # ./filter_autoTMP as scratch files.
    if [ "$1" = "-c" ]; then
    	# delete all .md files
    	find . -name "*.md" ! -type d -exec rm -- {} \; -exec printf '%s deleted\n' {} \;

    	rm -rf info

    	# html -> md
    	find . -name "*.ht*" ! -type d |
    	while IFS= read -r i; do
    		# the entry may have vanished since find listed it
    		# (e.g. a temporary *TMP.html of an earlier iteration)
    		[ -f "$i" ] || continue
    		base=${i%.*}

    		# filtering html
    		printf '%s\n' "$i"
    		printf "\t\tfiltering html files...\n"
    		if ! html_md_extract_body "$i" "${base}TMP.html"; then
    			printf "\t\tskipped: heading or end marker not found\n" >&2
    			rm -f "${base}TMP.html"
    			continue
    		fi

    		# converted .html to .md
    		printf "\t\t.html -> .md\n"
    		if ! pandoc -f html -t markdown+pipe_tables-grid_tables "${base}TMP.html" -o "${base}.md"; then
    			printf "\t\tskipped: pandoc failed\n" >&2
    			rm -f "${base}TMP.html"
    			continue
    		fi
    		rm "${base}TMP.html"

    		# create filter_auto: collect the text found between { and } in
    		# the new .md, dropping entries that contain '{' or '$'.
    		# filter_auto accumulates over all files converted so far.
    		grep -o -P '(?<={).*(?=})' "${base}.md" | sort -u | sed -e '/{/d' -e '/\$/d' >> filter_auto
    		sort -u filter_auto -o filter_auto

    		# exceptions filter_auto: every line of exceptions_filter_auto is
    		# used as a sed pattern whose matches are cut out of filter_auto.
    		# Plain `read` (no -r, default IFS) is kept on purpose in both
    		# filter loops: the lines are spliced into sed patterns, and a
    		# different backslash/blank handling would change what they match.
    		if [ -f exceptions_filter_auto ]; then
    			# shellcheck disable=SC2162
    			while read y; do
    				# only take over the result when sed accepted the
    				# pattern; otherwise filter_auto would be emptied
    				if sed -e 's/'"$y"'//g' filter_auto > filter_autoTMP; then
    					cat filter_autoTMP > filter_auto
    				fi
    			done < exceptions_filter_auto
    		fi

    		# text filtering of html, css, ...
    		printf "\t\tautomatic filter...\n"
    		# NOTE(review): the fixed clean-ups below run once per filter_auto
    		# entry and not at all when filter_auto is empty; the repeated awk
    		# pass also affects how many blank lines remain. Kept as is so the
    		# generated Markdown does not change.
    		# shellcheck disable=SC2162
    		while read y; do
    			# search and delete according with filter_auto
    			if ! sed -e 's/{'"$y"'}//g' "${base}.md" > "${base}TMP.md"; then
    				# sed rejected the entry (e.g. it contains '/'):
    				# keep the .md as it is instead of emptying it
    				printf "\t\tfilter entry ignored: %s\n" "$y" >&2
    				continue
    			fi
    			# Remove backslashes, leading ": ", </div>, lines starting with
    			# <div, leading "Obsah" and two known attribute leftovers; awk
    			# then splits the text at runs of five newlines and prints
    			# each piece followed by a single newline.
    			sed -e 's/\\//g' \
    				-e 's/^: //g' \
    				-e 's/<\/div>//g' \
    				-e '/^<div/d' \
    				-e 's/^Obsah//g' \
    				-e 's/{#putty---before-we-start-//g' \
    				-e 's/ssh-connection style="text-alignstart; "}//g' \
    				"${base}TMP.md" |
    				awk -v RS='\n\n\n\n\n' 1 > "${base}.md"
    		done < filter_auto

    		# delete temporary files (absent when filter_auto was empty)
    		rm -f "${base}TMP.md"
    	done

    	# delete every regular file smaller than 10 bytes
    	# NOTE(review): this is not limited to generated .md files; any small
    	# file below the current directory is removed - confirm this is wanted.
    	find . -type f -size -10c -exec rm -- {} \; -exec printf '%s deleted\n' {} \;

    	# -f: the scratch files may never have been created, or may already
    	# have been removed by the size-based clean-up above
    	rm -f filter_autoTMP filter_auto

    	rm -f docs.it4i.cz/robots.txt
    fi