Skip to content
Snippets Groups Projects
html_md.sh 1.13 KiB
Newer Older
Lukáš Krupčík's avatar
Lukáš Krupčík committed
#!/bin/sh

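### Convert .html files to .md and filter the result ###
# parameters: -d ... delete all .md files and the info folder
# required files: html_md.sh, filter.txt (one sed pattern per line)
# run from the top of the directory tree to convert; filter.txt is read
# from the current directory
### version 1.0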

# Entry point.
#   -d          delete mode: remove the info folder and every .md file found
#               below the current directory.
#   (otherwise) convert every file matching "*.ht*" below the current
#               directory to Markdown with pandoc, strip boilerplate, and
#               apply the patterns listed in ./filter.txt.
if [ "$1" = "-d" ]; then
	# delete all .md files
	rm -rf info
	find . -name "*.md" |
	while IFS= read -r md_file; do
		# Report a deletion only when rm actually succeeded.
		if rm "$md_file"; then
			printf '%s deleted\n' "$md_file"
		fi
	done
else
	# -p: re-running the conversion must not fail because info/ already exists.
	mkdir -p info
	# "! -type d" keeps directories whose name matches "*.ht*" away from pandoc.
	find . ! -type d -name "*.ht*" |
	# IFS= and -r keep blanks and backslashes in file names intact.
	while IFS= read -r html_file; do
		base="${html_file%.*}"
		tmp_md="${base}TMP.md"
		out_md="${base}.md"

		# Convert .html to .md. When pandoc fails, skip the file instead of
		# leaving an empty .md behind.
		if ! pandoc -f html -t markdown+pipe_tables-grid_tables "$html_file" -o "$tmp_md"; then
			printf '%s: pandoc conversion failed, skipped\n' "$html_file" >&2
			rm -f "$tmp_md"
			continue
		fi
		# printf instead of echo: tabs are emitted and file names are printed
		# verbatim regardless of which shell provides /bin/sh.
		printf '%s\n' "$html_file"
		printf '\t\t.html -> .md\n'

		# Folder info, file strukture: list of all converted files (path
		# without extension).
		printf '%s\n' "$base" >> ./info/strukture

		# Remove HTML tags, then empty lines, then the first 21 remaining
		# lines, then the last 58 lines (the final sed keeps a 58-line
		# look-ahead window and never prints it).
		# NOTE(review): confirm 58 (not 59) is the intended footer length.
		sed -e 's/<[^>]*>//g' < "$tmp_md" |
			sed '/^$/d' |
			sed '1,21d' |
			sed -n -e :a -e '1,58!{P;N;D;};N;ba' > "$out_md"

		printf '\t\tfiltering...\n'
		# Text filtering of html, css, ...: every line of filter.txt is a sed
		# pattern whose matches are deleted; backslashes are stripped after
		# each applied pattern.
		if [ -r filter.txt ]; then
			# NOTE(review): read is used without -r on purpose so existing
			# filter.txt entries keep being interpreted exactly as before.
			# "|| [ -n ... ]" also processes a last line without a newline.
			# shellcheck disable=SC2162
			while read pattern || [ -n "$pattern" ]; do
				# A blank entry would yield "s///g", which sed rejects.
				[ -n "$pattern" ] || continue
				# Only take over the result when sed accepted the pattern; a
				# rejected pattern (e.g. one containing "/") would otherwise
				# replace the converted file with empty output.
				if sed -e 's/'"$pattern"'//g' -e 's/\\//g' < "$out_md" > "$tmp_md"; then
					cat "$tmp_md" > "$out_md"
				else
					printf '%s: filter "%s" rejected by sed, left unapplied\n' "$out_md" "$pattern" >&2
				fi
			done < filter.txt
		else
			printf 'filter.txt not readable, %s left unfiltered\n' "$out_md" >&2
		fi

		# delete temporary files
		rm -f "$tmp_md"
	done
fi