Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/sh
### .html -> .md, filtering ###
# Usage (run from the root of the tree to process, i.e. in "."):
#   html_md.sh       convert every *.ht* file below . to Markdown with pandoc,
#                    strip page boilerplate and apply the filters in filter.txt
#   html_md.sh -d    delete the info directory and every .md file below .
# Files: html_md.sh, filter.txt (one sed pattern per line; every match is
#        deleted from the generated Markdown)
# Output: <name>.md next to each <name>.ht* file, plus info/strukture, which
#         lists the path (without extension) of every converted file.
# Limitation: file names containing newlines are not supported.
### version 1.0

# Delete the info directory and every .md file below the current directory,
# reporting each file that was actually removed.
delete_markdown() {
  rm -rf info
  find . -type f -name "*.md" |
    while IFS= read -r md_file; do
      rm -- "$md_file" && printf '%s deleted\n' "$md_file"
    done
}

# Remove HTML tags and empty lines, then the first 21 and the last 58 of the
# remaining lines.
#   $1 - input file, $2 - output file
# The stages stay separate processes on purpose: the line numbers used by
# '1,21d' and by the tail-trimming script must be counted AFTER the empty
# lines have been dropped by the previous stage.
strip_boilerplate() {
  sed -e 's/<[^>]*>//g' -e '/^$/d' < "$1" |
    sed '1,21d' |
    sed -n -e :a -e '1,58!{P;N;D;};N;ba' > "$2"
}

# Apply every pattern of filter.txt to a Markdown file, in place.
#   $1 - file to filter, $2 - scratch file (overwritten)
# After each pattern all backslashes are removed from the text as well.
apply_filters() {
  [ -r filter.txt ] || return 0
  # NOTE: plain `read` (no -r) is kept deliberately so that existing
  # filter.txt files are interpreted exactly as before (backslash escapes are
  # processed by the shell before the pattern reaches sed).
  # `|| [ -n ... ]` makes sure a last line without trailing newline is used.
  # shellcheck disable=SC2162
  while read filter_pattern || [ -n "$filter_pattern" ]; do
    # An empty pattern would make sed fail ("no previous regular expression").
    [ -n "$filter_pattern" ] || continue
    if sed -e 's/'"$filter_pattern"'//g' -e 's/\\//g' < "$1" > "$2"; then
      cat "$2" > "$1"
    else
      # Never overwrite the result with the (empty) output of a failed sed.
      printf 'html_md: filter pattern skipped (sed failed): %s\n' \
        "$filter_pattern" >&2
    fi
  done < filter.txt
}

# Convert every *.ht* file below the current directory to a filtered .md file.
convert_all() {
  mkdir -p info
  [ -r filter.txt ] ||
    printf 'html_md: filter.txt not readable, no filters will be applied\n' >&2
  find . -type f -name "*.ht*" |
    while IFS= read -r html_file; do
      base=${html_file%.*}
      tmp_md="${base}TMP.md"
      out_md="${base}.md"
      # convert .html to .md; stdin is detached so pandoc can never consume
      # the file list this loop is reading
      if ! pandoc -f html -t markdown+pipe_tables-grid_tables \
        "$html_file" -o "$tmp_md" < /dev/null; then
        printf 'html_md: pandoc failed for %s, skipped\n' "$html_file" >&2
        rm -f -- "$tmp_md"
        continue
      fi
      printf '%s\n' "$html_file"
      printf '\t\t.html -> .md\n'
      # info/strukture: list of all converted files with their folder path
      printf '%s\n' "$base" >> ./info/strukture
      strip_boilerplate "$tmp_md" "$out_md"
      printf '\t\tfiltering...\n'
      # text filtering of html, css, ...
      apply_filters "$out_md" "$tmp_md"
      # delete temporary file
      rm -- "$tmp_md"
    done
}

# Entry point: "-d" as first argument selects delete mode, anything else
# (including no argument) runs the conversion.
main() {
  if [ "${1-}" = "-d" ]; then
    delete_markdown
  else
    convert_all
  fi
}

main "$@"