From 8b7af72b22d4604e3d73e3ddb8fcac9ac4c1449c Mon Sep 17 00:00:00 2001 From: Lukas Krupcik <lukas.krupcik@vsb.cz> Date: Mon, 18 Jul 2016 16:17:31 +0200 Subject: [PATCH] update filtering --- html_md.sh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/html_md.sh b/html_md.sh index 33bfe6c8a..929568cbe 100755 --- a/html_md.sh +++ b/html_md.sh @@ -26,7 +26,8 @@ if [ "$1" = "-w" ]; then wget -bqc --mirror --convert-links --adjust-extension --page-requisites --no-parent https://docs.it4i.cz; pid=$(pgrep wget); - FILE="docs.it4i.cz/@@search?sort_on=sortable_title&.html" + # loop for all download html files + FILE="docs.it4i.cz/@@search?sort_on=sortable_title&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&.html" # test exists file -> yes - kill process wget @@ -56,6 +57,8 @@ if [ "$1" = "-c" ]; then rm "$i"; echo "$i deleted"; done + + rm -rf info; # create folder info mkdir info; @@ -115,11 +118,11 @@ if [ "$1" = "-c" ]; then while read y; do # search and delete according with filter_auto - cat "${i%.*}.md" | sed -e 's/{'"$y"'}//g' | sed -e 's/\\//g' | sed -e 's/: //g' | sed -e 's/<\/div>//g' | sed '/^<div/d' | sed '/^$/d' > "${i%.*}TMP.md"; + cat "${i%.*}.md" | sed -e 's/{'"$y"'}//g' | sed -e 's/\\//g' | sed -e 's/^Q//g' | sed -e 's/: //g' | sed -e 's/<\/div>//g' | sed '/^<div/d' | awk -v RS='\n\n\n\n\n' 1 > "${i%.*}TMP.md"; cat "${i%.*}TMP.md" > "${i%.*}.md"; done - echo "\t\tother filter..." + #echo "\t\tother filter..." cat filter_other | while read a; do @@ -127,7 +130,7 @@ if [ "$1" = "-c" ]; then cat "${i%.*}.md" | sed -e 's/'"$a"'//g' > "${i%.*}TMP.md"; cat "${i%.*}TMP.md" > "${i%.*}.md"; done - + # delete temporary files rm "${i%.*}TMP.md"; @@ -135,8 +138,3 @@ if [ "$1" = "-c" ]; then rm filter_autoTMP rm filter_auto fi - - - - - -- GitLab