From 8b7af72b22d4604e3d73e3ddb8fcac9ac4c1449c Mon Sep 17 00:00:00 2001
From: Lukas Krupcik <lukas.krupcik@vsb.cz>
Date: Mon, 18 Jul 2016 16:17:31 +0200
Subject: [PATCH] update filtering

---
 html_md.sh | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/html_md.sh b/html_md.sh
index 33bfe6c8a..929568cbe 100755
--- a/html_md.sh
+++ b/html_md.sh
@@ -26,7 +26,8 @@ if [ "$1" = "-w" ]; then
 	wget -bqc --mirror --convert-links --adjust-extension --page-requisites  --no-parent https://docs.it4i.cz;
 	pid=$(pgrep wget);
 	
-	FILE="docs.it4i.cz/@@search?sort_on=sortable_title&.html"
+	# loop over all downloaded html files
+	FILE="docs.it4i.cz/@@search?sort_on=sortable_title&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&.html"
 
 	# test exists file -> yes - kill process wget
 
@@ -56,6 +57,8 @@ if [ "$1" = "-c" ]; then
 		rm "$i"; 
 		echo "$i deleted"; 
 	done
+	
+	rm -rf info;
 
 	# create folder info
 	mkdir info;
@@ -115,11 +118,11 @@ if [ "$1" = "-c" ]; then
 		while read y; 
 		do 
 			# search and delete according with filter_auto
-			cat "${i%.*}.md" | sed -e 's/{'"$y"'}//g' | sed -e 's/\\//g' | sed -e 's/: //g' | sed -e 's/<\/div>//g' | sed '/^<div/d'  | sed '/^$/d' > "${i%.*}TMP.md";
+			cat "${i%.*}.md" | sed -e 's/{'"$y"'}//g' | sed -e 's/\\//g' | sed -e 's/^Q//g' | sed -e 's/: //g' | sed -e 's/<\/div>//g' | sed '/^<div/d' | awk -v RS='\n\n\n\n\n' 1 > "${i%.*}TMP.md";
 			cat "${i%.*}TMP.md" > "${i%.*}.md";
 		done
 
-		echo "\t\tother filter..."
+		#echo "\t\tother filter..."
 		cat filter_other | 
 		while read a; 
 		do 
@@ -127,7 +130,7 @@ if [ "$1" = "-c" ]; then
 			cat "${i%.*}.md" | sed -e 's/'"$a"'//g'  > "${i%.*}TMP.md";
 			cat "${i%.*}TMP.md" > "${i%.*}.md";
 		done
-
+		
 		# delete temporary files
 		rm "${i%.*}TMP.md";
 		
@@ -135,8 +138,3 @@ if [ "$1" = "-c" ]; then
 	rm filter_autoTMP
 	rm filter_auto
 fi
-
-
-
-
-
-- 
GitLab