#!/bin/sh
#
# Mirror https://docs.it4i.cz and convert the mirrored HTML pages to Markdown.
# Everything operates on the current working directory.
#
# Usage:
#   <script> -d -html   delete every file matching *.ht* below .
#   <script> -d -md     delete every file matching *.md below .
#   <script> -i         append lists of *.png, *.jpg and *.md files to ./info/
#   <script> -w         download the site with wget
#   <script> -c         convert every *.ht* file below . to Markdown
#
# External tools used: find, wget (-w), pandoc, GNU grep (-P) and sed (-c).
# Conversion mode reads an optional file "exceptions_filter_auto" from the
# current directory and uses "filter_auto"/"filter_autoTMP" as work files.

# Print a short usage summary on stderr.
usage() {
  printf 'Usage: %s -d {-html|-md} | -i | -w | -c\n' "${0##*/}" >&2
}

# Read one path per line from stdin, remove it and report it on stdout.
# "IFS= read -r" keeps backslashes and leading/trailing blanks intact, so the
# name handed to rm is exactly the name find printed. Paths containing a
# newline are still not supported by this line-based protocol.
remove_listed() {
  while IFS= read -r doomed; do
    rm -- "$doomed"
    printf '%s deleted\n' "$doomed"
  done
}

# Append the path of every file matching a find(1) -name pattern to a list.
# Arguments: $1 - name pattern, $2 - list file (appended to, never truncated)
append_listing() {
  find . -name "$1" | while IFS= read -r found; do
    printf '%s\n' "$found" >> "$2"
  done
}

# Mode -i: collect image and Markdown file lists under ./info.
collect_info() {
  # -p: running the mode a second time must not fail on the existing folder.
  mkdir -p info
  append_listing '*.png' ./info/list_png.txt
  append_listing '*.jpg' ./info/list_jpg.txt
  append_listing '*.md' ./info/list_md.txt
}

# Mode -w: download the HTML pages.
# The -X/-R lists are quoted so the shell never tries to glob-expand them.
download_site() {
  wget \
    -X 'changelog,whats-new,portal_css,portal_javascripts,++resource++jquery-ui-themes,anselm-cluster-documentation/icon.jpg' \
    -R 'favicon.ico,pdf.png,logo.png,background.png,application.png,search_icon.png,png.png,sh.png,touch_icon.png,anselm-cluster-documentation/icon.jpg,*js,robots.txt,*xml,RSS,download_icon.png,*zip,*rar,@@*,anselm-cluster-documentation/icon.jpg.1' \
    --mirror --convert-links --adjust-extension --page-requisites --no-parent \
    https://docs.it4i.cz
}

# Convert one HTML page to a cleaned-up Markdown file next to it.
# Arguments: $1 - path of the HTML file
# Side effects: writes <stem>.md, updates ./filter_auto and ./filter_autoTMP.
convert_page() {
  page=$1
  stem=${page%.*}
  md_file=${stem}.md
  md_tmp=${stem}TMP.md

  printf '%s\n' "$page"
  printf "\t\tfiltering html files...\n"

  # Line number of the first <h1 and of the footer marker comment.
  first=$(grep -n -m1 '<h1' "$page" | cut -d: -f1)
  marker=$(grep -n -m1 '<!-- <div tal:content=' "$page" | cut -d: -f1)
  if [ -z "$first" ] || [ -z "$marker" ]; then
    # Without both markers the trimming below cannot be computed; the page
    # would only yield an empty .md, so skip it instead.
    printf '%s: content markers not found, skipped\n' "$page" >&2
    return 0
  fi
  total=$(wc -l < "$page")
  drop_tail=$((total - marker + 2))

  printf "\t\t.html -> .md\n"
  # 1st sed: keep everything from the <h1 line on (a "1,N-1 d" range would
  #          wrongly delete line 1 when the heading is on the first line).
  # 2nd sed: drop the last $drop_tail lines of what is left.
  # pandoc reads the trimmed HTML from stdin, so no temporary *.html file
  # appears inside the tree that find is still walking.
  sed -n "${first},\$p" "$page" \
    | sed -n -e :a -e '1,'"$drop_tail"'!{P;N;D;};N;ba' \
    | pandoc -f html -t markdown+pipe_tables-grid_tables -o "$md_file"

  # Collect the text found between { and } in the Markdown into filter_auto,
  # leaving out candidates that still contain "{" or "$".
  grep -o -P '(?<={).*(?=})' "$md_file" | sort -u \
    | sed -e '/{/d' -e '/\$/d' >> filter_auto
  sort -u filter_auto -o filter_auto

  # Blank out every exception inside filter_auto.
  # NOTE(review): read is deliberately left without -r here and below, as in
  # the original; -r would change how backslashes in patterns are treated.
  if [ -r exceptions_filter_auto ]; then
    # shellcheck disable=SC2162
    while read exception; do
      sed -e 's/'"$exception"'//g' filter_auto > filter_autoTMP
      cat filter_autoTMP > filter_auto
    done < exceptions_filter_auto
  fi

  printf "\t\tautomatic filter...\n"
  # One pass per filter_auto entry: delete "{entry}", then apply the fixed
  # cleanups (backslashes, leading ": ", div tags, "Obsah", two hard-coded
  # fragments); awk re-joins records separated by five consecutive newlines.
  # NOTE(review): entries are interpolated into the sed expression unescaped;
  # an entry containing "/" makes sed fail and empties the file - confirm
  # whether such entries can occur.
  # shellcheck disable=SC2162
  while read entry; do
    sed -e 's/{'"$entry"'}//g' "$md_file" \
      | sed -e 's/\\//g' \
      | sed -e 's/^: //g' \
      | sed -e 's/<\/div>//g' \
      | sed '/^<div/d' \
      | sed -e 's/^Obsah//g' \
      | sed -e 's/{#putty---before-we-start-//g' \
      | sed -e 's/ssh-connection style="text-alignstart; "}//g' \
      | awk -v RS='\n\n\n\n\n' 1 > "$md_tmp"
    cat "$md_tmp" > "$md_file"
  done < filter_auto

  # -f: the temp file does not exist when filter_auto had no entries.
  rm -f -- "$md_tmp"
}

# Mode -c: rebuild all Markdown files from the HTML files below the cwd.
convert_site() {
  # Start from a clean slate: old Markdown output and the info folder go.
  find . -name '*.md' | remove_listed
  rm -rf info

  find . -name '*.ht*' | while IFS= read -r html; do
    convert_page "$html"
  done

  # Remove every regular file smaller than 10 bytes (e.g. empty conversions).
  find . -type f -size -10c | remove_listed

  # -f: any of these may be missing (never created, or removed just above).
  rm -f filter_autoTMP filter_auto docs.it4i.cz/robots.txt
}

# Dispatch on the mode given as first argument.
# Returns 2 after printing the usage text for an unknown or missing mode.
main() {
  case "$1" in
    -d)
      case "$2" in
        -html) find . -name '*.ht*' | remove_listed ;;
        -md) find . -name '*.md' | remove_listed ;;
        *)
          usage
          return 2
          ;;
      esac
      ;;
    -i) collect_info ;;
    -w) download_site ;;
    -c) convert_site ;;
    *)
      usage
      return 2
      ;;
  esac
}

main "$@"