From f697f03bb7779f344d1c463e76426d50f8d8ec61 Mon Sep 17 00:00:00 2001
From: Lukas Krupcik <lukas.krupcik@vsb.cz>
Date: Mon, 3 Mar 2025 13:56:09 +0100
Subject: [PATCH] 	new file:   scripts/url-interni-test.py 	new
 file:   scripts/url-test.py

---
 scripts/url-interni-test.py | 47 +++++++++++++++++++++++++++++++++++++
 scripts/url-test.py         | 24 +++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 scripts/url-interni-test.py
 create mode 100644 scripts/url-test.py

diff --git a/scripts/url-interni-test.py b/scripts/url-interni-test.py
new file mode 100644
index 00000000..4e60ee82
--- /dev/null
+++ b/scripts/url-interni-test.py
@@ -0,0 +1,47 @@
+#!/bin/python3
+
+import re
+from pathlib import Path
+
+def extract_links(content):
+    """ Return the targets of all Markdown links in *content* that carry no URL scheme (internal links). """
+    link_pattern = re.compile(r'\[.*?\]\((?![a-zA-Z][a-zA-Z0-9+.-]*:)(.*?)\)')  # skip http:, https:, mailto:, ...
+    return link_pattern.findall(content)
+
+def extract_headers(content):
+    """ Map '#slug' anchors to heading text for every ATX heading (H1-H6) in *content*. """
+    header_pattern = re.compile(r'^(#{1,6})[ \t]+(.*)', re.MULTILINE)  # 1-6 '#' followed by a blank, same line only
+    return {f"#{h[1].strip().lower().replace(' ', '-')}": h[1].strip() for h in header_pattern.findall(content)}
+
+def check_internal_links(directory):
+    """Print every internal link in the *.mdx files under *directory* that does not resolve.
+
+    A link is reported when its file part (resolved relative to the linking file) is not one of
+    the discovered .mdx files, or when its '#anchor' matches no heading slug of the target file.
+    """
+    root = Path(directory).resolve()  # absolute, so resolved link targets compare equal to keys
+    mdx_files = {f.relative_to(root): f.resolve() for f in root.rglob("*.mdx")}
+    file_headers = {}  # absolute path -> {'#slug': heading text}
+    file_links = {}  # path relative to root -> link targets found in that file
+
+    # Read every file once, collecting the headings and the links in the same pass.
+    for mdx_file, path in mdx_files.items():
+        content = path.read_text(encoding="utf-8")
+        file_headers[path] = extract_headers(content)
+        file_links[mdx_file] = extract_links(content)
+
+    for mdx_file, links in file_links.items():
+        source = mdx_files[mdx_file]
+        for link in links:
+            file_part, _, anchor = link.partition("#")
+            hash_part = f"#{anchor}" if anchor else None
+            # A bare '#anchor' link points into the linking file itself.
+            file_target = (source.parent / file_part).resolve() if file_part else source
+
+            if file_target not in file_headers:  # not a known .mdx file
+                print(f"❌ Broken file link in {mdx_file}: {link}")
+            elif hash_part and hash_part not in file_headers[file_target]:
+                print(f"⚠️ Broken section link in {mdx_file}: {link}")
+
+if __name__ == "__main__":  # run the check only when executed as a script, not on import
+    check_internal_links("content/docs")
diff --git a/scripts/url-test.py b/scripts/url-test.py
new file mode 100644
index 00000000..36b9f632
--- /dev/null
+++ b/scripts/url-test.py
@@ -0,0 +1,24 @@
+#!/bin/python3
+
+import re
+import requests
+from pathlib import Path
+
+def check_links_in_mdx(directory):
+    """Print every http(s) Markdown link in the *.mdx files under *directory* that fails to load."""
+    url_pattern = re.compile(r'\[.*?\]\((http[s]?://.*?)\)')
+    for mdx_file in Path(directory).rglob("*.mdx"):
+        # read_text() closes the file before the (slow) network requests start.
+        for link in url_pattern.findall(mdx_file.read_text(encoding="utf-8")):
+            try:
+                response = requests.head(link, allow_redirects=True, timeout=5)
+                if response.status_code >= 400:  # some servers reject HEAD; confirm with GET
+                    response = requests.get(link, stream=True, timeout=5)
+                    response.close()  # body is not needed, only the status code
+                if response.status_code >= 400:
+                    print(f"Broken link in {mdx_file}: {link} (Status: {response.status_code})")
+            except requests.RequestException as exc:
+                print(f"Error checking {link} in {mdx_file}: {exc}")
+
+if __name__ == "__main__":  # run the check only when executed as a script, not on import
+    check_links_in_mdx("content/docs")
-- 
GitLab