modified: scripts/url-interni-test.py

modified: scripts/url-test.py

modified: scripts/url-interni-test.py
25161c91 · Lukáš Krupčík · f697f03b · 25161c91 · 25161c91
Commit 25161c91 authored 4 months ago by Lukáš Krupčík
--- a/scripts/url-interni-test.py
+++ b/scripts/url-interni-test.py
-#!/bin/python3
+#!/usr/bin/python3
+"""
+Script to check internal links and section references in MDX files.
+"""
 import re
 from pathlib import Path
 def extract_links(content):
     """Return the targets of all internal Markdown links found in *content*.

     A link is any ``[text](target)`` construct on a single line. Targets that
     begin with ``http://`` or ``https://`` are treated as external and skipped;
     everything else (relative paths, ``#anchor`` references, ...) is returned.

     :param content: Full text of one MDX/Markdown document.
     :return: List of link targets in order of appearance, duplicates included.
     """
     # Exclude only real http(s) URLs. A bare "http" prefix test would also
     # discard relative targets such as "http-guide.mdx".
     link_pattern = re.compile(r'\[.*?\]\((?!https?://)(.*?)\)')
     return link_pattern.findall(content)
 def extract_headers(content):
     """Return the set of anchor strings for the H1-H6 headers in *content*.

     A header is a line that starts with one to six ``#`` characters followed by
     at least one space or tab and a non-empty title. Each title is converted to
     an anchor by lower-casing it, replacing spaces with hyphens and prefixing
     ``#``; for example ``## Sub Section`` yields ``#sub-section``.

     :param content: Full text of one MDX/Markdown document.
     :return: Set of anchors, e.g. ``{"#intro", "#sub-section"}``.
     """
     # [ \t] is used instead of \s because \s also matches newlines, which would
     # let an empty "#" line capture the following line as its title. Trailing
     # blanks are left out of the capture so they do not end up as hyphens.
     header_pattern = re.compile(r'^#{1,6}[ \t]+(.*?)[ \t]*$', re.MULTILINE)
     return {
         f"#{title.lower().replace(' ', '-')}"
         for title in header_pattern.findall(content)
         if title
     }
 def check_internal_links(directory):
-    """ Kontroluje existenci souborů a hash sekcí pro interní odkazy. """
+    """Check the existence of files and hash sections for internal links."""
    # Map each *.mdx file found recursively under `directory` from its path
    # relative to `directory` to the path yielded by rglob().
    mdx_files = {f.relative_to(directory): f for f in Path(directory).rglob("*.mdx")}
    file_headers = {}
    # First pass: read every file and store the result of extract_headers(),
    # keyed by the file's path relative to `directory`.
    for mdx_file, path in mdx_files.items():
-        with open(path, "r", encoding="utf-8") as f:
+        with open(path, "r", encoding="utf-8") as file:
-            content = f.read()
+            content = file.read()
            file_headers[mdx_file] = extract_headers(content)
    # Second pass: read every file again and examine each link returned by
    # extract_links(); problems are reported with print(), nothing is returned.
    for mdx_file, path in mdx_files.items():
-        with open(path, "r", encoding="utf-8") as f:
+        with open(path, "r", encoding="utf-8") as file:
-            content = f.read()
+            content = file.read()
            links = extract_links(content)
            for link in links:
@@ -35,13 +39,13 @@ def check_internal_links(directory):
                # NOTE(review): the statements that derive file_part and hash_part
                # from `link` are elided by the diff hunk header above; their exact
                # form is not visible here.
                file_target = (Path(mdx_file).parent / file_part).resolve()
-                # Kontrola existence souboru
+                # Check if the file exists
                # NOTE(review): file_target has been passed through resolve(), while
                # mdx_files.values() holds the paths exactly as rglob() yielded them,
                # and mdx_file is relative to `directory` - confirm these two forms
                # can actually compare equal.
                if file_part and file_target not in mdx_files.values():
                    print(f"❌ Broken file link in {mdx_file}: {link}")
-                # Kontrola existence sekce
+                # Check if the section exists
                # The section check only runs when the file check above did not fire.
                # NOTE(review): file_headers is keyed by paths relative to `directory`;
                # presumably file_part is text taken from the link - verify the lookup
                # can hit, because on a miss the empty default reports every non-empty
                # hash_part as broken.
                elif hash_part and hash_part not in file_headers.get(file_part, {}):
                    print(f"⚠️ Broken section link in {mdx_file}: {link}")
# Entry point: with the added guard, the scan of "content/docs" runs only when the
# file is executed as a script, not when it is imported as a module.
-check_internal_links("content/docs")
+if __name__ == "__main__":
+    check_internal_links("content/docs")
--- a/scripts/url-test.py
+++ b/scripts/url-test.py
-#!/bin/python3
+#!/usr/bin/python3
+"""
+Script to check external links in MDX files.
+"""
 import re
-import requests
 from pathlib import Path
+import requests
 def check_links_in_mdx(directory):
     """
     Scan MDX files in the given directory for external links and report broken ones.

     Every ``[text](http://...)`` / ``[text](https://...)`` target is probed and a
     line is printed for each occurrence whose final status code is 400 or above,
     or whose request failed. Each distinct URL is requested only once per run;
     later occurrences reuse the cached result.

     :param directory: Path to the directory containing MDX files.
     """
     url_pattern = re.compile(r'\[.*?\]\((http[s]?://.*?)\)')
     # URL -> status code (int), or None when the request raised.
     status_by_url = {}
     for mdx_file in Path(directory).rglob("*.mdx"):
         with open(mdx_file, "r", encoding="utf-8") as file:
             content = file.read()
         for link in url_pattern.findall(content):
             if link not in status_by_url:
                 status_by_url[link] = _probe_status(link)
             status = status_by_url[link]
             if status is None:
                 print(f"⚠️ Error checking {link} in {mdx_file}")
             elif status >= 400:
                 print(f"❌ Broken link in {mdx_file}: {link} (Status: {status})")


 def _probe_status(link):
     """Return the final HTTP status code for *link*, or None if the request failed."""
     try:
         response = requests.head(link, allow_redirects=True, timeout=5)
         # Some servers do not implement HEAD and answer 405/501 although the
         # page exists; retry with GET, streaming so the body is not downloaded.
         if response.status_code in (405, 501):
             response = requests.get(link, allow_redirects=True, timeout=5, stream=True)
             response.close()
         return response.status_code
     except requests.RequestException:
         return None
# Entry point: with the added guard, the external-link check of "content/docs" runs
# only when the file is executed as a script, not when it is imported as a module.
-check_links_in_mdx("content/docs")
+if __name__ == "__main__":
+    check_links_in_mdx("content/docs")