diff --git a/scripts/url-interni-test.py b/scripts/url-interni-test.py index 4e60ee8269d831d3b3789734e48ebd8bbdb991bd..40a2a6aae93ef9a94910efd6e1a741231259ccfe 100644 --- a/scripts/url-interni-test.py +++ b/scripts/url-interni-test.py @@ -1,31 +1,35 @@ -#!/bin/python3 +#!/usr/bin/python3 + +""" +Script to check internal links and section references in MDX files. +""" import re from pathlib import Path def extract_links(content): - """ Extrahuje všechny interní odkazy ze souboru. """ - link_pattern = re.compile(r'\[.*?\]\((?!http)(.*?)\)') # Vše kromě http/https + """Extract all internal links from the file.""" + link_pattern = re.compile(r'\[.*?\]\((?!http)(.*?)\)') # Everything except http/https return link_pattern.findall(content) def extract_headers(content): - """ Extrahuje všechna H1-H6 nadpisy pro kontrolu hash odkazů. """ + """Extract all H1-H6 headers for hash reference checks.""" header_pattern = re.compile(r'^(#+)\s*(.*)', re.MULTILINE) - return {f"#{h[1].lower().replace(' ', '-')}": h[1] for h in header_pattern.findall(content)} + return {f"#{match[1].lower().replace(' ', '-')}" for match in header_pattern.findall(content)} def check_internal_links(directory): - """ Kontroluje existenci souborů a hash sekcí pro interní odkazy. 
""" + """Check the existence of files and hash sections for internal links.""" mdx_files = {f.relative_to(directory): f for f in Path(directory).rglob("*.mdx")} file_headers = {} for mdx_file, path in mdx_files.items(): - with open(path, "r", encoding="utf-8") as f: - content = f.read() + with open(path, "r", encoding="utf-8") as file: + content = file.read() file_headers[mdx_file] = extract_headers(content) for mdx_file, path in mdx_files.items(): - with open(path, "r", encoding="utf-8") as f: - content = f.read() + with open(path, "r", encoding="utf-8") as file: + content = file.read() links = extract_links(content) for link in links: @@ -35,13 +39,13 @@ def check_internal_links(directory): file_target = (Path(mdx_file).parent / file_part).resolve() - # Kontrola existence souboru + # Check if the file exists if file_part and file_target not in mdx_files.values(): print(f"❌ Broken file link in {mdx_file}: {link}") - # Kontrola existence sekce + # Check if the section exists elif hash_part and hash_part not in file_headers.get(file_part, {}): print(f"⚠️ Broken section link in {mdx_file}: {link}") -check_internal_links("content/docs") - +if __name__ == "__main__": + check_internal_links("content/docs") diff --git a/scripts/url-test.py b/scripts/url-test.py index 36b9f6325dfb8d7274a582ab7ae26869e993fb8a..f1dd40894b26d38489938d4bdb68b6ab88a5e612 100644 --- a/scripts/url-test.py +++ b/scripts/url-test.py @@ -1,24 +1,34 @@ -#!/bin/python3 +#!/usr/bin/python3 + +""" +Script to check external links in MDX files. +""" import re -import requests from pathlib import Path +import requests def check_links_in_mdx(directory): + """ + Scans MDX files in the given directory for external links and checks their availability. + + :param directory: Path to the directory containing MDX files. 
+ """ mdx_files = Path(directory).rglob("*.mdx") url_pattern = re.compile(r'\[.*?\]\((http[s]?://.*?)\)') for mdx_file in mdx_files: - with open(mdx_file, "r", encoding="utf-8") as f: - content = f.read() + with open(mdx_file, "r", encoding="utf-8") as file: + content = file.read() links = url_pattern.findall(content) for link in links: try: response = requests.head(link, allow_redirects=True, timeout=5) if response.status_code >= 400: - print(f"Broken link in {mdx_file}: {link} (Status: {response.status_code})") + print(f"❌ Broken link in {mdx_file}: {link} (Status: {response.status_code})") except requests.RequestException: - print(f"Error checking {link} in {mdx_file}") + print(f"⚠️ Error checking {link} in {mdx_file}") -check_links_in_mdx("content/docs") +if __name__ == "__main__": + check_links_in_mdx("content/docs")