From 747a50bb7e9e4d9806b0f79ae7d0e89d5b13f4b0 Mon Sep 17 00:00:00 2001 From: Lukas Krupcik <lukas.krupcik@vsb.cz> Date: Mon, 3 Mar 2025 14:22:02 +0100 Subject: [PATCH] fix --- ...rl-interni-test.py => url_interni_test.py} | 24 +++++++++++++++---- scripts/{url-test.py => url_test.py} | 5 +++- 2 files changed, 24 insertions(+), 5 deletions(-) rename scripts/{url-interni-test.py => url_interni_test.py} (75%) rename scripts/{url-test.py => url_test.py} (83%) diff --git a/scripts/url-interni-test.py b/scripts/url_interni_test.py similarity index 75% rename from scripts/url-interni-test.py rename to scripts/url_interni_test.py index 40a2a6aa..998a8789 100644 --- a/scripts/url-interni-test.py +++ b/scripts/url_interni_test.py @@ -8,25 +8,41 @@ import re from pathlib import Path def extract_links(content): - """Extract all internal links from the file.""" + """ + Extract all internal links from the file. + + :param content: The content of the MDX file. + :return: A list of internal links. + """ link_pattern = re.compile(r'\[.*?\]\((?!http)(.*?)\)') # Everything except http/https return link_pattern.findall(content) def extract_headers(content): - """Extract all H1-H6 headers for hash reference checks.""" + """ + Extract all H1-H6 headers for hash reference checks. + + :param content: The content of the MDX file. + :return: A set of headers formatted as hash links. + """ header_pattern = re.compile(r'^(#+)\s*(.*)', re.MULTILINE) return {f"#{match[1].lower().replace(' ', '-')}" for match in header_pattern.findall(content)} def check_internal_links(directory): - """Check the existence of files and hash sections for internal links.""" + """ + Check the existence of files and hash sections for internal links. + + :param directory: The directory containing MDX files. + """ mdx_files = {f.relative_to(directory): f for f in Path(directory).rglob("*.mdx")} file_headers = {} + # Extract headers from each file for mdx_file, path in mdx_files.items(): with open(path, "r", encoding="utf-8") as file: content = file.read() file_headers[mdx_file] = extract_headers(content) + # Check internal links for mdx_file, path in mdx_files.items(): with open(path, "r", encoding="utf-8") as file: content = file.read() @@ -44,7 +60,7 @@ def check_internal_links(directory): print(f"❌ Broken file link in {mdx_file}: {link}") # Check if the section exists - elif hash_part and hash_part not in file_headers.get(file_part, {}): + elif hash_part and hash_part not in file_headers.get(file_part, set()): print(f"⚠️ Broken section link in {mdx_file}: {link}") if __name__ == "__main__": diff --git a/scripts/url-test.py b/scripts/url_test.py similarity index 83% rename from scripts/url-test.py rename to scripts/url_test.py index f1dd4089..917f80bb 100644 --- a/scripts/url-test.py +++ b/scripts/url_test.py @@ -26,7 +26,10 @@ def check_links_in_mdx(directory): try: response = requests.head(link, allow_redirects=True, timeout=5) if response.status_code >= 400: - print(f"❌ Broken link in {mdx_file}: {link} (Status: {response.status_code})") + print( + f"❌ Broken link in {mdx_file}: {link} " + f"(Status: {response.status_code})" + ) except requests.RequestException: print(f"⚠️ Error checking {link} in {mdx_file}") -- GitLab