Skip to content
Snippets Groups Projects
Commit 747a50bb authored by Lukáš Krupčík
Browse files

fix

parent 25161c91
No related branches found
No related tags found
1 merge request: !486 "new file: content/docs/anselm/compute-nodes.mdx"
......@@ -8,25 +8,41 @@ import re
from pathlib import Path
def extract_links(content):
    """
    Extract all internal links from the file.

    A Markdown link ``[text](target)`` is treated as internal unless its
    target starts with an ``http://`` or ``https://`` scheme. Relative
    targets whose names merely begin with "http" (for example
    ``http-proxy.mdx``) are therefore still returned.

    :param content: The content of the MDX file.
    :return: A list of internal link targets, in order of appearance.
    """
    # Reject only real http(s) URLs; a bare "http" prefix check would also
    # discard relative files such as "http-proxy.mdx".
    link_pattern = re.compile(r'\[.*?\]\((?!https?://)(.*?)\)')
    return link_pattern.findall(content)
def extract_headers(content):
    """
    Extract all H1-H6 headers for hash reference checks.

    A header is a line starting with one to six ``#`` characters followed
    by at least one space or tab. The header text is trimmed, lower-cased
    and has its spaces replaced by ``-`` to form the anchor.

    :param content: The content of the MDX file.
    :return: A set of headers formatted as hash links (e.g. ``#my-title``).
    """
    # Use [ \t]+ rather than \s* so the separator can never swallow a
    # newline and capture the *next* line as the title of an empty header.
    header_pattern = re.compile(r'^#{1,6}[ \t]+(.*)$', re.MULTILINE)
    return {
        # Strip first so trailing whitespace does not become trailing dashes.
        f"#{title.strip().lower().replace(' ', '-')}"
        for title in header_pattern.findall(content)
        if title.strip()
    }
def check_internal_links(directory):
"""Check the existence of files and hash sections for internal links."""
"""
Check the existence of files and hash sections for internal links.
:param directory: The directory containing MDX files.
"""
mdx_files = {f.relative_to(directory): f for f in Path(directory).rglob("*.mdx")}
file_headers = {}
# Extract headers from each file
for mdx_file, path in mdx_files.items():
with open(path, "r", encoding="utf-8") as file:
content = file.read()
file_headers[mdx_file] = extract_headers(content)
# Check internal links
for mdx_file, path in mdx_files.items():
with open(path, "r", encoding="utf-8") as file:
content = file.read()
......@@ -44,7 +60,7 @@ def check_internal_links(directory):
print(f"❌ Broken file link in {mdx_file}: {link}")
# Check if the section exists
elif hash_part and hash_part not in file_headers.get(file_part, {}):
elif hash_part and hash_part not in file_headers.get(file_part, set()):
print(f"⚠️ Broken section link in {mdx_file}: {link}")
if __name__ == "__main__":
......
......@@ -26,7 +26,10 @@ def check_links_in_mdx(directory):
try:
response = requests.head(link, allow_redirects=True, timeout=5)
if response.status_code >= 400:
print(f"❌ Broken link in {mdx_file}: {link} (Status: {response.status_code})")
print(
f"❌ Broken link in {mdx_file}: {link} "
f"(Status: {response.status_code})"
)
except requests.RequestException:
print(f"⚠️ Error checking {link} in {mdx_file}")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment