Skip to content
Snippets Groups Projects
Commit 25161c91 authored by Lukáš Krupčík's avatar Lukáš Krupčík
Browse files

modified: scripts/url-interni-test.py

	modified:   scripts/url-test.py
parent f697f03b
No related branches found
No related tags found
1 merge request: !486 "new file: content/docs/anselm/compute-nodes.mdx"
#!/bin/python3 #!/usr/bin/python3
"""
Script to check internal links and section references in MDX files.
"""
import re import re
from pathlib import Path from pathlib import Path
def extract_links(content):
    """Extract all internal link targets from Markdown/MDX text.

    A link is any inline Markdown link of the form ``[text](target)``.
    Targets that begin with ``http://`` or ``https://`` are treated as
    external and skipped; everything else is returned as written,
    including any ``#fragment`` suffix.

    :param content: Text of a single MDX file.
    :return: List of link targets in order of appearance (duplicates kept).
    """
    # Exclude only real http/https URLs. A bare "(?!http)" would also drop
    # relative targets whose file name merely starts with "http".
    link_pattern = re.compile(r'\[.*?\]\((?!https?://)(.*?)\)')
    return link_pattern.findall(content)
def extract_headers(content):
    """Extract all H1-H6 headers for hash reference checks.

    Each ATX-style header line (one to six ``#`` characters followed by
    whitespace and a title) is turned into an anchor: the title is
    lower-cased, spaces become ``-`` and a leading ``#`` is added, so
    ``## Getting Started`` yields ``#getting-started``.

    :param content: Text of a single MDX file.
    :return: Set of anchor strings for the headers found in ``content``.
    """
    # [ \t] instead of \s keeps the match on a single line: under
    # re.MULTILINE, \s would swallow the newline after a bare "#" and
    # capture the following line as a header title. Trailing blanks and
    # a stray "\r" are excluded so they do not end up in the anchor.
    header_pattern = re.compile(r'^#{1,6}[ \t]+(.*?)[ \t\r]*$', re.MULTILINE)
    return {
        f"#{title.lower().replace(' ', '-')}"
        for title in header_pattern.findall(content)
        if title
    }
def check_internal_links(directory): def check_internal_links(directory):
""" Kontroluje existenci souborů a hash sekcí pro interní odkazy. """ """Check the existence of files and hash sections for internal links."""
mdx_files = {f.relative_to(directory): f for f in Path(directory).rglob("*.mdx")} mdx_files = {f.relative_to(directory): f for f in Path(directory).rglob("*.mdx")}
file_headers = {} file_headers = {}
for mdx_file, path in mdx_files.items(): for mdx_file, path in mdx_files.items():
with open(path, "r", encoding="utf-8") as f: with open(path, "r", encoding="utf-8") as file:
content = f.read() content = file.read()
file_headers[mdx_file] = extract_headers(content) file_headers[mdx_file] = extract_headers(content)
for mdx_file, path in mdx_files.items(): for mdx_file, path in mdx_files.items():
with open(path, "r", encoding="utf-8") as f: with open(path, "r", encoding="utf-8") as file:
content = f.read() content = file.read()
links = extract_links(content) links = extract_links(content)
for link in links: for link in links:
...@@ -35,13 +39,13 @@ def check_internal_links(directory): ...@@ -35,13 +39,13 @@ def check_internal_links(directory):
file_target = (Path(mdx_file).parent / file_part).resolve() file_target = (Path(mdx_file).parent / file_part).resolve()
# Kontrola existence souboru # Check if the file exists
if file_part and file_target not in mdx_files.values(): if file_part and file_target not in mdx_files.values():
print(f"❌ Broken file link in {mdx_file}: {link}") print(f"❌ Broken file link in {mdx_file}: {link}")
# Kontrola existence sekce # Check if the section exists
elif hash_part and hash_part not in file_headers.get(file_part, {}): elif hash_part and hash_part not in file_headers.get(file_part, {}):
print(f"⚠️ Broken section link in {mdx_file}: {link}") print(f"⚠️ Broken section link in {mdx_file}: {link}")
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    check_internal_links("content/docs")
#!/bin/python3 #!/usr/bin/python3
"""
Script to check external links in MDX files.
"""
import re import re
import requests
from pathlib import Path from pathlib import Path
import requests
def check_links_in_mdx(directory):
    """
    Scan MDX files in the given directory for external links and check them.

    Every ``*.mdx`` file found recursively below ``directory`` is read as
    UTF-8. Inline Markdown links whose target starts with ``http://`` or
    ``https://`` are extracted, and each distinct URL in a file is probed
    with an HTTP HEAD request (redirects followed, 5 second timeout).
    Problems are printed to stdout; nothing is returned.

    :param directory: Path to the directory containing MDX files.
    """
    url_pattern = re.compile(r'\[.*?\]\((http[s]?://.*?)\)')

    for mdx_file in Path(directory).rglob("*.mdx"):
        content = mdx_file.read_text(encoding="utf-8")

        # dict.fromkeys drops repeated URLs while keeping first-seen order,
        # so a link used several times in one file is requested only once.
        for link in dict.fromkeys(url_pattern.findall(content)):
            try:
                response = requests.head(link, allow_redirects=True, timeout=5)
            except requests.RequestException:
                print(f"⚠️ Error checking {link} in {mdx_file}")
                continue

            if response.status_code >= 400:
                print(f"Broken link in {mdx_file}: {link} (Status: {response.status_code})")
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    check_links_in_mdx("content/docs")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment