Skip to content
Snippets Groups Projects
Commit f697f03b authored by Lukáš Krupčík's avatar Lukáš Krupčík
Browse files

new file: scripts/url-interni-test.py

	new file:   scripts/url-test.py
parent ffc32363
No related branches found
No related tags found
1 merge request: !486 — new file: content/docs/anselm/compute-nodes.mdx
#!/bin/python3
import re
from pathlib import Path
def extract_links(content):
    """Extract the targets of all internal Markdown links in *content*.

    A link is considered internal when its target carries no URL scheme
    (``http:``, ``https:``, ``mailto:``, ``ftp:``, ...) and is not a
    protocol-relative ``//host/...`` reference. An optional link title
    (``[text](target "Title")``) is dropped from the returned target.

    Args:
        content: Markdown/MDX source text.

    Returns:
        List of link targets (path and/or ``#fragment``) in document order.
    """
    link_pattern = re.compile(r'\[.*?\]\((.*?)\)')
    # Anything with a URL scheme or a protocol-relative prefix is external.
    # Two or more scheme characters are required so a bare "x:" is not
    # mistaken for a scheme.
    external_pattern = re.compile(r'(?:[a-zA-Z][a-zA-Z0-9+.-]+:|//)')

    links = []
    for raw_target in link_pattern.findall(content):
        # Keep only the destination; whatever follows whitespace is a title.
        tokens = raw_target.split(None, 1)
        target = tokens[0] if tokens else ""
        if not external_pattern.match(target):
            links.append(target)
    return links
def extract_headers(content):
    """Extract all H1-H6 ATX headings for validating ``#fragment`` links.

    Only lines that start with one to six ``#`` characters followed by at
    least one space or tab are treated as headings, so shebangs
    (``#!/bin/bash``) and ``#tag`` text are ignored. Trailing whitespace is
    not part of the heading text.

    Args:
        content: Markdown/MDX source text.

    Returns:
        Dict mapping the anchor (``"#"`` + lower-cased heading text with
        spaces replaced by ``-``) to the original heading text.
    """
    header_pattern = re.compile(r'^#{1,6}[ \t]+(.+?)[ \t]*$', re.MULTILINE)
    return {
        f"#{title.lower().replace(' ', '-')}": title
        for title in header_pattern.findall(content)
    }
def check_internal_links(directory):
    """Check that internal links in ``*.mdx`` files point to existing targets.

    Every ``*.mdx`` file below *directory* is scanned. For each internal link
    the referenced file must be one of the scanned ``*.mdx`` files and, when
    the link carries a ``#fragment``, the target file must contain a matching
    heading. Problems are printed to stdout; nothing is returned.

    Args:
        directory: Root directory of the documentation tree.
    """
    root = Path(directory).resolve()
    # Resolved absolute path -> path relative to *directory* (used in
    # messages). Keying on resolved paths lets link targets such as
    # "../other/file.mdx" be compared reliably.
    mdx_files = {
        f.resolve(): f.relative_to(directory)
        for f in Path(directory).rglob("*.mdx")
    }

    # Read every file once, collecting both its headings and its links.
    file_headers = {}
    file_links = {}
    for path in mdx_files:
        content = path.read_text(encoding="utf-8")
        file_headers[path] = extract_headers(content)
        file_links[path] = extract_links(content)

    for path, mdx_file in mdx_files.items():
        for link in file_links[path]:
            file_part, _, fragment = link.partition("#")
            if not file_part:
                # Pure "#fragment" link: refers to the current file.
                target = path
            elif file_part.startswith("/"):
                # NOTE(review): root-relative links are assumed to be
                # relative to the docs root — confirm against the site's
                # routing.
                target = (root / file_part.lstrip("/")).resolve()
            else:
                # Relative links are resolved against the linking file's
                # own directory.
                target = (path.parent / file_part).resolve()

            # File existence check
            if target not in mdx_files:
                print(f"❌ Broken file link in {mdx_file}: {link}")
            # Section existence check
            elif fragment and f"#{fragment}" not in file_headers[target]:
                print(f"⚠️ Broken section link in {mdx_file}: {link}")
# Run the check only when executed as a script, not when imported.
# The docs path is relative to the current working directory.
if __name__ == "__main__":
    check_internal_links("content/docs")
#!/bin/python3
import re
import requests
from pathlib import Path
def _fetch_status(url, timeout=5):
    """Return the HTTP status code for *url*, or ``None`` if the request failed."""
    try:
        response = requests.head(url, allow_redirects=True, timeout=timeout)
        if response.status_code in (405, 501):
            # Some servers do not implement HEAD; retry with GET before
            # declaring the link broken. stream=True avoids downloading
            # the response body.
            response = requests.get(
                url, allow_redirects=True, timeout=timeout, stream=True
            )
            response.close()
        return response.status_code
    except requests.RequestException:
        return None


def check_links_in_mdx(directory):
    """Check that external http(s) links in ``*.mdx`` files are reachable.

    Every ``*.mdx`` file below *directory* is scanned for Markdown links whose
    target starts with ``http://`` or ``https://``. A link is reported on
    stdout when the server answers with a status code >= 400 or when the
    request itself fails. Each distinct URL is requested only once, but it is
    reported for every file occurrence.

    Args:
        directory: Root directory of the documentation tree.
    """
    url_pattern = re.compile(r'\[.*?\]\((http[s]?://.*?)\)')
    # URL -> status code (None when the request raised), so a URL that is
    # linked from many pages is fetched only once.
    status_by_url = {}

    for mdx_file in Path(directory).rglob("*.mdx"):
        with open(mdx_file, "r", encoding="utf-8") as f:
            content = f.read()

        for link in url_pattern.findall(content):
            if link not in status_by_url:
                status_by_url[link] = _fetch_status(link)
            status = status_by_url[link]

            if status is None:
                print(f"Error checking {link} in {mdx_file}")
            elif status >= 400:
                print(f"Broken link in {mdx_file}: {link} (Status: {status})")
# Run the check only when executed as a script, not when imported.
# The docs path is relative to the current working directory.
if __name__ == "__main__":
    check_links_in_mdx("content/docs")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment