Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/bin/python3
import re
from pathlib import Path
def extract_links(content):
    """Return the targets of all internal Markdown links found in *content*.

    A link is any inline ``[text](target)`` construct.  A target counts as
    external, and is skipped, when it starts with a URI scheme
    (``http:``, ``https:``, ``mailto:``, ``tel:`` ...) or is protocol-relative
    (``//host/path``).  Everything else is returned verbatim, including any
    ``#fragment`` part: relative paths, root-relative paths and pure
    ``#anchor`` links.

    Args:
        content: Markdown/MDX source text.

    Returns:
        A list of link target strings in document order.
    """
    # The lookahead checks for "<scheme>:" rather than the literal prefix
    # "http", so a local file such as "http-api.mdx" is still treated as
    # internal while "mailto:..." and friends are not.
    link_pattern = re.compile(
        r'\[.*?\]\((?![a-zA-Z][a-zA-Z0-9+.\-]*:|//)(.*?)\)'
    )
    return link_pattern.findall(content)
def extract_headers(content):
    """Map the anchor of every H1-H6 heading in *content* to its title.

    Only ATX headings are recognised: one to six ``#`` characters at the
    start of a line followed by at least one space or tab.  Lines such as
    ``#!/bin/sh`` or ``#tag`` are therefore not treated as headings.

    The anchor is built GitHub-style: the title is lower-cased, every
    character that is not a letter, digit, underscore, hyphen or space is
    removed, and spaces become hyphens.  "What is this?" thus yields
    ``#what-is-this``.

    Note: headings inside fenced code blocks are not distinguished from real
    headings, and repeated titles map to a single anchor.

    Args:
        content: Markdown/MDX source text.

    Returns:
        A dict mapping ``"#slug"`` to the heading text (surrounding
        whitespace stripped).
    """
    # [ \t]+ instead of \s* keeps the match on a single line; \s would also
    # consume the newline and capture the *next* line as the title.
    header_pattern = re.compile(r'^#{1,6}[ \t]+(.*?)[ \t\r]*$', re.MULTILINE)
    anchors = {}
    for title in header_pattern.findall(content):
        slug = re.sub(r'[^\w\- ]', '', title.lower()).replace(' ', '-')
        anchors[f"#{slug}"] = title
    return anchors
def check_internal_links(directory):
    """Print every broken internal link found in ``*.mdx`` files under *directory*.

    For each link returned by ``extract_links`` the target file is resolved
    and looked up among the scanned ``.mdx`` files; if the link carries a
    ``#fragment``, the fragment is looked up among the anchors that
    ``extract_headers`` produced for the target file.

    Link resolution:
      * ``#anchor``        -> the file containing the link
      * ``/path/file.mdx`` -> relative to *directory*
      * anything else      -> relative to the directory of the linking file

    Args:
        directory: Root folder of the documentation tree (str or Path).

    Returns:
        None.  Problems are reported on stdout, one line per broken link.
    """
    root = Path(directory).resolve()

    # Read each file exactly once.  Anchors are keyed by the *resolved* path
    # because link targets are resolved too; comparing a resolved target with
    # an unresolved rglob() path would never match.
    pages = []            # (name relative to root, path on disk, links)
    anchors_by_file = {}  # resolved path -> {"#slug": title}
    for path in sorted(root.rglob("*.mdx")):
        content = path.read_text(encoding="utf-8")
        pages.append((path.relative_to(root), path, extract_links(content)))
        anchors_by_file[path.resolve()] = extract_headers(content)

    for mdx_file, path, links in pages:
        for link in links:
            file_part, _, fragment = link.partition("#")

            if not file_part:
                # Pure "#anchor" link: points into the current file.
                target = path.resolve()
            elif file_part.startswith("/"):
                # NOTE(review): root-relative links are resolved against
                # *directory*; adjust if the site serves the docs under a
                # URL prefix.
                target = (root / file_part.lstrip("/")).resolve()
            else:
                target = (path.parent / file_part).resolve()

            # File existence check
            if target not in anchors_by_file:
                print(f"❌ Broken file link in {mdx_file}: {link}")
            # Section existence check (only meaningful when the file exists)
            elif fragment and f"#{fragment}" not in anchors_by_file[target]:
                print(f"⚠️ Broken section link in {mdx_file}: {link}")
# Script entry: check the docs tree at "content/docs". The path is relative,
# so it is taken from the current working directory. Note that this call also
# runs whenever the module is imported.
check_internal_links("content/docs")