# url-interni-test.py

#!/bin/python3

import re
from pathlib import Path

def extract_links(content):
    """Return the targets of all internal Markdown links found in ``content``.

    A link has the inline form ``[text](target)``. Targets that begin with
    ``http://`` or ``https://`` are treated as external and left out; every
    other target is returned exactly as written (including any ``#fragment``).

    Args:
        content: Markdown/MDX source text.

    Returns:
        A list of link target strings, in order of appearance.
    """
    # The lookahead rejects only real http(s) URLs. Testing for a bare "http"
    # prefix would also drop internal files such as "http-guide.mdx".
    link_pattern = re.compile(r'\[.*?\]\((?!https?://)(.*?)\)')
    return link_pattern.findall(content)

def extract_headers(content):
    """Map the anchor of every H1-H6 heading in ``content`` to its title.

    Only ATX headings are recognised: one to six ``#`` characters at the
    start of a line, followed by at least one space or tab. The anchor is the
    title lower-cased with each space replaced by ``-`` and prefixed with
    ``#``, e.g. ``## Sub Section`` yields ``"#sub-section"``.

    Args:
        content: Markdown/MDX source text.

    Returns:
        A dict mapping anchor strings to heading titles. When two headings
        produce the same anchor, the later title wins.
    """
    # [ \t]+ (rather than \s*) keeps the match on a single line and stops
    # "#!/bin/sh" or "#tag" from being mistaken for headings.
    header_pattern = re.compile(r'^#{1,6}[ \t]+(.*)$', re.MULTILINE)

    headers = {}
    for raw_title in header_pattern.findall(content):
        # Trailing whitespace would otherwise end up in the anchor as a
        # dangling "-".
        title = raw_title.strip()
        if title:
            headers[f"#{title.lower().replace(' ', '-')}"] = title
    return headers

def check_internal_links(directory):
    """Print every internal link in the ``*.mdx`` files of ``directory`` that points nowhere.

    Each ``.mdx`` file below ``directory`` is scanned with ``extract_links``.
    The file part of a link is interpreted relative to the folder of the file
    containing the link; a link that consists only of ``#fragment`` refers to
    the containing file itself. Two kinds of problems are printed to stdout:

    * "Broken file link" - the target is neither one of the scanned
      ``.mdx`` files nor any other existing file on disk.
    * "Broken section link" - the target is a scanned ``.mdx`` file, but
      ``extract_headers`` produced no anchor equal to the link's fragment.

    Links that carry a URI scheme (``mailto:``, ``tel:``, ...) or start with
    ``//`` are not file references and are skipped.

    NOTE(review): a target starting with ``/`` is joined as a filesystem-
    absolute path, so site-root-relative links will normally be reported as
    broken - confirm how the site routes such links before relying on this.

    Args:
        directory: Root folder (``str`` or ``Path``) searched recursively.

    Returns:
        None. Findings are printed, one line per broken link.
    """
    root = Path(directory)
    scheme_pattern = re.compile(r'[A-Za-z][A-Za-z0-9+.-]*:')

    # Read every file once. Anchors are keyed by the fully resolved path so a
    # (likewise resolved) link target can be looked up regardless of how
    # ``directory`` or the link was spelled ("./", "../", relative cwd).
    sources = []  # (path as found by rglob, file text)
    anchors = {}  # resolved path -> anchors from extract_headers
    for path in sorted(root.rglob("*.mdx")):
        content = path.read_text(encoding="utf-8")
        sources.append((path, content))
        anchors[path.resolve()] = extract_headers(content)

    for path, content in sources:
        mdx_file = path.relative_to(root)  # short name used in the messages

        for link in extract_links(content):
            if link.startswith("//") or scheme_pattern.match(link):
                continue

            file_part, _, fragment = link.partition("#")

            # An empty file part ("#section") points at the current file;
            # joining "" onto the parent would yield the folder instead.
            if file_part:
                target = (path.parent / file_part).resolve()
            else:
                target = path.resolve()

            # Check that the target file exists.
            if target not in anchors:
                if not target.is_file():
                    print(f"❌ Broken file link in {mdx_file}: {link}")
                # Existing non-.mdx files have no known anchors to verify.
                continue

            # Check that the target section exists.
            if fragment and f"#{fragment}" not in anchors[target]:
                print(f"⚠️ Broken section link in {mdx_file}: {link}")

if __name__ == "__main__":
    # Run the check only when executed as a script, so importing this module
    # (e.g. to reuse the helpers) does not scan the file system.
    check_internal_links("content/docs")