Skip to content
Snippets Groups Projects
url-interni-test.py 1.79 KiB
Newer Older
  • Learn to ignore specific revisions
  • #!/bin/python3
    
    import re
    from pathlib import Path
    
    def extract_links(content):
        """Return the targets of all internal Markdown links in *content*.

        A link is any ``[text](target)`` construct. Targets that begin with an
        ``http://`` or ``https://`` scheme are treated as external and skipped.

        Args:
            content: Full text of a Markdown/MDX document.

        Returns:
            A list of link targets in order of appearance, each exactly as
            written between the parentheses (including any ``#fragment``).
        """
        # Reject only real http(s) URLs. A bare ``(?!http)`` would also discard
        # relative targets whose name merely starts with "http", such as
        # ``http-basics.mdx``.
        link_pattern = re.compile(r'\[.*?\]\((?!https?://)(.*?)\)')
        return link_pattern.findall(content)
    
    def extract_headers(content):
        """Collect the H1-H6 headings of *content* for checking hash links.

        Args:
            content: Full text of a Markdown/MDX document.

        Returns:
            A dict mapping each heading's anchor (``"#"`` followed by the
            lower-cased heading text with spaces replaced by hyphens) to the
            heading text itself. Headings with no text are omitted.
        """
        # ``#{1,6}(?!#)`` limits matches to H1-H6. ``[ \t]*`` (rather than
        # ``\s*``) keeps the match on one line, so an empty "#" line cannot
        # swallow the following line as its heading text.
        header_pattern = re.compile(r'^(#{1,6})(?!#)[ \t]*(.*)', re.MULTILINE)

        headers = {}
        for _hashes, raw_text in header_pattern.findall(content):
            # Trailing whitespace would otherwise turn into trailing hyphens.
            text = raw_text.strip()
            if text:
                headers[f"#{text.lower().replace(' ', '-')}"] = text
        return headers
    
    def check_internal_links(directory):
        """Report broken internal links in every ``*.mdx`` file under *directory*.

        For each link returned by ``extract_links`` the function checks that

        * the file part (if any), taken relative to the directory of the
          linking file, points at one of the ``*.mdx`` files found under
          *directory*; and
        * the ``#fragment`` part (if any) matches an anchor returned by
          ``extract_headers`` for the target file. A link with no file part
          targets the linking file itself.

        Problems are printed to standard output; nothing is returned.

        Args:
            directory: Root directory that is searched recursively.
        """
        root = Path(directory)

        # Path relative to the root (used in reports and as lookup key)
        # -> path of the file as found on disk. Sorted for stable output.
        mdx_files = {
            f.relative_to(root): f
            for f in sorted(root.rglob("*.mdx"))
            if f.is_file()
        }
        # Resolved absolute path -> relative key. Link targets are resolved the
        # same way, so the comparison works whether *directory* is relative or
        # absolute and whether the link contains ".." segments.
        resolved_to_key = {path.resolve(): key for key, path in mdx_files.items()}

        # Read every file once and keep both its anchors and its links.
        file_headers = {}
        file_links = {}
        for mdx_file, path in mdx_files.items():
            content = path.read_text(encoding="utf-8")
            file_headers[mdx_file] = extract_headers(content)
            file_links[mdx_file] = extract_links(content)

        for mdx_file, path in mdx_files.items():
            for link in file_links[mdx_file]:
                file_part, _, fragment = link.partition("#")
                # A bare "#" (empty fragment) names no section to verify.
                hash_part = f"#{fragment}" if fragment else None

                if file_part:
                    # Resolve relative to the linking file's real location,
                    # not to the current working directory.
                    target = resolved_to_key.get((path.parent / file_part).resolve())
                else:
                    # Pure "#anchor" link: the target is the file itself.
                    target = mdx_file

                # Check that the target file exists
                if target is None:
                    print(f"❌ Broken file link in {mdx_file}: {link}")

                # Check that the target section exists
                elif hash_part and hash_part not in file_headers[target]:
                    print(f"⚠️ Broken section link in {mdx_file}: {link}")
    
    # Run the check only when executed as a script, so importing this module
    # has no side effects.
    if __name__ == "__main__":
        check_internal_links("content/docs")