#!/usr/bin/python3
"""
Script to check internal links and section references in MDX files.
"""

import re
from pathlib import Path

# Markdown inline links whose target does not start with "http"
# (i.e. everything except http/https URLs). Group 1 is the link target.
_LINK_PATTERN = re.compile(r'\[.*?\]\((?!http)(.*?)\)')

# Lines starting with one or more '#'. Only spaces/tabs are skipped after the
# hashes so the match can never run over a newline and swallow the next line.
_HEADER_PATTERN = re.compile(r'^(#+)[ \t]*(.*)', re.MULTILINE)


def extract_links(content):
    """
    Extract all internal links from the file.

    :param content: The content of the MDX file.
    :return: A list of internal link targets (the part inside the parentheses),
             in order of appearance. Targets starting with "http" are excluded.
    """
    return _LINK_PATTERN.findall(content)


def extract_headers(content):
    """
    Extract all headers for hash reference checks.

    The anchor for a header is its text, stripped of surrounding whitespace,
    lower-cased, with spaces replaced by dashes and prefixed with '#'.

    :param content: The content of the MDX file.
    :return: A set of headers formatted as hash links (e.g. "#getting-started").
    """
    headers = set()
    for _hashes, raw_title in _HEADER_PATTERN.findall(content):
        # Trailing blanks (or a stray '\r') must not leak into the slug.
        title = raw_title.strip()
        if title:
            headers.add(f"#{title.lower().replace(' ', '-')}")
    return headers


def _resolve_target(root, base_dir, file_part):
    """
    Map a link's file part to the key used for known MDX files.

    :param root: Resolved absolute path of the documentation root.
    :param base_dir: Absolute directory of the file containing the link.
    :param file_part: The link target without its "#section" suffix.
    :return: The target path relative to ``root``, or None when the target
             cannot be resolved or lies outside ``root``.
    """
    try:
        return (base_dir / file_part).resolve().relative_to(root)
    except (ValueError, OSError):
        # ValueError: target escapes the root (or contains invalid characters);
        # OSError: the path could not be resolved on this filesystem.
        return None


def check_internal_links(directory):
    """
    Check the existence of files and hash sections for internal links.

    Every ``*.mdx`` file below ``directory`` is scanned. A link is reported as
    a broken file link when its file part does not point at a known MDX file,
    and as a broken section link when the file exists but does not contain the
    referenced header. Problems are printed to standard output.

    :param directory: The directory containing MDX files.
    """
    # Resolve the root once so that every path comparison below happens
    # between paths expressed relative to the same absolute directory.
    root = Path(directory).resolve()
    mdx_files = {
        path.relative_to(root): path for path in sorted(root.rglob("*.mdx"))
    }

    # Read each file a single time, collecting both headers and links.
    file_headers = {}
    file_links = {}
    for mdx_file, path in mdx_files.items():
        content = path.read_text(encoding="utf-8")
        file_headers[mdx_file] = extract_headers(content)
        file_links[mdx_file] = extract_links(content)

    for mdx_file, path in mdx_files.items():
        for link in file_links[mdx_file]:
            file_part, _, anchor = link.partition("#")

            if file_part:
                # Relative links are relative to the linking file's directory.
                target = _resolve_target(root, path.parent, file_part)
            else:
                # "#section" only: the link points into the current file.
                target = mdx_file

            # Check if the file exists
            if target not in mdx_files:
                print(f"❌ Broken file link in {mdx_file}: {link}")
            # Check if the section exists
            elif anchor and f"#{anchor}" not in file_headers[target]:
                print(f"⚠️ Broken section link in {mdx_file}: {link}")


if __name__ == "__main__":
    check_internal_links("content/docs")