Skip to content
Snippets Groups Projects
maketitle.py 2.53 KiB
Newer Older
  • Learn to ignore specific revisions
  • """
    Script to process Markdown files, convert them to MDX,
    and add frontmatter based on the first H1 heading.
    """
    
    
    import os
    import re
    from pathlib import Path
    
    def process_md_file(md_path):
        """
        Convert a Markdown file (.md) to MDX (.mdx) with a title frontmatter.

        The file is read as UTF-8, the first ``hide: - toc`` frontmatter block
        is dropped, and the first H1 heading is removed from the body and used
        as the ``title`` of a new frontmatter block. The result is written next
        to the source with an ``.mdx`` suffix and the original file is deleted.

        Args:
            md_path (Path | str): Path to the Markdown file.

        Returns:
            None. A file that cannot be decoded as UTF-8 is reported on stdout
            and left untouched.
        """
        # Accept plain strings as well as Path objects.
        md_path = Path(md_path)

        try:
            with open(md_path, 'r', encoding='utf-8') as file_handle:
                content = file_handle.read()
        except UnicodeDecodeError:
            print(f"Skipping {md_path} - unable to decode as UTF-8")
            return

        # Remove existing hide-toc frontmatter
        hide_toc_pattern = re.compile(
            r'^---\s*\n\s*hide:\s*\n\s*-\s+toc\s*\n---\s*\n*',
            flags=re.MULTILINE
        )
        content = hide_toc_pattern.sub('', content, count=1)

        # Remove leading empty lines
        content = re.sub(r'^\n+', '', content)

        # Locate the first H1 heading. Only spaces/tabs are allowed around the
        # '#': '\s' would also match newlines, which swallows the blank line in
        # front of the heading and lets a bare '#' line grab the next line as
        # its title.
        h1_pattern = re.compile(r'^[ \t]*#[ \t]+(.*)$', re.MULTILINE)
        match = h1_pattern.search(content)

        if match:
            title = match.group(1).strip()
            # Drop the heading line itself; keep the text around it.
            body = (
                content[:match.start()].lstrip('\n')
                + content[match.end():].lstrip('\n')
            )
        else:
            print(f"No H1 heading found in {md_path}, creating basic frontmatter")
            title = ''
            body = content

        # Escape backslashes and double quotes so the title remains a valid
        # double-quoted YAML scalar.
        escaped_title = title.replace('\\', '\\\\').replace('"', '\\"')
        frontmatter = f'---\ntitle: "{escaped_title}"\n---\n'

        # Ensure no empty (or whitespace-only) lines directly after frontmatter
        body_lines = body.split('\n')
        first_kept = 0
        while first_kept < len(body_lines) and not body_lines[first_kept].strip():
            first_kept += 1
        new_content = frontmatter + '\n'.join(body_lines[first_kept:])

        # Create and write MDX file
        mdx_path = md_path.with_suffix('.mdx')

        with open(mdx_path, 'w', encoding='utf-8') as file_handle:
            file_handle.write(new_content)

        # Remove original MD file only after the MDX file has been written
        md_path.unlink()
        print(f"Converted {md_path} to {mdx_path}")
    
    def main():
        """Convert every ``.md`` file found below the current directory.

        Directories whose names start with a dot are not descended into.
        """
        for current_dir, subdirs, filenames in os.walk('.'):
            # Slice-assign in place so os.walk itself skips hidden directories.
            subdirs[:] = [name for name in subdirs if not name.startswith('.')]
            base_dir = Path(current_dir)
            for filename in filenames:
                if not filename.endswith('.md'):
                    continue
                process_md_file(base_dir / filename)
    
    # Run the conversion only when executed as a script, not when imported.
    if __name__ == '__main__':
        main()