Newer
Older
"""
Script to process Markdown files, convert them to MDX,
and add frontmatter based on the first H1 heading.
"""
import os
import re
from pathlib import Path
# Matches a frontmatter block whose only content is "hide: - toc".
_HIDE_TOC_RE = re.compile(
    r'^---\s*\n\s*hide:\s*\n\s*-\s+toc\s*\n---\s*\n*',
    flags=re.MULTILINE
)

# Matches an H1 heading on a single line. Only spaces/tabs are allowed around
# the "#", so the match can never spill over onto neighbouring lines.
_H1_RE = re.compile(r'^[ \t]*#[ \t]+(.*)$', re.MULTILINE)


def _yaml_double_quoted(text):
    """Return text as a YAML double-quoted scalar.

    Backslashes and double quotes are escaped so that headings containing
    them still yield valid frontmatter.
    """
    escaped = text.replace('\\', '\\\\').replace('"', '\\"')
    return f'"{escaped}"'


def _convert_to_mdx(content):
    """Build the MDX text for the Markdown text ``content``.

    Returns:
        tuple: ``(mdx_text, found_h1)`` where ``found_h1`` tells whether an
        H1 heading supplied the title.
    """
    # Drop an existing hide-toc frontmatter block, then leading empty lines.
    content = _HIDE_TOC_RE.sub('', content, count=1)
    content = content.lstrip('\n')

    # NOTE: the search covers the whole text, including fenced code blocks.
    heading = _H1_RE.search(content)
    if heading:
        title = heading.group(1).strip()
        # Remove the heading line itself; text on either side is kept.
        body = content[:heading.start()] + content[heading.end():].lstrip('\n')
    else:
        title = ''
        body = content

    # The body must start directly after the frontmatter, so skip every
    # leading line that is empty or whitespace-only.
    body_lines = body.split('\n')
    first_kept = next(
        (index for index, line in enumerate(body_lines) if line.strip()),
        len(body_lines),
    )
    header = ['---', f'title: {_yaml_double_quoted(title)}', '---']
    return '\n'.join(header + body_lines[first_kept:]), heading is not None


def process_md_file(md_path):
    """
    Converts a Markdown file (.md) to MDX format (.mdx),
    adds frontmatter with title from H1 heading, and removes the original file.

    The first H1 heading becomes the frontmatter title and is removed from
    the body. When there is no H1 heading, an empty title is written. Files
    that cannot be decoded as UTF-8 are reported and left untouched.

    Args:
        md_path (Path | str): Path to the Markdown file.
    """
    md_path = Path(md_path)
    try:
        content = md_path.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        print(f"Skipping {md_path} - unable to decode as UTF-8")
        return

    new_content, found_h1 = _convert_to_mdx(content)
    if not found_h1:
        print(f"No H1 heading found in {md_path}, creating basic frontmatter")

    # Write the MDX file first; the original is removed only after that
    # succeeded, so a failed write never loses the source file.
    mdx_path = md_path.with_suffix('.mdx')
    mdx_path.write_text(new_content, encoding='utf-8')
    md_path.unlink()
    print(f"Converted {md_path} to {mdx_path}")
def main():
    """Convert every ``.md`` file found below the current directory.

    Directories whose name starts with a dot are not entered.
    """
    for current_dir, subdirs, filenames in os.walk('.'):
        # Filter the list in place: os.walk only descends into what is left.
        subdirs[:] = [name for name in subdirs if not name.startswith('.')]
        for filename in filenames:
            if not filename.endswith('.md'):
                continue
            process_md_file(Path(current_dir) / filename)


if __name__ == '__main__':
    main()