In [3]:

import os
import re

def remove_frontmatter(directory):
    """
    Removes frontmatter from the markdown files in the given directory.

    Every entry of ``directory`` whose name ends with ".md" is opened as
    UTF-8 text. If its content starts with a frontmatter block of the form

        ---
        title: "..."
        prev: ...
        next: ...
        ---

    that block, together with the whitespace that follows it, is removed and
    the file is rewritten in place. Files that do not start with such a
    block are not written to. Sub-directories are not traversed.

    Args:
        directory (str): The directory containing the files to process.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a ".md" file is not valid UTF-8.
    """
    # Compiled once up front: the pattern does not depend on the file.
    pattern = re.compile(
        r"^---\s*title:\s*\"[^\"]*\"\s*prev:\s*[^\n]*\s*next:\s*[^\n]*\s*---\s*\n*",
        re.MULTILINE
    )

    for filename in os.listdir(directory):
        if not filename.endswith(".md"):
            continue

        filepath = os.path.join(directory, filename)
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding (the documents may contain non-ASCII text).
        with open(filepath, "r+", encoding="utf-8") as f:
            content = f.read()

            # match() only tries position 0, so frontmatter-looking text
            # further down in the document is never touched.
            match = pattern.match(content)
            if match is None:
                continue

            # Overwrite from the start, then cut off the stale tail that is
            # left over because the new content is shorter than the old.
            f.seek(0)
            f.write(content[match.end():])
            f.truncate()

# Example usage: strip the frontmatter from every ".md" file in this folder.
# The files are rewritten in place; the relative path is resolved against the
# current working directory.
directory_path = "../docs/kinhtrungbo/thichminhchau"  # Replace with the actual path
remove_frontmatter(directory_path)

In [14]:
import os
import re
import unidecode

def slugify(text):
    """
    Turns arbitrary text into a lowercase, hyphen-separated slug.

    The text is transliterated with ``unidecode`` and lowercased, then every
    run of non-word characters and/or underscores is collapsed into a single
    hyphen. Leading or trailing hyphens are not stripped here.

    Args:
        text (str): The text to convert.

    Returns:
        str: The slug.
    """
    ascii_lowered = unidecode.unidecode(text).lower()
    separator_run = re.compile(r'[\W_]+')
    return separator_run.sub('-', ascii_lowered)

def format_number(s):
    """
    Zero-pads a leading "<number>. " prefix to at least three digits.

    The string is cut at the first ". ". When the part before it parses as
    an integer, that integer is re-rendered with a minimum width of three
    (e.g. "5. Title" -> "005. Title"). In every other case the input is
    returned unchanged.

    Args:
        s (str): The text to format, typically a numbered title.

    Returns:
        str: The text with a padded number, or the original text.
    """
    head, separator, tail = s.partition(". ")
    if not separator:
        # No ". " anywhere, so there is no numeric prefix to pad.
        return s

    try:
        value = int(head)
    except ValueError:
        return s

    return f"{value:03d}. {tail}"
def remove_trailing_hyphen(s):
    """
    Drops a single trailing hyphen from the string, if there is one.

    Args:
        s (str): The text to trim.

    Returns:
        str: The text without its last character when that character is "-",
            otherwise the text unchanged.
    """
    return s[:-1] if s.endswith("-") else s
def generate_slug(directory):
    """
    Generates slug-based filenames for numbered markdown files in a directory.

    Only files named "<digits>.md" are considered. Each one is read as UTF-8
    and the text of its first "# " heading becomes the basis of the new name:
    a leading "<number>. " is zero-padded via ``format_number``, the result is
    slugified, one trailing hyphen is removed and ".md" is appended. Files
    without such a heading are left out of the mapping. Nothing is renamed
    here; the function only reads.

    Note that two files with the same heading produce the same new filename;
    this function does not detect such collisions.

    Args:
        directory (str): The directory containing the markdown files.

    Returns:
        dict: A dictionary mapping original filenames to new filenames.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a candidate file is not valid UTF-8.
    """
    filename_mapping = {}
    for filename in os.listdir(directory):
        # Skip anything that is not purely numeric, e.g. already-renamed files.
        if not re.match(r"^(\d+)\.md$", filename):
            continue

        filepath = os.path.join(directory, filename)
        # Explicit UTF-8 so headings with diacritics decode the same way on
        # every platform; the handle is released before any processing.
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()

        # First line that looks like a markdown heading ("# Title").
        title_match = re.search(r"^#\s+(.+)$", content, re.MULTILINE)
        if not title_match:
            continue

        title = format_number(title_match.group(1).strip())
        slug = remove_trailing_hyphen(slugify(title.lower()))
        filename_mapping[filename] = f"{slug}.md"
    return filename_mapping

# Example usage: build the old-name -> new-name mapping (no files are renamed here).
# NOTE: this rebinds `directory_path`, which an earlier cell also assigns.
directory_path = "../docs/kinhtrungbo/nanamoli-bodhi"  # Replace with the actual path
filename_mapping = generate_slug(directory_path)

# Inspect the proposed names before renaming anything.
print(filename_mapping)

{'28.md': '028-the-greater-discourse-on-the-simile-of-the-elephant-s.md', '5.md': '005-without-blemishes.md', '38.md': '038-the-greater-discourse-on-the-destruction-of-craving.md', '18.md': '018-the-honeyball.md', '49.md': '049-the-invitation-of-a-brahma.md', '1.md': '001-the-root-of-all-things.md', '19.md': '019-two-kinds-of-thought.md', '48.md': '048-the-kosambians.md', '29.md': '029-the-greater-discourse-on-the-simile-of-the-heartwood.md', '4.md': '004-fear-and-dread.md', '39.md': '039-the-greater-discourse-at-assapura.md', '47.md': '047-the-inquirer.md', '16.md': '016-the-wilderness-in-the-heart.md', '22.md': '022-the-simile-of-the-snake.md', '32.md': '032-the-greater-discourse-in-gosinga.md', '26.md': '026-the-noble-search.md', '12.md': '012-the-greater-discourse-on-the-lion-s-roar.md', '43.md': '043-the-greater-series-of-questions-and-answers.md', '36.md': '036-the-greater-discourse-to-saccaka.md', '27.md': '027-the-shorter-discourse-on-the-simile-of-the-elephant-s.md', '13.md': 

In [12]:
# Quick inspection: confirm that `filename_mapping` is a dict.
type(filename_mapping)

dict

In [15]:
def rename_files(directory, name_map):
    """
    Renames files in a directory based on a dictionary mapping old names to new names.

    Each entry is handled independently and the outcome is reported with a
    printed message: renamed, source not found, skipped because the target
    already exists, or failed with an error. An existing target is never
    overwritten, so two sources that map to the same new name cannot silently
    replace one another.

    Args:
        directory (str): The path to the directory containing the files to rename.
        name_map (dict): A dictionary where keys are the old filenames and values are the new filenames.
    """

    for old_name, new_name in name_map.items():
        old_path = os.path.join(directory, old_name)
        new_path = os.path.join(directory, new_name)

        try:
            if not os.path.exists(old_path):
                print(f"File not found: '{old_name}'")
            elif os.path.exists(new_path) and not os.path.samefile(old_path, new_path):
                # os.rename() replaces an existing file on POSIX without any
                # warning; refuse instead of losing data. The samefile() test
                # still lets a no-op or case-only rename through.
                print(f"Skipped '{old_name}': target '{new_name}' already exists")
            else:
                os.rename(old_path, new_path)
                print(f"Renamed '{old_name}' to '{new_name}'")
        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            print(f"Error renaming '{old_name}': {e}")

# Apply the mapping on disk. Relies on `directory_path` and `filename_mapping`
# as assigned by the slug-generation cell, so run that cell first.
rename_files(directory_path , filename_mapping)

Renamed '28.md' to '028-the-greater-discourse-on-the-simile-of-the-elephant-s.md'
Renamed '5.md' to '005-without-blemishes.md'
Renamed '38.md' to '038-the-greater-discourse-on-the-destruction-of-craving.md'
Renamed '18.md' to '018-the-honeyball.md'
Renamed '49.md' to '049-the-invitation-of-a-brahma.md'
Renamed '1.md' to '001-the-root-of-all-things.md'
Renamed '19.md' to '019-two-kinds-of-thought.md'
Renamed '48.md' to '048-the-kosambians.md'
Renamed '29.md' to '029-the-greater-discourse-on-the-simile-of-the-heartwood.md'
Renamed '4.md' to '004-fear-and-dread.md'
Renamed '39.md' to '039-the-greater-discourse-at-assapura.md'
Renamed '47.md' to '047-the-inquirer.md'
Renamed '16.md' to '016-the-wilderness-in-the-heart.md'
Renamed '22.md' to '022-the-simile-of-the-snake.md'
Renamed '32.md' to '032-the-greater-discourse-in-gosinga.md'
Renamed '26.md' to '026-the-noble-search.md'
Renamed '12.md' to '012-the-greater-discourse-on-the-lion-s-roar.md'
Renamed '43.md' to '043-the-greater-series-of