In [4]:
import re
import os
# 01 read the thichminhchau files as the base JSON records
def parse_markdown_file(filepath):
    """
    Read a Thich Minh Chau markdown file and extract its title and Sutta name.

    The title is the text of the first level-1 heading (a line that starts
    with "# ").  When the file has no level-1 heading, the first heading of
    any level is used instead.  The Sutta name is the text inside the first
    ``***(...)***`` marker that appears on a single line.

    Args:
        filepath: The path to the markdown file.

    Returns:
        A dictionary ``{"title": ..., "sutta": ..., "file1": filepath}``.
        "title" and/or "sutta" are None when they were not found; in that
        case a warning is printed and the partial dictionary is still
        returned.  Returns None if the file cannot be read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: File not found: {filepath}")
        return None
    except Exception as e:
        print(f"Error reading file {filepath}: {e}")
        return None

    # re.MULTILINE lets "^"/"$" anchor on every line, so the heading does not
    # have to be the very first line and does not need a trailing newline.
    title_match = re.search(r"^#[ \t]+(.+)$", content, re.MULTILINE)
    if title_match is None:
        # No h1 in the file: fall back to the first heading of any level.
        title_match = re.search(r"^#{1,6}[ \t]+(.+)$", content, re.MULTILINE)
    title = title_match.group(1).strip() if title_match else None

    # Non-greedy so that a line holding two "***(...)***" markers yields only
    # the first one; "." never crosses a newline, so the match stays on one line.
    sutta_match = re.search(r"\*\*\*\((.*?)\)\*\*\*", content)
    sutta_name = sutta_match.group(1).strip() if sutta_match else None

    if not (title and sutta_name):
        print(f"Warning: Could not extract all information from {filepath}")
        print(f"  Title found: {title}")
        print(f"  Sutta Name found: {sutta_name}")

    # Return what we have, even if incomplete.
    return {
        "title": title,
        "sutta": sutta_name,
        "file1": filepath
    }


# Example usage:
# filepath = '../kinhtrungbo/thichminhchau/002-kinh-tat-ca-cac-lau-hoac.md' # Replace with your actual file path




# result = parse_markdown_file(filepath)

# if result:
#     print(result)
# else:
#     print("Could not extract information from the file.")

In [13]:
# 1/4 list all titles of Thich Minh Chau as the base list
def list_files_in_folder(folder_path, prefix='../kinhtrungbo/thichminhchau/'):
    """
    List the markdown files of a folder as sorted, prefixed paths.

    Args:
        folder_path: Directory whose entries are listed with os.listdir.
        prefix: String prepended to every returned file name.  The default
            keeps the original behaviour (paths under the thichminhchau
            folder); pass another prefix when listing a different folder.

    Returns:
        A sorted list of ``prefix + name`` for every entry in ``folder_path``
        whose name ends with '.md'.
    """
    return sorted(prefix + name for name in os.listdir(folder_path) if name.endswith('.md'))


# thich minh chau
# Relative path, consistent with the '../kinhtrungbo/...' paths that
# list_files_in_folder returns and parse_markdown_file opens.
folder_path = '../kinhtrungbo/thichminhchau'
file_list = list_files_in_folder(folder_path)
tmc_title = []
for file in file_list:
    result = parse_markdown_file(file)
    if result is None:
        # The file could not be read; parse_markdown_file already reported it.
        continue
    # Drop the leading '..' so the stored path starts with '/kinhtrungbo/...'.
    result['file1'] = result['file1'][2:]
    tmc_title.append(result)

print(tmc_title)

# folder_path = '/Users/ng/projects/n5/docs/kinhtrungbo/nanamoli-bodhi'
# file_list = list_files_in_folder(folder_path)
# results = []
# for file in file_list:
#     result = parse_markdown_file(filepath)
#     result['file1'] = result['file1'][2:]
#     results.append(result)

# print(results)


  Title found: 1. Lời Nói Ðầu
  Sutta Name found: None
  Title found: Kinh Trung Bộ
  Sutta Name found: None
[{'title': '1. KINH PHÁP MÔN CĂN BẢN', 'sutta': 'Mùlapariyàya Sutta', 'file1': '/kinhtrungbo/thichminhchau/001-kinh-phap-mon-can-ban.md'}, {'title': '2. KINH TẤT CẢ CÁC LẬU HOẶC', 'sutta': 'Sabbàsava Sutta', 'file1': '/kinhtrungbo/thichminhchau/002-kinh-tat-ca-cac-lau-hoac.md'}, {'title': '3. KINH THỪA TỰ PHÁP', 'sutta': 'Dhammadàyàda Sutta', 'file1': '/kinhtrungbo/thichminhchau/003-kinh-thua-tu-phap.md'}, {'title': '4. KINH SỢ HÃI KHIẾP ÐẢM', 'sutta': 'Bhayabherava Sutta', 'file1': '/kinhtrungbo/thichminhchau/004-kinh-so-hai-khiep-dam.md'}, {'title': '5. KINH KHÔNG UẾ NHIỄM', 'sutta': 'Anangana Sutta', 'file1': '/kinhtrungbo/thichminhchau/005-kinh-khong-ue-nhiem.md'}, {'title': '6. KINH ƯỚC NGUYỆN', 'sutta': 'Akankheyya Sutta', 'file1': '/kinhtrungbo/thichminhchau/006-kinh-uoc-nguyen.md'}, {'title': '7. KINH VÍ DỤ TẤM VẢI', 'sutta': 'Vatthùpama Sutta', 'file1': '/kinhtrungbo/th

In [18]:
# 2/4 get list of 2nd docs
# Collect every markdown file of the second translation, then sort the paths
nanamoli_title = [
    '/kinhtrungbo/nanamoli-bodhi-vi/' + name
    for name in os.listdir('../kinhtrungbo/nanamoli-bodhi-vi')
    if name.endswith('.md')
]
nanamoli_title.sort()
print(len(nanamoli_title))

50


In [19]:
# 3/4 combine 1st + 2nd list
def combine_lists(list_a, list_b):
    """
    Pair the items of two equally long lists into page-parameter records.

    Args:
        list_a: The first list (A).  Each item is a dict with the keys
            'title' and 'file1' (the path shown on the right side).
        list_b: The second list (B).  Each item is stored unchanged as the
            left-side path.

    Returns:
        A new list of ``{"params": {"slug": ..., "data": {...}}}`` dicts, one
        per pair.  The slug is "tmc-mn-bodhi-" followed by the file name of
        'file1' without its '.md' extension.  Items of A that are malformed
        (missing 'title' or 'file1', or not a dict) are reported and skipped.
        Returns an empty list if the input lists have different lengths.
    """
    if len(list_a) != len(list_b):
        print("Error: Lists A and B must have the same length.")
        return []

    slug_prefix = "tmc-mn-bodhi-"
    extension = ".md"

    combined_list = []
    for i, (a_item, b_item) in enumerate(zip(list_a, list_b)):
        # Read everything needed from a_item inside the guard, so that any
        # malformed item is skipped instead of crashing further down.
        try:
            title = a_item['title']
            right_path = a_item['file1']
            filename = right_path.split('/')[-1]
        except (KeyError, IndexError, TypeError, AttributeError) as e:
            print(f"Error processing item {i}: {e}.  Skipping this item.")
            continue  # Skip to the next iteration if an error occurs

        # Strip only a trailing '.md'; a '.md' elsewhere in the name is kept.
        if filename.endswith(extension):
            filename = filename[:-len(extension)]

        combined_list.append({
            "params": {
                "slug": slug_prefix + filename,
                "data": {
                    "title": title,
                    "left": b_item,
                    "right": right_path,
                    "leftTitle": "Nanamoli-Bodhi",
                    "rightTitle": "Thích Minh Châu",
                    "notePath": ""
                }
            }
        })

    return combined_list

import json

# B is the full second list; A takes as many base entries as B has files,
# so the two lists line up without a hard-coded count.
B = nanamoli_title
A = tmc_title[:len(B)]


combined_result = combine_lists(A, B)
# Nicely print the result using json.dumps for readability
print(json.dumps(combined_result, indent=2, ensure_ascii=False))

[
  {
    "params": {
      "slug": "tmc-mn-bodhi-001-kinh-phap-mon-can-ban",
      "data": {
        "title": "1. KINH PHÁP MÔN CĂN BẢN",
        "left": "/kinhtrungbo/nanamoli-bodhi-vi/001-the-root-of-all-things.vi.md",
        "right": "/kinhtrungbo/thichminhchau/001-kinh-phap-mon-can-ban.md",
        "leftTitle": "Nanamoli-Bodhi",
        "rightTitle": "Thích Minh Châu",
        "notePath": ""
      }
    }
  },
  {
    "params": {
      "slug": "tmc-mn-bodhi-002-kinh-tat-ca-cac-lau-hoac",
      "data": {
        "title": "2. KINH TẤT CẢ CÁC LẬU HOẶC",
        "left": "/kinhtrungbo/nanamoli-bodhi-vi/002-all-the-taints.vi.md",
        "right": "/kinhtrungbo/thichminhchau/002-kinh-tat-ca-cac-lau-hoac.md",
        "leftTitle": "Nanamoli-Bodhi",
        "rightTitle": "Thích Minh Châu",
        "notePath": ""
      }
    }
  },
  {
    "params": {
      "slug": "tmc-mn-bodhi-003-kinh-thua-tu-phap",
      "data": {
        "title": "3. KINH THỪA TỰ PHÁP",
        "left": "/kinhtrungbo/na

In [21]:
# 4/4 add nextlink, backlink
def generate_link(index, item):
    """
    Build the link object used for nextlink and backlink.

    Args:
        index: Position of the target item; accepted but not used here.
        item: A combined record holding 'params' -> 'slug' and
            'params' -> 'data' -> 'title'.

    Returns:
        A dict with the item's title as "text" and its page URL as "link".
    """
    params = item['params']
    title = params['data']['title']
    slug = params['slug']
    link = {}
    link["text"] = title
    link["link"] = f"/kinhtrungbo/c-nm-tmc-vi/{slug}"
    return link
def add_links(data):
    """
    Attach "backlink" and "nextlink" to every record, in place.

    Each record's ``["params"]["data"]`` dict receives a link to the previous
    record ("backlink") and to the following record ("nextlink"), built with
    generate_link.  The first record gets ``backlink = False`` and the last
    record gets ``nextlink = False``.

    Returns:
        The same ``data`` list that was passed in.
    """
    last = len(data) - 1
    for pos, entry in enumerate(data):
        payload = entry["params"]["data"]

        # Link to the previous record, or False at the start of the list
        payload["backlink"] = generate_link(pos - 1, data[pos - 1]) if pos > 0 else False

        # Link to the following record, or False at the end of the list
        payload["nextlink"] = generate_link(pos + 1, data[pos + 1]) if pos < last else False

    return data

import json

# Attach the navigation links to the combined records
updated_data = add_links(combined_result)

# Dump the linked records as indented JSON (for verification)
linked_json = json.dumps(updated_data, ensure_ascii=False, indent=2)
print(linked_json)

[
  {
    "params": {
      "slug": "tmc-mn-bodhi-001-kinh-phap-mon-can-ban",
      "data": {
        "title": "1. KINH PHÁP MÔN CĂN BẢN",
        "left": "/kinhtrungbo/nanamoli-bodhi-vi/001-the-root-of-all-things.vi.md",
        "right": "/kinhtrungbo/thichminhchau/001-kinh-phap-mon-can-ban.md",
        "leftTitle": "Nanamoli-Bodhi",
        "rightTitle": "Thích Minh Châu",
        "notePath": "",
        "backlink": false,
        "nextlink": {
          "text": "2. KINH TẤT CẢ CÁC LẬU HOẶC",
          "link": "/kinhtrungbo/c-nm-tmc-vi/tmc-mn-bodhi-002-kinh-tat-ca-cac-lau-hoac"
        }
      }
    }
  },
  {
    "params": {
      "slug": "tmc-mn-bodhi-002-kinh-tat-ca-cac-lau-hoac",
      "data": {
        "title": "2. KINH TẤT CẢ CÁC LẬU HOẶC",
        "left": "/kinhtrungbo/nanamoli-bodhi-vi/002-all-the-taints.vi.md",
        "right": "/kinhtrungbo/thichminhchau/002-kinh-tat-ca-cac-lau-hoac.md",
        "leftTitle": "Nanamoli-Bodhi",
        "rightTitle": "Thích Minh Châu",
       