In [36]:
import re
import os
# 01 Read the Thich Minh Chau files as the base list of JSON-style dicts
def read_title_and_file_path(filepath):
    """
    Read a markdown file and extract its title and Sutta name.

    The title is the text of the first level-1 heading, i.e. the first line
    that starts with ``# ``. The Sutta name is the text inside the first
    ``***( ... )***`` marker found in the file.

    Args:
        filepath: The path to the markdown file.

    Returns:
        A dictionary ``{"title": ..., "sutta": ..., "file1": filepath}``.
        ``title`` and/or ``sutta`` are None when they cannot be found (a
        warning is printed in that case). None is returned only when the
        file itself cannot be read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: File not found: {filepath}")
        return None
    except Exception as e:
        print(f"Error reading file {filepath}: {e}")
        return None

    # Prefer a real H1: '#' at the start of a line followed by blanks.
    # re.MULTILINE is required so headings below front matter are found,
    # and anchoring keeps '## sub-headings' and mid-line '# ' from matching.
    title_match = re.search(r"^#[ \t]+(.+)$", content, re.MULTILINE)
    if title_match is None:
        # Legacy loose pattern, kept so files without a strict H1 still
        # yield the same title they did before.
        title_match = re.search(r"# (.*?)\n", content)
    title = title_match.group(1).strip() if title_match else None

    # Non-greedy capture: stop at the first ')***' so a line holding two
    # markers does not swallow everything between them.
    sutta_match = re.search(r"\*\*\*\((.*?)\)\*\*\*", content)
    sutta_name = sutta_match.group(1).strip() if sutta_match else None

    if not (title and sutta_name):
        print(f"Warning: Could not extract all information from {filepath}")
        print(f"  Title found: {title}")
        print(f"  Sutta Name found: {sutta_name}")

    # Return what we have, even if incomplete.
    return {
        "title": title,
        "sutta": sutta_name,
        "file1": filepath
    }


# Example usage:
# filepath = '../kinhtrungbo/thichminhchau/002-kinh-tat-ca-cac-lau-hoac.md' # Replace with your actual file path




# result = read_title_and_file_path(filepath)

# if result:
#     print(result)
# else:
#     print("Could not extract information from the file.")

In [37]:
def convert_to_title_index_to_markdown_index(data):
    """
    Render a list of dictionaries as a Markdown bullet list.

    Example line:
      - [1. KINH PHÁP MÔN CĂN BẢN](/kinhtrungbo/thichminhchau/001-kinh-phap-mon-can-ban.md)

    Args:
      data: A list of dictionaries. The 'title' and 'file1' keys are used;
            either may be missing or empty.

    Returns:
      A string with one newline-terminated bullet per usable item. Items
      with neither a title nor a file contribute nothing.
    """
    bullets = []
    for record in data:
        heading = record.get('title', '')
        target = record.get('file1', '')
        if not heading and not target:
            # Nothing to show for this record.
            continue
        if heading and target:
            bullets.append(f"- [{heading}]({target})\n")
        elif heading:
            # Title without a link target.
            bullets.append(f"- {heading} (No file link)\n")
        else:
            # Link target without a title.
            bullets.append(f"- [Unknown Title]({target})\n")
    return "".join(bullets)

In [None]:
# 1/4 List all titles of Thich Minh Chau as the base
def list_files_in_folder(folder_path):
    """Return the sorted '<folder_path>/<name>' paths of entries ending in '.md'."""
    markdown_paths = []
    for entry in os.listdir(folder_path):
        if entry.endswith('.md'):
            markdown_paths.append(f'{folder_path}/{entry}')
    markdown_paths.sort()
    return markdown_paths

# thich minh chau

def thichminhchau(folder_path='../kinhtrungbo/thichminhchau'):
    """
    Build the title index for the Thich Minh Chau markdown files.

    Args:
        folder_path: Folder holding the .md files. Defaults to a path
            relative to the notebook, the same convention nanamoli() uses.
            The previous machine-specific absolute path was turned into
            'sers/ng/...' by the two-character strip below.

    Returns:
        A list of dicts as returned by read_title_and_file_path, with the
        leading '..' removed from 'file1' so that
        '../kinhtrungbo/x.md' becomes '/kinhtrungbo/x.md'.
        Files that could not be read are skipped.
    """
    tmc_title = []
    for file in list_files_in_folder(folder_path):
        result = read_title_and_file_path(file)
        if result is None:
            # Unreadable file; the reader has already printed the reason.
            continue
        # Only strip when the '..' prefix is really there, so any other
        # folder_path is left intact instead of losing two characters.
        if result['file1'].startswith('..'):
            result['file1'] = result['file1'][2:]
        tmc_title.append(result)

    return tmc_title


def nanamoli(folder_path='../kinhtrungbo/nanamoli-bodhi'):
    """
    Build the title index for the Nanamoli-Bodhi markdown files.

    Args:
        folder_path: Folder holding the .md files (relative to the notebook
            by default).

    Returns:
        A list of dicts as returned by read_title_and_file_path, with the
        leading '..' removed from 'file1' so that
        '../kinhtrungbo/x.md' becomes '/kinhtrungbo/x.md'.
        Files that could not be read are skipped.
    """
    nanamoli_title = []
    for file in list_files_in_folder(folder_path):
        result = read_title_and_file_path(file)
        if result is None:
            # The reader returns None on read errors; indexing it would
            # raise TypeError and abort the whole listing.
            continue
        # Strip the '..' prefix only when present.
        if result['file1'].startswith('..'):
            result['file1'] = result['file1'][2:]
        nanamoli_title.append(result)

    return nanamoli_title


# Build the Nanamoli-Bodhi title index and print it for a quick visual check.
nanamoli_title = nanamoli()
print(nanamoli_title)
# folder_path = '/Users/ng/projects/n5/docs/kinhtrungbo/nanamoli-bodhi'
# file_list = list_files_in_folder(folder_path)
# results = []
# for file in file_list:
#     result = parse_markdown_file(filepath)
#     result['file1'] = result['file1'][2:]
#     results.append(result)

# print(results)


In [None]:
# Show the current value of nanamoli_title as the cell output.
nanamoli_title
#nanamoli_title_md_index = convert_to_title_index_to_markdown_index(nanamoli_title)
#print(nanamoli_title_md_index)


In [None]:
import json
# print(json.dumps(tmc_title, indent=2, ensure_ascii=False))

In [None]:


# print(convert_to_list_to_markdown_index(tmc_title))

In [None]:
# 2/4 Get the list of the second document set: the '.md' file names under
# ../kinhtrungbo/nanamoli-bodhi-vi, prefixed with '/kinhtrungbo/nanamoli-bodhi-vi/'
# and sorted.
# NOTE(review): this rebinds nanamoli_title, which an earlier cell assigns from
# nanamoli(); after this cell it is a list of path strings instead.
nanamoli_title = sorted(['/kinhtrungbo/nanamoli-bodhi-vi/'+f for f in os.listdir('../kinhtrungbo/nanamoli-bodhi-vi') if f.endswith('.md')])
print(len(nanamoli_title))

In [None]:
# 3/4 combine 1st + 2nd list
def combine_lists(list_a, list_b):
    """
    Pair two equally long lists into side-by-side comparison entries.

    Args:
        list_a: The first list (A). Each item is a dict with 'title' and
            'file1' keys; 'file1' becomes the "right" document and its file
            name (without the '.md' extension) becomes the slug suffix.
        list_b: The second list (B). Each item is stored as-is under "left".

    Returns:
        A new list of ``{"params": {"slug": ..., "data": {...}}}`` dicts.
        Items of list_a that lack 'file1' or 'title' are reported and
        skipped. Returns an empty list if the input lists have different
        lengths.
    """
    if len(list_a) != len(list_b):
        print("Error: Lists A and B must have the same length.")
        return []

    slug_prefix = "tmc-mn-bodhi-"
    extension = '.md'
    combined_list = []
    for i, (a_item, b_item) in enumerate(zip(list_a, list_b)):
        # Read every required key inside the guarded section so a missing
        # 'title' is skipped the same way a missing 'file1' is, instead of
        # raising later while the entry is being built.
        try:
            right_path = a_item['file1']
            title = a_item['title']
        except KeyError as e:
            print(f"Error processing item {i}: {e}.  Skipping this item.")
            continue

        # File name without leading path; drop only a trailing '.md' so a
        # '.md' occurring inside the name is preserved.
        filename = right_path.split('/')[-1]
        if filename.endswith(extension):
            filename = filename[:-len(extension)]

        combined_list.append({
            "params": {
                "slug": slug_prefix + filename,
                "data": {
                    "title": title,
                    "left": b_item,
                    "right": right_path,
                    "leftTitle": "Nanamoli-Bodhi",
                    "rightTitle": "Thích Minh Châu",
                    "notePath": ""
                }
            }
        })

    return combined_list

# A = tmc_title[:50]
# B = nanamoli_title


# combined_result = combine_lists(A, B)
# # Nicely print the result using json.dumps for readability
# import json
# print(json.dumps(combined_result, indent=2, ensure_ascii=False))

In [None]:
def json_to_markdown_to_comare_list(json_data, path):
    """
    Build a Markdown bullet list of links from comparison entries.

    Args:
        json_data: Either a JSON string or an already-parsed Python object.
            The (parsed) value must be a list whose items look like
            ``{"params": {"slug": ..., "data": {"title": ...}}}``.
        path: Prefix placed before each slug in the link target.

    Returns:
        A string of ``- [title](path/slug)`` lines joined by newlines.
        Returns "" when the JSON cannot be decoded or the data is not a
        list. Items missing a required key are reported and skipped.
    """
    records = json_data
    if isinstance(json_data, str):
        try:
            records = json.loads(json_data)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
            return ""

    if not isinstance(records, list):
        print("Error: Input JSON data must be an array.")
        return ""

    bullets = []
    for entry in records:
        try:
            params = entry['params']
            slug = params['slug']
            title = params['data']['title']
        except KeyError as e:
            # Skip entries that lack a required key.
            print(f"Error: Missing key in JSON data: {e}")
            continue
        # The slug is the last segment of the link target.
        bullets.append(f"- [{title}]({path}/{slug})")

    return "\n".join(bullets)

# compate_markdown_index = json_to_markdown_to_comare_list(combined_result, '/kinhtrungbo/c-nm-tmc-vi')

# json.dumps(compate_markdown_index, indent=2, ensure_ascii=False)
# print(compate_markdown_index)

In [None]:
# 4/4 add nextlink, backlink
def generate_link(index, item):
  """Build the {"text", "link"} object used for nextlink and backlink.

  `index` is accepted for the caller's convenience but is not used; the
  result depends only on the item's title and slug.
  """
  params = item['params']
  text = params['data']['title']
  slug = params['slug']
  return {"text": text, "link": f"/kinhtrungbo/c-nm-tmc-vi/{slug}"}
def add_links(data):
    """Attach 'backlink' and 'nextlink' to every entry of `data`, in place.

    Each link is the object built by generate_link for the neighbouring
    entry. The first entry gets backlink False and the last entry gets
    nextlink False. The same (mutated) list is returned.
    """
    last_pos = len(data) - 1
    for pos, entry in enumerate(data):
        payload = entry["params"]["data"]

        # Link to the previous entry, if there is one.
        if pos > 0:
            payload["backlink"] = generate_link(pos - 1, data[pos - 1])
        else:
            payload["backlink"] = False

        # Link to the following entry, if there is one.
        if pos < last_pos:
            payload["nextlink"] = generate_link(pos + 1, data[pos + 1])
        else:
            payload["nextlink"] = False

    return data

# updated_data = add_links(combined_result)

# # Print the updated data (for verification)
# import json
# print(json.dumps(updated_data, indent=2, ensure_ascii=False))