In [8]:
# v1
# update file
import re
import json
from google import genai
from google.genai import types

system_prompt = """Translate texts from English to Vietnamese.
The subject matter is Early Buddhism. Translations should be *easy to understand, using simple, everyday language*.

RULE:
1. Maintain the original paragraph structure; do not add or remove any text.

2. For key terms or concepts that require clarification, please provide the Vietnamese translation,
followed by the original English word in (), and a brief explanation if necessary.
eg1: `giới cấm thủ (adherence to rules and observances-sự tuân thủ các quy tắc và việc thực hiện các nghi lễ hoặc phong tục)`
eg2: `sân hận (ill will)`

3. Keep all comment in markdown os origin position eg: `<!--p9-->`

4. Place important-keyword/concept-keyword that at the end as `Từ ngữ`.
 following format: - Vietnamese term/ pali term / English term / Brief explanation in Vietnamese
"""

system_prompt_note = """Translate annotation texts from English to Vietnamese. The subject matter is Early Buddhism. Your translations should be easy to understand, using simple, everyday language. Maintain the original paragraph structure; do not add or remove any text. For key terms or concepts that require clarification, please provide the Vietnamese translation, followed by the original English word in quotation marks, and a brief explanation if necessary.

The begin of annotation begin with `<number> content` eg: `6 Paṭhaviṁ ` or have dot `6. Paṭhaviṁ` when translate you will replace number with markdown footnote format `[^6]:`, eg:  `6 Paṭhaviṁ` -> `[^6]: translated text..`, `6. Paṭhaviṁ` -> `[^6]: translated text..`
"""

response = None
client = None
def translate_markdown(input_file: str, output_file: str, api_key: str, prompt, model):
    """
    Translates an English markdown file to Vietnamese using Google GenAI.

    Args:
        input_file: Path to the input English markdown file (e.g., "abc.md").
        output_file: Path to the output Vietnamese markdown file (e.g., "abc.vn.md").
        api_key: Your Google GenAI API key.
    """

    global response
    global client

    client = genai.Client(api_key=api_key)

    file = client.files.upload(file=input_file)
    # --- Core Translation Logic ---
    try:
        err = ""
        response = client.models.generate_content(
            model= model,
            contents=[file],
            config=types.GenerateContentConfig(
                system_instruction=prompt,
                max_output_tokens=50000,
                # candidates_token_count = 20000,
                # input_token_limit = 20000,
                temperature=0.05
                ),
        )

        if response.candidates:
            if response.candidates[0].finish_reason != types.FinishReason.STOP:
                err = f"Chunk Finish Reason: {response.candidates[0].finish_reason}"
                #If it is not stop, consider reducing the chunk size
        else: #No candidated return
            err = "response.candidates is empty. reduce the chunk size"


        vietnamese_markdown = response.text

    except Exception as e:
      err = f"Error during translation: {e}"
    finally:
         # --- Output Handling ---
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(vietnamese_markdown)
                return f'{input_file} {output_file}:{err}'
        except Exception as e:
            err = f"Error writing output file: {e}"

    return f'{input_file} {output_file}:{err}'


import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

# peterontech key
api_key = os.environ.get("GOOGLE_API_KEY")
model="gemini-2.0-pro"
model= "gemini-2.0-pro-exp-02-05"
model ="gemini-2.0-flash-thinking-exp-01-21"
model = "gemini-2.0-flash"
# model="gemini-2.0-flash"

def trans_file():
    input_filename = "../kinhtrungbo/nanamoli-bodhi-en/002-all-the-taints.md"
    output_filename = "../kinhtrungbo/nanamoli-bodhi-vi/002-all-the-taints.md"

    translate_markdown(input_filename, output_filename, api_key, system_prompt, model)

# trans_file()

def trans_folder(input_folder, output_folder, limit=3):
    results = []
    count = 1
    for filename in os.listdir(input_folder):
        if limit is not None and count >= limit:
            break  # Stop if the limit is reached

        if re.match(r"^\d{3}-.*\.md$", filename):  # Check for the filename pattern
            input_filename = os.path.join(input_folder, filename)
            output_filename = os.path.join(output_folder, filename)

            print(f"Translating {input_filename} to {output_filename}")

            try:
                # pass
                result = translate_markdown(input_filename, output_filename, api_key, system_prompt, model)
                results.append(result)
                count += 1
            except Exception as e:
                print(f"Error processing {filename}: {e}")  # Handle potential errors

    with open("tran_ressult.txt", 'w', encoding='utf-8') as outfile:
        outfile.write(json.dumps(results))

trans_folder("../kinhtrungbo/nanamoli-bodhi-en", "../kinhtrungbo/nanamoli-bodhi-vi")


Translating ../kinhtrungbo/nanamoli-bodhi-en/107-to-ganaka-moggallana.md to ../kinhtrungbo/nanamoli-bodhi-vi/107-to-ganaka-moggallana.md
Translating ../kinhtrungbo/nanamoli-bodhi-en/054-to-potaliya.md to ../kinhtrungbo/nanamoli-bodhi-vi/054-to-potaliya.md


In [None]:
# translate source file lis
import os
source_file_list = [os.path.join('../kinhtrungbo/nanamoli-bodhi', f) for f in os.listdir('../kinhtrungbo/nanamoli-bodhi') if f.endswith('.md')]

dest_file_list = []
for path in source_file_list:
    # Replace 'nanamoli-bodhi' with 'nanamoli-bodhi-vi'
    new_path = path.replace('nanamoli-bodhi', 'nanamoli-bodhi-vi')

    # Insert '.vi' before '.md'
    if new_path.endswith('.md'):
        new_path = new_path[:-3] + '.vi.md'  # Insert ".vi" before ".md"

    dest_file_list.append(new_path)


import time
trans_res = []
try:
    # for i in range(0, len(source_file_list)-30):
    # for i in range(max(0, len(source_file_list) - 30), len(source_file_list)):
    for i in range(44, len(source_file_list)):
        print (dest_file_list[i], source_file_list[i])
        input_filename = source_file_list[i]
        output_filename = dest_file_list[i]
        r = translate_markdown(input_filename, output_filename, api_key, system_prompt, model)
        trans_res.append(r)
        time.sleep(20)
        break
except Exception as e:
    raise e
    pass

print(trans_res)


In [None]:
for i in range(0, len(source_file_list)):
    print(i)