In [23]:
# v1
# update file
import re
import json
from google import genai
from google.genai import types

# System instruction for translating the main text. trans_folder() passes this
# string to translate_markdown() as `prompt`.
# NOTE(review): the wording below contains a few typos (e.g. "os origin position");
# it is left untouched because the text is sent to the model verbatim.
system_prompt = """Translate texts from English to Vietnamese.
The subject matter is Early Buddhism. Translations should be *easy to understand, using simple, everyday language*.

RULE:
1. Maintain the original paragraph structure; do not add or remove any text.

2. For key terms or concepts that require clarification, please provide the Vietnamese translation,
followed by the original English word in (), and a brief explanation if necessary.
eg1: `giới cấm thủ (adherence to rules and observances-sự tuân thủ các quy tắc và việc thực hiện các nghi lễ hoặc phong tục)`
eg2: `sân hận (ill will)`

3. Keep all comment in markdown os origin position eg: `<!--p9-->`

4. Place important-keyword/concept-keyword that at the end as `Từ ngữ`.
 following format: - Vietnamese term/ pali term / English term / Brief explanation in Vietnamese
"""

# System instruction variant for annotation/footnote files: it additionally asks
# the model to rewrite leading note numbers as markdown footnote labels (`[^n]:`).
# Not referenced by any of the cells shown here.
system_prompt_note = """Translate annotation texts from English to Vietnamese. The subject matter is Early Buddhism. Your translations should be easy to understand, using simple, everyday language. Maintain the original paragraph structure; do not add or remove any text. For key terms or concepts that require clarification, please provide the Vietnamese translation, followed by the original English word in quotation marks, and a brief explanation if necessary.

The begin of annotation begin with `<number> content` eg: `6 Paṭhaviṁ ` or have dot `6. Paṭhaviṁ` when translate you will replace number with markdown footnote format `[^6]:`, eg:  `6 Paṭhaviṁ` -> `[^6]: translated text..`, `6. Paṭhaviṁ` -> `[^6]: translated text..`
"""

# Module-level handles to the most recent API response and client.
# translate_markdown() rebinds both via `global`, so they can be inspected
# from the notebook after a call.
response = None
client = None
def translate_markdown(input_file: str, output_file: str, api_key: str, prompt, model, log):
    """
    Translate an English markdown file to Vietnamese using Google GenAI.

    The input file is uploaded through the Files API, sent to ``model`` with
    ``prompt`` as the system instruction, and the returned text is written to
    ``output_file``. The output file is opened only once translated text is
    actually available, so a failed or empty response never truncates an
    existing translation.

    Args:
        input_file: Path to the input English markdown file (e.g., "abc.md").
        output_file: Path to the output Vietnamese markdown file (e.g., "abc.vn.md").
        api_key: Your Google GenAI API key.
        prompt: System instruction passed to the model.
        model: Name of the model to call (e.g., "gemini-2.0-flash").
        log: Writable text stream; one newline-terminated line is written per event.

    Returns:
        None. The outcome is reported through ``log``.

    Raises:
        Exception: Anything raised while creating the client or uploading
            ``input_file`` propagates to the caller. Errors from the
            generation request and from writing the output are logged instead.
    """
    # The last client/response are kept in module globals so they can be
    # inspected from the notebook after a run.
    global response
    global client

    client = genai.Client(api_key=api_key)

    file = client.files.upload(file=input_file)

    # --- Core Translation Logic ---
    vietnamese_markdown = None
    try:
        response = client.models.generate_content(
            model=model,
            contents=[file],
            config=types.GenerateContentConfig(
                system_instruction=prompt,
                max_output_tokens=50000,
                temperature=1,
            ),
        )

        if response.candidates:
            finish_reason = response.candidates[0].finish_reason
            if finish_reason != types.FinishReason.STOP:
                # Any finish reason other than STOP means the text may be
                # incomplete; consider reducing the chunk size.
                log.write(f"{input_file} response {finish_reason}\n")
        else:
            log.write(f"{input_file} response.candidates is empty. reduce the chunk size\n")

        vietnamese_markdown = response.text
    except Exception as e:
        log.write(f"{input_file} Error during translation: {e}\n")
        return

    if not vietnamese_markdown:
        # Nothing to write: leave any existing output file untouched.
        log.write(f"{input_file} no translated text returned; output file not written\n")
        return

    # --- Output Handling ---
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(vietnamese_markdown)
    except Exception as e:
        log.write(f"{input_file} Error writing output file: {e}\n")
    else:
        log.write(f"{input_file} ok\n")


import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

# API key comes from the GOOGLE_API_KEY environment variable
# (original note: "peterontech key").
api_key = os.environ.get("GOOGLE_API_KEY")

# Model used for every translation call in this notebook.
# Previously tried alternatives, kept for reference:
#   "gemini-2.0-pro"
#   "gemini-2.0-pro-exp-02-05"
#   "gemini-2.0-flash-thinking-exp-01-21"
model = "gemini-2.0-flash"

def trans_file():
    """
    Translate the single preface file and log the outcome.

    Calls translate_markdown() with a minimal one-line prompt (not
    ``system_prompt``) and the module-level ``api_key`` and ``model``.
    The log file "tran_ressult.txt" is overwritten on every call and is
    closed even if translate_markdown() raises.
    """
    input_filename = "../kinhtrungbo/nanamoli-bodhi-en/intro/1.preface.md"
    output_filename = "../kinhtrungbo/nanamoli-bodhi-vi/intro/1.preface.md"
    prompt = "Translate texts from English to Vietnamese."
    # `with` guarantees the handle is released when the upload/translation
    # raises; UTF-8 keeps non-ASCII error text from failing the log write.
    with open("tran_ressult.txt", "w", encoding="utf-8") as fi:
        translate_markdown(input_filename, output_filename, api_key, prompt, model, fi)

# Runs the single-file translation as soon as this cell executes
# (calls the API and overwrites "tran_ressult.txt").
trans_file()




In [38]:

def trans_folder(infilelist, output_folder, limit=None):
    """
    Translate every file in ``infilelist`` into ``output_folder``.

    Each output file keeps the base name of its input. Translation uses the
    module-level ``api_key``, ``system_prompt`` and ``model``; results are
    logged to "tran_ressult.txt", which is overwritten on every call.

    Args:
        infilelist: Iterable of input file paths.
        output_folder: Directory that receives the translated files; it is
            created if it does not exist yet.
        limit: Maximum number of files to translate, or None for no limit.
            Only calls to translate_markdown() that return without raising
            count towards the limit.
    """
    if output_folder:
        # Without the folder every write would fail only after the API call
        # had already been made.
        os.makedirs(output_folder, exist_ok=True)

    translated = 0
    with open("tran_ressult.txt", "w", encoding="utf-8") as fi:
        for filepath in infilelist:
            # Check before doing any work so that exactly `limit` files are
            # processed (limit=1 translates one file, limit=0 none).
            if limit is not None and translated >= limit:
                break

            filename = os.path.basename(filepath)
            output_filename = os.path.join(output_folder, filename)

            print(f"Translating {filepath} to {output_filename}")

            try:
                translate_markdown(filepath, output_filename, api_key, system_prompt, model, fi)
                translated += 1
            except Exception as e:
                # Keep going with the remaining files, but record the failure
                # in the log as well as in the notebook output.
                print(f"Error processing {filename}: {e}")
                fi.write(f"{filepath} Error processing: {e}\n")


# Directory holding the English intro chapters (trailing slash kept so that
# plain concatenation yields the same paths as before).
EN_INTRO_DIR = '../kinhtrungbo/nanamoli-bodhi-en/intro/'

# Full input paths, in lexicographic order of the directory entries.
filelist = [EN_INTRO_DIR + entry for entry in sorted(os.listdir(EN_INTRO_DIR))]

# Skip the first sorted entry and the last two.
# (Alternative that keeps the tail: filelist[1:])
filelist = filelist[1:-2]

trans_folder(infilelist=filelist, output_folder="../kinhtrungbo/nanamoli-bodhi-vi/intro")


Translating ../kinhtrungbo/nanamoli-bodhi-en/intro/10.approaches-to-meditation.md to ../kinhtrungbo/nanamoli-bodhi-vi/intro/10.approaches-to-meditation.md
Translating ../kinhtrungbo/nanamoli-bodhi-en/intro/11.the-four-planes-of-liberation.md to ../kinhtrungbo/nanamoli-bodhi-vi/intro/11.the-four-planes-of-liberation.md
Translating ../kinhtrungbo/nanamoli-bodhi-en/intro/12.the-arahant.md to ../kinhtrungbo/nanamoli-bodhi-vi/intro/12.the-arahant.md
Translating ../kinhtrungbo/nanamoli-bodhi-en/intro/13.kamma-and-rebirth.md to ../kinhtrungbo/nanamoli-bodhi-vi/intro/13.kamma-and-rebirth.md
Translating ../kinhtrungbo/nanamoli-bodhi-en/intro/14.the-buddha-and-his-contemporaries.md to ../kinhtrungbo/nanamoli-bodhi-vi/intro/14.the-buddha-and-his-contemporaries.md
Translating ../kinhtrungbo/nanamoli-bodhi-en/intro/15.technical-notes.md to ../kinhtrungbo/nanamoli-bodhi-vi/intro/15.technical-notes.md
Translating ../kinhtrungbo/nanamoli-bodhi-en/intro/16.dhamma.md to ../kinhtrungbo/nanamoli-bodhi-vi/

In [15]:
# Collect the sorted entry names (bare names, not full paths) found in the
# English source directory.
# NOTE: this rebinds `filelist`, which the folder-translation cell uses for
# full input paths.
filelist = []
for filename in sorted(os.listdir('../kinhtrungbo/nanamoli-bodhi-en')):
    filelist.append(filename)