In [None]:
# update file
import sys
from google import genai
from google.genai import types

system_prompt = """1. Translate texts from English to Vietnamese.
The subject matter is Early Buddhism. Your translations should be easy to understand, using simple, everyday language.
Maintain the original paragraph structure;
do not add or remove any text. For key terms or concepts that require clarification, please provide the Vietnamese translation,
followed by the original English word in quotation marks, and a brief explanation if necessary.

2. Use Chú giải as the heading for annotation notes (replacing --Annotation).

3. Place important-keyword/concept-keyword that at the end as `Từ ngữ`.
 following format: - Vietnamese term/ pali term / English term / Brief explanation in Vietnamese
"""
system_prompt_note = """Translate annotation texts from English to Vietnamese. The subject matter is Early Buddhism. Your translations should be easy to understand, using simple, everyday language. Maintain the original paragraph structure; do not add or remove any text. For key terms or concepts that require clarification, please provide the Vietnamese translation, followed by the original English word in quotation marks, and a brief explanation if necessary.

The begin of annotation begin with `<number> content` eg: `6 Paṭhaviṁ ` or have dot `6. Paṭhaviṁ` when translate you will replace number with markdown footnote format `[^6]:`, eg:  `6 Paṭhaviṁ` -> `[^6]: translated text..`, `6. Paṭhaviṁ` -> `[^6]: translated text..`
"""

response = None
client = None
def translate_markdown(input_file: str, output_file: str, api_key: str, prompt, model):
    """
    Translates an English markdown file to Vietnamese using Google GenAI.

    Args:
        input_file: Path to the input English markdown file (e.g., "abc.md").
        output_file: Path to the output Vietnamese markdown file (e.g., "abc.vn.md").
        api_key: Your Google GenAI API key.
    """

    global response
    global client

    client = genai.Client(api_key=api_key)
    english_markdown = ''

    file = client.files.upload(file=input_file)
    # --- Core Translation Logic ---
    try:

        response = client.models.generate_content(
            model= model or 'gemini-2.0-flash',
            contents=[file],
            config=types.GenerateContentConfig(
                system_instruction=prompt,
                max_output_tokens=50000,
                # candidates_token_count = 20000,
                # input_token_limit = 20000,
                temperature=0
                ),
        )

        if response.candidates:
            if response.candidates[0].finish_reason != types.FinishReason.STOP:
                print(f"  Chunk Finish Reason: {response.candidates[0].finish_reason}")
                print(f"  Chunk Safety Ratings: {response.candidates[0].safety_ratings}")
                #If it is not stop, consider reducing the chunk size
        else: #No candidated return
            print("response.candidates is empty. reduce the chunk size")


        vietnamese_markdown = response.text

    except Exception as e:
      print(f"Error during translation: {e}")
      print(f"Full response object: {response}") #inspect for more detail, if possible
      return


    # --- Output Handling ---
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(vietnamese_markdown)
        print(f"Translation complete. Output saved to '{output_file}'.")
    except Exception as e:
        print(f"Error writing output file: {e}")



input_filename = "../kinhtrungbo/nanamoli-bodhi/002-all-the-taints.md"
output_filename = "../kinhtrungbo/nanamoli-bodhi-vi/002-all-the-taints.vi.md"

# peterontech key
api_key = os.environ.get("GOOGLE_API_KEY") #os.environ.get("GOOGLE_API_KEY") # Get API key from environment variable
api_key = os.environ.get("GOOGLE_API_KEY") # boringketo
model="gemini-2.0-pro"
model= "gemini-2.0-pro-exp-02-05"
# model="gemini-2.0-flash"

translate_markdown(input_filename, output_filename, api_key, system_prompt, model)



In [None]:
print(response.usage_metadata)
#response.candidates[0].finish_reason

In [None]:
import google.generativeai as genai

model_info = genai.get_model("models/gemini-1.5-flash")

# Returns the "context window" for the model,
# which is the combined input and output token limits.
print(f"{model_info.input_token_limit=}")
print(f"{model_info.output_token_limit=}")
# ( input_token_limit=30720, output_token_limit=2048 )

In [7]:
# v1
# update file
import sys
from google import genai
from google.genai import types

system_prompt = """1. Translate texts from English to Vietnamese.
The subject matter is Early Buddhism. Your translations should be easy to understand, using simple, everyday language.
Maintain the original paragraph structure;
do not add or remove any text. For key terms or concepts that require clarification, please provide the Vietnamese translation,
followed by the original English word in quotation marks, and a brief explanation if necessary.

2. Some line have incorrect new line, base on meaning of content, you delete empty line careffully.
eg:
```Blessed One was living in the Sakyan country at Kapilavatthu

in Nigrodha’s Park.
```
Correct is:
```Blessed One was living in the Sakyan country at Kapilavatthu
in Nigrodha’s Park.
```

3. When translate do not add "space"
eg incorect translate:
```
7.  (i) "Và này các tỳ kheo, sự hài lòng trong trường hợp
    của các dục lạc là gì?
```
eg corect translate:
```
7. (i) "Và này các tỳ kheo, sự hài lòng trong trường hợp
của các dục lạc là gì?
```

4. Keep all comment in markdown os origin position eg: `<!--p9-->`

5. Place important-keyword/concept-keyword that at the end as `Từ ngữ`.
 following format: - Vietnamese term/ pali term / English term / Brief explanation in Vietnamese
"""
system_prompt_note = """Translate annotation texts from English to Vietnamese. The subject matter is Early Buddhism. Your translations should be easy to understand, using simple, everyday language. Maintain the original paragraph structure; do not add or remove any text. For key terms or concepts that require clarification, please provide the Vietnamese translation, followed by the original English word in quotation marks, and a brief explanation if necessary.

The begin of annotation begin with `<number> content` eg: `6 Paṭhaviṁ ` or have dot `6. Paṭhaviṁ` when translate you will replace number with markdown footnote format `[^6]:`, eg:  `6 Paṭhaviṁ` -> `[^6]: translated text..`, `6. Paṭhaviṁ` -> `[^6]: translated text..`
"""

response = None
client = None
def translate_markdown(input_file: str, output_file: str, api_key: str, prompt, model):
    """
    Translates an English markdown file to Vietnamese using Google GenAI.

    Args:
        input_file: Path to the input English markdown file (e.g., "abc.md").
        output_file: Path to the output Vietnamese markdown file (e.g., "abc.vn.md").
        api_key: Your Google GenAI API key.
    """

    global response
    global client

    client = genai.Client(api_key=api_key)

    file = client.files.upload(file=input_file)
    # --- Core Translation Logic ---
    try:

        response = client.models.generate_content(
            model= model or 'gemini-2.0-flash',
            contents=[file],
            config=types.GenerateContentConfig(
                system_instruction=prompt,
                max_output_tokens=50000,
                # candidates_token_count = 20000,
                # input_token_limit = 20000,
                temperature=0.1
                ),
        )

        if response.candidates:
            if response.candidates[0].finish_reason != types.FinishReason.STOP:
                print(f"  Chunk Finish Reason: {response.candidates[0].finish_reason}")
                print(f"  Chunk Safety Ratings: {response.candidates[0].safety_ratings}")
                #If it is not stop, consider reducing the chunk size
        else: #No candidated return
            print("response.candidates is empty. reduce the chunk size")


        vietnamese_markdown = response.text

    except Exception as e:
      print(f"Error during translation: {e}")
      print(f"Full response object: {response}") #inspect for more detail, if possible
      return f'{input_file} {output_file} error {e}'
    finally:
         # --- Output Handling ---
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(vietnamese_markdown)
            print(f"Translation complete. Output saved to '{output_file}'.")
            return f'{input_file} {output_file} Translation complete '
        except Exception as e:
            print(f"Error writing output file: {e}")
            return f'{input_file} {output_file} Error writing output file: {e} '






# peterontech key
api_key = os.environ.get("GOOGLE_API_KEY") #os.environ.get("GOOGLE_API_KEY") # Get API key from environment variable
api_key = os.environ.get("GOOGLE_API_KEY") # boringketo
model="gemini-2.0-pro"
model= "gemini-2.0-pro-exp-02-05"
# model="gemini-2.0-flash"

def runme():
    input_filename = "../kinhtrungbo/nanamoli-bodhi/003-heirs-in-dhamma.md"
    output_filename = "../kinhtrungbo/nanamoli-bodhi-vi/003-heirs-in-dhamma.vi.md"

    translate_markdown(input_filename, output_filename, api_key, system_prompt, model)




In [14]:
# translate source file lis
import os
source_file_list = [os.path.join('../kinhtrungbo/nanamoli-bodhi', f) for f in os.listdir('../kinhtrungbo/nanamoli-bodhi') if f.endswith('.md')]

dest_file_list = []
for path in source_file_list:
    # Replace 'nanamoli-bodhi' with 'nanamoli-bodhi-vi'
    new_path = path.replace('nanamoli-bodhi', 'nanamoli-bodhi-vi')

    # Insert '.vi' before '.md'
    if new_path.endswith('.md'):
        new_path = new_path[:-3] + '.vi.md'  # Insert ".vi" before ".md"

    dest_file_list.append(new_path)


import time
trans_res = []
try:
    # for i in range(0, len(source_file_list)-30):
    # for i in range(max(0, len(source_file_list) - 30), len(source_file_list)):
    for i in range(44, len(source_file_list)):
        print (dest_file_list[i], source_file_list[i])
        input_filename = source_file_list[i]
        output_filename = dest_file_list[i]
        r = translate_markdown(input_filename, output_filename, api_key, system_prompt, model)
        trans_res.append(r)
        time.sleep(20)
        break
except Exception as e:
    raise e
    pass

print(trans_res)


../kinhtrungbo/nanamoli-bodhi-vi/021-the-simile-of-the-saw.vi.md ../kinhtrungbo/nanamoli-bodhi/021-the-simile-of-the-saw.md
Translation complete. Output saved to '../kinhtrungbo/nanamoli-bodhi-vi/021-the-simile-of-the-saw.vi.md'.
['../kinhtrungbo/nanamoli-bodhi/021-the-simile-of-the-saw.md ../kinhtrungbo/nanamoli-bodhi-vi/021-the-simile-of-the-saw.vi.md Translation complete ']


In [15]:
for i in range(0, len(source_file_list)):
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [1]:
a=[1,2,3,4,5]
for x in range(0, len(a)-3):
    print(a[x])


1
2
