In [1]:
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from pydantic import BaseModel, Field
import json

import unicodedata
import re

from typing import Dict
from src.trilingual_translation_agent import utils
from src.trilingual_translation_agent.schema import Comment

In [2]:
load_dotenv()
project_name = os.getenv('PROJECT_NAME')

The story id, from 1 to 13 according to the 13 Ainu Kamuy Yukars translated by Chiri Yukie. The Yukar ID starts at 1. Chiri's Preface is 0.

In [3]:
start_at = 0
end_at = 0

In [4]:
ja_zh_translation_prompt = """Translate the following text from source language Japanese to target language Chinese. 
The original text is the prologue of Chiri Yukie.
Keep the original meaning accurately. Use modern Chinese. Display in Traditional Chinese. Text in languages other than Chinese or English is not allowed.
If a term cannot be translated, keep the source language.

{source_text}
"""

ja_en_translation_prompt = """Translate the following text from source language Japanese to target language English. 
The original text is the prologue of Chiri Yukie.
Use story-telling and poetic tone. Keep the original meaning accurately. Use modern and simple English.
If a term cannot be translated, keep the source language.

{source_text}
"""

en_zh_translation_prompt = """Translate the following text from source language English to target language Chinese.
The original text is the English translation of the prologue of Chiri Yukie originially in Japapnese.
The original text is translation from Japanese which is a Japanese translation of an Ainu chant, sung by Ainu god telling his story. 
Use story-telling and poetic tone. Keep the original meaning. Use modern Chinese. Display in Traditional Chinese. Text in languages other than Chinese or English is not allowed.


{source_text}
"""

zh_en_translation_prompt = """Translate the following text from source language Chinese to target language English. 
The original text is the Chinese translation of the prologue of Chiri Yukie originially in Japapnese.
Use story-telling and poetic tone. Keep the original meaning. Use modern and simple English.

{source_text}
"""

zh_reflection_prompt = """You are translating the following text from source language Japanese into target language Chinese. 
The original text is the prologue of Chiri Yukie.
You have 2 versions of Chinese translations at hand.

Here are your tasks:

1. Compare the two Chinese translations with the source Japanese text. List out the Pros and Cons of the Translations.
2. For each of translation from Japanese to Chinese, score the translation on the accuracy of original meaning with one to five stars [1,2,3,4,5].
Where one star means "Nonsense/No meaning preserved",
two stars mean "Some meaning preserved, but not understandable",
three stars mean "Some meaning preserved and understandable",
four stars mean "Most meaning preserved with possibly few grammar mistakes",
and five stars mean "Perfect meaning and grammar".

3. For each of translation from Japanese to Chinese, score the translation on the easiness to understanding with one to five stars [1,2,3,4,5].
Where one star means "The text is completely not understandable.",
two stars mean "Most part of text is not understandable.",
three stars mean "While the text is understandable, the word usage and grammar are difficult.",
four stars mean "Most part of text can be understood easily.",
and five stars mean "All the text can be understood easily.".

4. For each of translation from Japanese to Chinese, score the translation on the poetic flow with one to five stars [1,2,3,4,5].
Where one star means "All the text is literal and plain.",
two stars mean "Most part of text is literal.",
three stars mean "While the text flow is smooth, some part of the text is still literal and does not sound poetic.",
four stars mean "Most part of text is poetic.",
and five stars mean "All the text is poetic and rhythmic.".

5. Choose a better translation according to the weighted score. Accuracy of meaning is the most important criterion (50%). Easiness to understanding is the second (30%). Poetic flow is the third (20%).
6. Based on the better translation, translate the Japanese text into Chinese again, incorporating the Pros of the two translations.

Keep the original meaning accurately. Use modern Chinese. Display in Traditional Chinese. Text in languages other than Chinese or English is not allowed.
If a term cannot be translated, keep the original language.

This is the Japanese text.
{source_text}

This is the Chinese Translation 1.
{translation_1}

This is the Chinese Translation 2.
{translation_2}
"""

en_reflection_prompt = """You are translating the following text from source language Japanese into target language English.
The original text is the prologue of Chiri Yukie.
You have 2 versions of English translations at hand.

Here are your tasks:

1. Compare the two English translations with the source Japanese text. List out the Pros and Cons of the Translations.
2. For each of translation from Japanese to English, score the translation on the accuracy of original meaning with one to five stars [1,2,3,4,5].
Where one star means "Nonsense/No meaning preserved",
two stars mean "Some meaning preserved, but not understandable",
three stars mean "Some meaning preserved and understandable",
four stars mean "Most meaning preserved with possibly few grammar mistakes",
and five stars mean "Perfect meaning and grammar".

3. For each of translation from Japanese to English, score the translation on the easiness to understanding with one to five stars [1,2,3,4,5].
Where one star means "The text is completely not understandable.",
two stars mean "Most part of text is not understandable.",
three stars mean "While the text is understandable, the word usage and grammar are difficult.",
four stars mean "Most part of text can be understood easily.",
and five stars mean "All part of text can be understood easily.".

4. For each of translation from Japanese to English, score the translation on the poetic flow with one to five stars [1,2,3,4,5].
Where one star means "All the text is literal and plain.",
two stars mean "Most part of text is literal.",
three stars mean "While the text flow is smooth, some part of the text is still literal and does not sound poetic.",
four stars mean "Most part of text is poetic.",
and five stars mean "All part of the text is a poet and rhythmic. It is a chant.".

5. Choose a better translation according to the weighted score. Accuracy of meaning is the most important criterion (50%). Easiness to understanding is the second (30%). Poetic flow is the third (20%).
6. Based on the better translation, translate the Japanese text into English again, incorporating the Pros of the two translations.

Keep the original meaning accurately. 
If a term cannot be translated, keep the original language.

This is the Japanese text.
{source_text}

This is the English Translation 1.
{translation_1}

This is the English Translation 2.
{translation_2}
"""


In [5]:
class CustomScore(BaseModel):
    id: str = Field(description="For example: Translation 1 or Translation 2")
    score_accuracy: int = Field(description="Accuracy score of the translation")
    score_easy_understanding :int = Field(description="easiness to understanding score of the translation")
    score_poetic_flow :int = Field(description="Level of poetic flow of the translation")
    weighted_score: float = Field(description="weighted score of the translation")

class Reflection(BaseModel):
    """The updated translation, and the comments for the update"""
    title: str = Field(description="The translated title")
    translation_comments: list[Comment] = Field(description="Comments of all translations")
    translation_scores: list[CustomScore] = Field(description="Scores of all translations")

    better_translation:  str = Field(description="For example: Translation 1 or Translation 2")
    better_translation_comment: str = Field(description="Explanation of better translation choice.")
    updated_translation: str = Field(description="Updated Translation")



In [6]:
client = genai.Client(
      vertexai=True,
      project=project_name,
      location="us-central1",
)

model = "gemini-2.0-flash-001"

agentic_translation = utils.TranslationAgent(client=client,model=model,source_language='Japanese',
                                             target_language_1='Chinese',target_language_2='English')


In [7]:
# read the content page of Japanese translation and get the Japanese translated title
with open("Chiri_Japanese_Translation/content.txt", "r", encoding="utf8") as f:
    japanese_content = f.read()
    japanese_content = unicodedata.normalize('NFKC', japanese_content)


s=re.split(r'\n\n', japanese_content)
japanese_titles = re.split(r'\n', s[1])

In [8]:
# read the content page of Ainu original text and get the original title
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as f:
    ainu_content = f.read()
    ainu_content = unicodedata.normalize('NFKC', ainu_content)


s=re.split(r'\n\n', ainu_content)
ainu_titles = [""] + re.split(r'\n', s[1])

In [9]:
# Read the markdown template

#read in the template
with open("templates/updated_output_md_template", "r", encoding="utf8") as f:
    md_template = f.read()
    md_template = unicodedata.normalize('NFKC', md_template)

In [10]:
def output_data(japanese_title: str, ainu_title: str, japanese_text: str, chinese_translation: Dict, english_translation: Dict, 
                eng_to_chi_translation: Dict, chi_to_eng_translation: Dict, updated_translation_zh: Dict, updated_translation_en: Dict):
    output_dict = dict(japanese_title = japanese_title, ainu_title = ainu_title, japanese_text = japanese_text)
    output_dict['chinese_translation'] = chinese_translation
    output_dict['english_translation'] = english_translation
    output_dict['eng_to_chi_translation'] = eng_to_chi_translation
    output_dict['chi_to_eng_translation'] = chi_to_eng_translation
    output_dict['updated_translation_zh'] = updated_translation_zh
    output_dict['updated_translation_en'] = updated_translation_en

    return output_dict

def get_output_file_name_key(title :str):
    # setup the output file name
    s = title.split()
    md_name_part = s[0]

    name_2nd_part = ""

    for text in s:
        if text.startswith('“'):
            name_2nd_part = text.replace('“', '').replace('”', '')
        
    md_name_part += "_" + name_2nd_part

    return md_name_part

def format_markdown(md_template, ainu_titles,japanese_titles,song_no, updated_translation):
    md_output = md_template.format(ainu_title=ainu_titles[song_no], japanese_title=japanese_titles[song_no], translated_title = updated_translation['title'],
                                target_language = "Chinese",   
                                system_prompt = updated_translation['system_prompt'],
                                formatted_prompt=updated_translation['input_prompt'],
                                score_accuracy_1 = updated_translation['translation_scores'][0]['score_accuracy'],
                                score_accuracy_2 = updated_translation['translation_scores'][1]['score_accuracy'],
                                score_understanding_1 = updated_translation['translation_scores'][0]['score_easy_understanding'],
                                score_understanding_2 = updated_translation['translation_scores'][1]['score_easy_understanding'],
                                weighted_score_1 = updated_translation['translation_scores'][0]['weighted_score'],
                                weighted_score_2 = updated_translation['translation_scores'][1]['weighted_score'],
                                comment_1 = updated_translation['translation_comments'][0]['comment'],
                                comment_2 = updated_translation['translation_comments'][1]['comment'],
                                better_choice = updated_translation['better_translation'],
                                better_comment = updated_translation['better_translation_comment'],
                                output = updated_translation['updated_translation'],
                                score_poetic_1 = updated_translation['translation_scores'][0]['score_poetic_flow'],
                                score_poetic_2 = updated_translation['translation_scores'][1]['score_poetic_flow'],
                            )

    return md_output

In [None]:
for song_no in range(start_at,end_at+1):
    print (f"Processing Song {song_no}")

    with open(f"Chiri_Japanese_Translation/prologue.txt", "r", encoding="utf8") as f:
        japanese_story = f.read()
        japanese_story = unicodedata.normalize('NFKC', japanese_story)

    chinese_translation = agentic_translation.translation(prompt = ja_zh_translation_prompt,
                                                            input_data={'source_text': japanese_story})
    
    chinese_translation_updated = agentic_translation.translation(prompt = ja_zh_translation_prompt,
                                                            input_data={'source_text': japanese_story})
        
    english_translation = agentic_translation.translation(prompt = ja_en_translation_prompt, 
                                                            input_data={'source_text': japanese_story})
        
    eng_to_chi_translation = agentic_translation.translation(prompt = en_zh_translation_prompt, 
                                                            input_data={'source_text': english_translation['translation']})
        
    chi_to_eng_translation = agentic_translation.translation(prompt = zh_en_translation_prompt, 
                                                            input_data={'source_text': chinese_translation['translation']})
        
    updated_translation_zh = agentic_translation.reflection_and_update_translation(prompt=zh_reflection_prompt,schema=Reflection,
                        input_data={'source_text': japanese_story, 'translation_1': chinese_translation['translation'], 'translation_2': eng_to_chi_translation['translation']} )

    updated_translation_en = agentic_translation.reflection_and_update_translation(prompt=en_reflection_prompt,schema=Reflection,
                        input_data={'source_text': japanese_story, 'translation_1': english_translation['translation'], 'translation_2': chi_to_eng_translation['translation']} )


    output_dict = output_data(japanese_title=japanese_titles[song_no], ainu_title=ainu_titles[song_no], 
                            japanese_text=japanese_story, chinese_translation=chinese_translation,
                            english_translation=english_translation, eng_to_chi_translation=eng_to_chi_translation,
                            chi_to_eng_translation=chi_to_eng_translation, updated_translation_zh=updated_translation_zh,
                            updated_translation_en=updated_translation_en)

    #md_name_part = get_output_file_name_key(ainu_titles[song_no])

    with open(f"AgenticTranslationOutput_main_text/json/Chiri_Yukie_Prologue.json","w", encoding="utf8") as f:
        json.dump(output_dict, f, ensure_ascii=False, indent=4)
    
    md_output_zh = format_markdown(md_template, ainu_titles,japanese_titles,song_no, updated_translation_zh)

    with open(f"AgenticTranslationOutput_main_text/markdown_Chinese/Chiri_Yukie_Prologue.md","w", encoding="utf8") as f:
        f.write(md_output_zh)

    md_output_en = format_markdown(md_template, ainu_titles,japanese_titles,song_no, updated_translation_en)

    with open(f"AgenticTranslationOutput_main_text/markdown_English/Chiri_Yukie_Prologue.md","w", encoding="utf8") as f:
        f.write(md_output_en)

Processing Song 0


IndexError: list index out of range