In [41]:
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from pydantic import BaseModel, Field
import json

import unicodedata
import re

from pydantic import BaseModel, Field

In [42]:
# Load environment settings via python-dotenv, then read the Google Cloud project id
# that is later passed to genai.Client(project=...).
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

The range of story IDs to process. IDs 1 to 13 correspond to the 13 Ainu Kamuy Yukar translated by Chiri Yukie; Yukar IDs start at 1, and Chiri's Preface is 0. Both `start_at` and `end_at` are inclusive.

In [43]:
# First and last story id to process; the main loop iterates
# range(start_at, end_at + 1), so both ends are inclusive.
start_at, end_at = 4, 4

In [44]:
# Structured-output schema for the reflection step. It is passed as `response_schema`
# in the generation config, and the two fields are read back from the parsed JSON
# reply in the main loop. The docstring and Field descriptions below are part of the
# model definition itself, so treat them as runtime text rather than as comments.
class Translation(BaseModel):
    """The updated translation, and the comments for the update"""
    # The merged / improved Chinese translation produced by the model.
    updated_translation: str = Field(description="The updated translation")
    # The model's explanation of the update (e.g. how the two inputs compared).
    comment: str = Field(description="Comments on the translation update")

In [45]:
# System prompt shared by every request made through `generate_content_config`.
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English. You can also translation Chinese into English, and English into Chinese."""

# Vertex AI client bound to the project id read from the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# temperature=0 / top_p=0 request the least random decoding available.
# The reply is constrained to JSON following the `Translation` schema, and
# all four harm-category filters are switched off (threshold "OFF").
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [46]:
# Prompt template for the reflection step. The main loop fills the three placeholders
# with str.format(): {japanese_text} is the Japanese source text, and
# {chinese_translation_1} / {chinese_translation_2} are the two existing Chinese
# translations to compare and merge.
# NOTE(review): the prompt text contains small typos ("a Ainu", "thrid"). They are left
# as-is here because this string is sent verbatim to the model and editing it would
# change the model input.
reflection_prompt = """You are translating the following Japanese text into Chinese. The original text is a Japanese translation of a Ainu chant, sung by Ainu god telling his story. 
You have 2 versions of Chinese translations at hand.

Here are your tasks:

1. Compare the two Chinese translations with the original Japanese text. List out the Pros and Cons of the two Translations.
2. Choose a better translation. Accuracy of meaning is the most important criterion. Easy to understand is the second. Poetic flow is the thrid.
3. Based on the better translation, translate the Japanese text into Chinese again, incorporating the Pros of the two translations.

Use story-telling and poetic tone for the translation. Keep the original Japanese meaning accurately. Use modern Chinese for easy understanding. Display in Traditional Chinese. 
If a term cannot be translated, keep the original language.

This is the Japanese text.
{japanese_text}

This is the Chinese Translation 1.
{chinese_translation_1}

This is the Chinese Translation 2.
{chinese_translation_2}
"""


In [47]:
def generate(client: genai.Client, generate_content_config: types.GenerateContentConfig, model: str, /, prompt: str) -> str:
    """Stream a single-turn completion for ``prompt`` and return the full text.

    The prompt is sent as one ``user`` message. Each streamed chunk is echoed
    to stdout as it arrives (without a trailing newline) and appended to the
    returned string.

    Args:
        client: Gen AI client whose ``models.generate_content_stream`` is called.
        generate_content_config: Generation config forwarded unchanged as ``config``.
        model: Model name forwarded unchanged.
        prompt: Fully formatted prompt text.

    Returns:
        The concatenation of the text of every streamed chunk.
    """
    # str() keeps the original f"{prompt}" coercion for non-str inputs.
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=str(prompt))],
        )
    ]

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        chunk_text = chunk.text
        # A chunk may carry no text part, in which case ``.text`` is None.
        # Skip it instead of printing "None" and raising TypeError on
        # string concatenation.
        if not chunk_text:
            continue
        print(chunk_text, end="")
        pieces.append(chunk_text)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Build the file-name key ``<first word>_<quoted word>`` from a title line.

    The title is split on whitespace. The first token becomes the prefix. The
    suffix is the last token that starts with a left curly quote (“), with all
    curly quotes (“ and ”) removed; it is empty when no such token exists.
    Raises IndexError when the title contains no tokens.
    """
    tokens = title.split()

    # Keep only the tokens that open with “; the last one wins, as in a full scan.
    quoted_tokens = [token for token in tokens if token.startswith('“')]
    if quoted_tokens:
        quoted_name = quoted_tokens[-1].replace('“', '').replace('”', '')
    else:
        quoted_name = ""

    return f"{tokens[0]}_{quoted_name}"




In [48]:
# Load the Markdown template used to write each Chinese translation to a .md file.
# The text is NFKC-normalised right after reading.
with open("templates/iter1_output_md_template", "r", encoding="utf8") as template_file:
    md_template = unicodedata.normalize('NFKC', template_file.read())

In [49]:
# Load the contents page of the original Ainu text (NFKC-normalised) to obtain the
# original titles.
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as content_file:
    ainu_content = unicodedata.normalize('NFKC', content_file.read())

# Sections are separated by a blank line; the second section is split into one
# title per line.
s = ainu_content.split('\n\n')
ainu_titles = s[1].split('\n')

In [50]:
# For every selected story: load the two existing Chinese translations, ask the model
# to compare and merge them, then save the result as JSON and as Markdown.
for song_no in range(start_at, end_at + 1):

    # All input and output files of a story share the stem "<song_no>_<title key>".
    md_name_part = get_output_file_name_key(ainu_titles[song_no - 1])
    file_stem = f"{song_no}_{md_name_part}"

    # Translation 1 comes from the "Initial_..." directory.
    with open(f"Initial_LLM_prompts_and_translations_main_text/Chinese_Translation_JSON/{file_stem}.json", "r", encoding="utf8") as initial_file:
        data_dict = json.load(initial_file)

    # Translation 2 comes from the "cross_lingual_..." directory.
    with open(f"cross_lingual_LLM_prompts_and_translations_main_text/Chinese_Translation_JSON/{file_stem}.json", "r", encoding="utf8") as cross_lingual_file:
        data_dict_2 = json.load(cross_lingual_file)

    reflection_prompt_formatted = reflection_prompt.format(
        japanese_text=data_dict['input_text'],
        chinese_translation_1=data_dict['chinese_translation'],
        chinese_translation_2=data_dict_2['chinese_translation'],
    )

    # The reply is a JSON string with 'updated_translation' and 'comment' keys.
    updated_translation = generate(client, generate_content_config, model, prompt=reflection_prompt_formatted)
    updated_translation_dict = json.loads(updated_translation)

    # Reshape the first record into the output record: the old single translation is
    # renamed, the second translation and the model's result are added, and
    # 'input_text' is renamed to 'japanese_translation'.
    data_dict['chinese_translation_1'] = data_dict.pop('chinese_translation')
    data_dict['chinese_translation_2'] = data_dict_2['chinese_translation']
    data_dict['updated_chinese_translation'] = updated_translation_dict['updated_translation']
    data_dict['comment'] = updated_translation_dict['comment']
    data_dict['japanese_translation'] = data_dict.pop('input_text')

    # The Markdown body is the updated translation followed by the model's comment.
    md_body = f"{data_dict['updated_chinese_translation']}\n\n{data_dict['comment']}"
    md_output = md_template.format(
        translated_language="Chinese",
        ainu_title=data_dict['ainu_title'],
        formatted_prompt=reflection_prompt_formatted,
        japanese_title=data_dict['japanese_title'],
        input_japanese=data_dict['japanese_translation'],
        output=md_body,
    )

    with open(f"Updated_LLM_prompts_and_translations_main_text/Chinese_Translation_JSON/{file_stem}.json", "w", encoding="utf8") as json_out:
        json.dump(data_dict, json_out, ensure_ascii=False, indent=4)

    with open(f"Updated_LLM_prompts_and_translations_main_text/Chinese_Translation/{file_stem}.md", "w", encoding="utf8") as md_out:
        md_out.write(md_output)



{
  "comment": "After a thorough comparison, Translation 2 is superior in terms of accuracy, readability, and poetic flow. Translation 1 is more literal and lacks the storytelling quality present in the original Japanese text. Translation 2 captures the essence of the Ainu chant more effectively. However, Translation 1 has some good points on directly translating some of the key words. The updated translation below incorporates the strengths of both translations, prioritizing accuracy and readability while maintaining a poetic tone suitable for a traditional Ainu chant.",
  "updated_translation": "兔子親述之歌：「サンパヤ テレケ」(Sanpaya Tereke)\n\n躍過兩道山谷，跳過三道山谷，我蹦蹦跳跳，\n嬉戲玩耍，緊隨兄長的腳步，一同前往山林深處。\n每日每日，我總在後頭追趕，卻只見，\n人類精心設下的弓弩陷阱，被兄長輕易破壞殆盡。\n我總為此景開懷大笑，視為尋常樂事，不以為意。\n但有一天，啊，多麼驚恐！\n萬萬沒想到，兄長竟被那可惡的陷阱所困，\n痛苦地哀嚎，絕望地呼喊。\n\n我驚慌失措，一躍而至他身旁，\n他含淚訴說，聲音低沉而顫抖：\n「親愛的弟弟，快跑，別再耽擱片刻，\n趕緊回到我們的村莊後方，大聲宣告，\n『你的兄長被弓弩困住了！』——用盡全力呼喊，『フオホホーイ(Hoh ho-hoy)！』務必勇敢。」\n\n我仔細聆聽，連聲應道：「是，是，」\n然後立刻跳躍而去，玩樂的心情早已煙消雲散。\n跨越兩道山谷，越過三道山谷，我飛奔，\n抵達我們村莊的後方