In [561]:
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from pydantic import BaseModel, Field
import json

import unicodedata
import re

from pydantic import BaseModel, Field

In [562]:
# Load environment variables via python-dotenv, then look up the Google Cloud
# project that the Gen AI client is created with. The value is None when
# PROJECT_NAME is not set.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

Story IDs run from 1 to 13, one for each of the 13 Ainu Kamuy Yukar translated by Chiri Yukie. Yukar IDs start at 1; ID 0 is reserved for Chiri's preface.

In [563]:
# First and last story ID to process; the processing loop treats both bounds
# as inclusive.
start_at, end_at = 9, 9

In [564]:
# Structured-output schema: this class is passed as `response_schema` in the
# generation config, and the reply is requested as JSON with these two string
# fields (they are read back by key after json.loads in the main loop).
# NOTE(review): the class docstring and the Field descriptions are presumably
# forwarded to the model as part of the schema -- treat them as prompt text and
# confirm before rewording.
class Translation(BaseModel):
    """The updated translation, and the comments for the update"""
    updated_translation: str = Field(description="The updated translation")
    comment: str = Field(description="Comments on the translation update")

In [565]:
# Persona text supplied to the model as the system instruction.
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English."""

# Gen AI client in Vertex AI mode; the project comes from PROJECT_NAME.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# Every harm category below is configured with the same "OFF" threshold.
_harm_categories = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)

# Request JSON output shaped by the Translation schema, with temperature and
# top_p both set to 0.
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=category, threshold="OFF")
        for category in _harm_categories
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [566]:
# Prompt template for the reflection pass. It is filled in with str.format()
# using the keys japanese_text, chinese_translation and english_translation,
# so any literal braces added to this text would need to be doubled.
# NOTE(review): "incoporating" below is a typo for "incorporating"; it is left
# untouched here because the string is sent to the model verbatim.
reflection_prompt = """You are translating the following Japanese text into Chinese. The original text is a chant sung by god telling his story. You already have a Chinese translation and an English translation.

Here are your tasks:

1. Criticise the Chinese translation based on the Japanese text. 
2. Compare the Chinese translation with the English translation.
3. Improve the Chinese translation, especially incoporating the pros of the English translation.

Keep the original meanings. Display in Traditional Chinese.

This is the Japanese text.
{japanese_text}

This is the Chinese translation.
{chinese_translation}

This is the English translation.
{english_translation}
"""


In [567]:
def generate(client: genai.Client,generate_content_config :types.GenerateContentConfig,model :str, /, prompt :str):
    """Stream one single-turn request to the model and return the full reply text.

    Each chunk's text is echoed to stdout as it arrives, so progress is visible
    in the notebook, and the pieces are joined into one string for the caller.

    Args:
        client: Gen AI client whose ``models.generate_content_stream`` is called.
        generate_content_config: Generation config passed through as ``config``.
        model: Model name passed through as ``model``.
        prompt: Text sent as the only part of a single ``user`` message.

    Returns:
        The concatenated text of every streamed chunk that carried text.
    """
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=prompt)],
        )
    ]

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        chunk_text = chunk.text
        # A streamed chunk may carry no text (``text`` is None); skip it rather
        # than printing "None" or failing on ``str + None``.
        if chunk_text is None:
            continue
        print(chunk_text, end="")
        pieces.append(chunk_text)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Build the file-name key for a song from its title line.

    The key is ``<first word>_<quoted word>``, where the quoted word is the
    last whitespace-separated token that starts with a left curly quote (“),
    with both curly quote characters removed. When no token starts with “,
    the part after the underscore is empty.

    Raises:
        IndexError: if ``title`` contains no non-whitespace text.
    """
    tokens = title.split()
    first_word = tokens[0]

    # Only the last token opening with “ is used, matching a scan that keeps
    # overwriting its result.
    quoted_tokens = [token for token in tokens if token.startswith('“')]
    if quoted_tokens:
        quoted_word = quoted_tokens[-1].replace('“', '').replace('”', '')
    else:
        quoted_word = ""

    return first_word + "_" + quoted_word




In [568]:
# Load the Markdown template used when writing the Chinese translations to
# .md files. The text is NFKC-normalized and later filled in with str.format().
with open("templates/iter1_output_md_template", "r", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [569]:
# Read the contents page of the original Ainu text (NFKC-normalized) in order
# to get the original titles.
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as f:
    ainu_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is taken as the
# list of titles, one per line.
s = ainu_content.split('\n\n')
ainu_titles = s[1].split('\n')

In [570]:
# Run the reflection pass for each selected song and save the results as JSON
# and Markdown.
for song_no in range(start_at, end_at + 1):

    # ainu_titles is indexed from 0 while song numbers start at 1.
    md_name_part = get_output_file_name_key(ainu_titles[song_no - 1])

    # Load the existing translations for this song.
    raw_json_path = f"raw_translations_main_text_json/{song_no}_{md_name_part}.json"
    with open(raw_json_path, "r", encoding="utf8") as f:
        data_dict = json.load(f)

    reflection_prompt_formatted = reflection_prompt.format(
        japanese_text=data_dict['japanese_translation'],
        chinese_translation=data_dict['chinese_translation'],
        english_translation=data_dict['english_translation'],
    )

    # The reply text is parsed as JSON and read by the two Translation keys.
    updated_translation = generate(
        client,
        generate_content_config,
        model,
        prompt=reflection_prompt_formatted,
    )
    updated_translation_dict = json.loads(updated_translation)

    print(updated_translation_dict['updated_translation'])
    print(updated_translation_dict['comment'])

    # Store the improved translation and its commentary next to the inputs.
    data_dict.update({
        'updated_chinese_translation': updated_translation_dict['updated_translation'],
        'update_comment': updated_translation_dict['comment'],
    })

    md_output = md_template.format(
        translated_language="Chinese",
        ainu_title=data_dict['ainu_title'],
        formatted_prompt=reflection_prompt_formatted,
        japanese_title=data_dict['japanese_title'],
        input_japanese=data_dict['japanese_translation'],
        output=f"{data_dict['updated_chinese_translation']}\n\n{data_dict['update_comment']}",
    )

    updated_json_path = f"updated_translations_main_text_iter1/Chinese_Translation_JSON/{song_no}_{md_name_part}.json"
    with open(updated_json_path, "w", encoding="utf8") as f:
        json.dump(data_dict, f, ensure_ascii=False, indent=4)

    updated_md_path = f"updated_translations_main_text_iter1/Chinese_Translation/{song_no}_{md_name_part}.md"
    with open(updated_md_path, "w", encoding="utf8") as f:
        f.write(md_output)



{
  "comment": "The Chinese translation is generally accurate but lacks some of the nuance and flow of the original Japanese and the English translation. Specifically:\n\n*   **Repetition:** The Chinese translation repeats phrases like \"那個年輕人說道\" (that young man said) which, while accurate, makes the text feel less fluid. The English translation varies the phrasing to avoid this.\n*   **Descriptive Language:** The English translation uses more descriptive language to enhance the imagery (e.g., \"hopped and played through the meadow,\" \"smiled gently\"). The Chinese translation is more literal and less evocative.\n*   **Emphasis:** The English translation uses exclamation points and phrases like \"My heart swelled with joy!\" to convey the frog's increasing excitement. The Chinese translation is more subdued.\n*   **Cultural Context:** The terms \"Yukar\" and \"Sakehau\" are transliterated in both the Chinese and English translations. It might be helpful to add a brief explanation or 