In [91]:
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from pydantic import BaseModel, Field
import json

import unicodedata
import re

from pydantic import BaseModel, Field

In [92]:
# Load settings from a local .env file into the process environment, then
# read the project id that the Vertex AI client is created with.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

Story IDs run from 1 to 13, matching the 13 Ainu kamuy yukar translated by Chiri Yukie. Yukar IDs start at 1; Chiri's preface is ID 0.

In [93]:
# First and last story id to process in this run (both ends inclusive).
start_at, end_at = 2, 13

In [94]:
# Structured-output schema for the reflection step. It is handed to the request
# config as `response_schema`, and the JSON reply is read back through the same
# two keys (`updated_translation`, `comment`).
# NOTE(review): the class docstring and the Field descriptions presumably end up
# in the JSON schema sent to the model - confirm before rewording them.
class Translation(BaseModel):
    """The updated translation, and the comments for the update"""
    # The revised translation text returned by the model.
    updated_translation: str = Field(description="The updated translation")
    # The model's remarks about what was changed.
    comment: str = Field(description="Comments on the translation update")

In [95]:
# Persona supplied to the model as the system instruction of every request.
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English."""

# Vertex AI client bound to the project read from the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# Every harm category below gets the same "OFF" threshold; the tuple order is
# the order in which the settings appear in the request config.
harm_categories = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)

# Request configuration: temperature and top_p are both 0, and the reply is
# requested as JSON matching the Translation schema.
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in harm_categories
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [96]:
# Prompt template for the reflection pass. It is filled with str.format(), so
# the only placeholders are {japanese_text} and {chinese_translation}.
# The task list is numbered 1-2 (the second item was previously labelled "3.").
reflection_prompt = """You are translating the following Japanese text into Chinese. The original text is a chant sung by god telling his story. You already have an initial Chinese translation.

Here are your tasks:

1. Compare and criticise the Chinese translation based on the Japanese text. 
2. Improve the Chinese translation based on the comparison results.

Keep the original meanings. Display in Traditional Chinese. If a term cannot be translated, keep the original language.

This is the Japanese text.
{japanese_text}

This is the Chinese translation.
{chinese_translation}
"""


In [97]:
def generate(client: genai.Client, generate_content_config: types.GenerateContentConfig, model: str, /, prompt: str) -> str:
    """Stream a single-turn completion and return the concatenated reply text.

    The prompt is sent as one user message. Each streamed chunk is echoed to
    stdout as it arrives (without a trailing newline) and appended to the
    result.

    Args:
        client: Client whose `models.generate_content_stream` is called.
        generate_content_config: Request configuration, forwarded as `config`.
        model: Model name, forwarded unchanged.
        prompt: Full prompt text. The first three parameters are
            positional-only; this one may also be passed by keyword.

    Returns:
        All non-empty chunk texts joined in arrival order.
    """
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=f"{prompt}")],
        )
    ]

    pieces = []
    stream = client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    )
    for chunk in stream:
        text = chunk.text
        # A chunk may carry no text part, in which case `.text` is None.
        # Skip it instead of printing "None" and failing on str + None.
        if text is None:
            continue
        print(text, end="")
        pieces.append(text)

    return "".join(pieces)

def get_output_file_name_key(title: str):
    """Build the file-name key for a story from its title line.

    The key is ``<first word>_<quoted word>``: the first whitespace-separated
    token of ``title``, an underscore, and the last token that begins with
    an opening curly quote, with curly quotes stripped from it. When no token
    begins with that quote the part after the underscore is empty.

    Raises:
        IndexError: if ``title`` contains no tokens at all.
    """
    tokens = title.split()

    # Only tokens that *start* with the opening quote count; the last one wins.
    quoted_tokens = [token for token in tokens if token.startswith('“')]
    if quoted_tokens:
        quoted_part = quoted_tokens[-1].replace('“', '').replace('”', '')
    else:
        quoted_part = ""

    return tokens[0] + "_" + quoted_part




In [98]:
# Load the Markdown template used to render each updated Chinese translation,
# applying NFKC normalisation to its text.
with open("templates/iter1_output_md_template", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [99]:
# Load the contents page of the original Ainu text (NFKC-normalised) so the
# original story titles can be looked up.
with open("original_Ainu_text/content.txt", encoding="utf8") as f:
    ainu_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is taken as the
# title list, one title per line.
s = re.split(r'\n\n', ainu_content)
ainu_titles = re.split(r'\n', s[1])

In [100]:
# Destination folders for the per-story outputs of this iteration.
json_output_dir = "updated_translations_main_text_iter1/Mono_Chinese_Translation_JSON"
md_output_dir = "updated_translations_main_text_iter1/Mono_Chinese_Translation"

# Create both folders before any model call, so a missing directory cannot
# discard a finished response when the files are written.
os.makedirs(json_output_dir, exist_ok=True)
os.makedirs(md_output_dir, exist_ok=True)

for song_no in range(start_at, end_at + 1):

    # Story ids are 1-based while the title list is 0-based.
    md_name_part = get_output_file_name_key(ainu_titles[song_no - 1])

    # Load the first-pass translation record for this story.
    with open(f"raw_translations_main_text_json/{song_no}_{md_name_part}.json", "r", encoding="utf8") as f:
        data_dict = json.load(f)

    reflection_prompt_formatted = reflection_prompt.format(
        japanese_text=data_dict['japanese_translation'],
        chinese_translation=data_dict['chinese_translation'],
    )

    # generate() echoes the streamed reply and returns it as one JSON string.
    updated_translation = generate(client, generate_content_config, model, prompt=reflection_prompt_formatted)

    updated_translation_dict = json.loads(updated_translation)
    print(updated_translation_dict['updated_translation'])
    print(updated_translation_dict['comment'])

    # Keep the original record and add the reflection results alongside it.
    data_dict['updated_chinese_translation'] = updated_translation_dict['updated_translation']
    data_dict['update_comment'] = updated_translation_dict['comment']

    md_output = md_template.format(
        translated_language="Chinese",
        ainu_title=data_dict['ainu_title'],
        formatted_prompt=reflection_prompt_formatted,
        japanese_title=data_dict['japanese_title'],
        input_japanese=data_dict['japanese_translation'],
        output=f"{data_dict['updated_chinese_translation']}\n\n{data_dict['update_comment']}",
    )

    # ensure_ascii=False keeps the Japanese/Chinese text readable in the JSON file.
    with open(f"{json_output_dir}/{song_no}_{md_name_part}.json", "w", encoding="utf8") as f:
        json.dump(data_dict, f, ensure_ascii=False, indent=4)

    with open(f"{md_output_dir}/{song_no}_{md_name_part}.md", "w", encoding="utf8") as f:
        f.write(md_output)



{
  "comment": "The original translation is generally accurate, but some phrases can be improved for better flow and nuance in Chinese. The repetition of \"海的恩賜\" feels a bit clunky and can be simplified. The description of the wife's reaction can also be more vivid. The phrase \"霍哦霍伊\" is kept as is, since it is onomatopoeia. The term \"簗\" is kept as is, since it is a technical term. Overall, the updated translation aims for a more natural and impactful reading experience in Traditional Chinese.",
  "updated_translation": "狐狸自歌的謠曲「托瓦托瓦托」\n\n托瓦托瓦托\n某日，我前往海邊尋找食物。\n石礫中，鏘啦鏘啦；\n木片中，鏘啦鏘啦。\n一邊走一邊看著我的去向，\n只見海邊擱淺了一條鯨魚，\n人們都盛裝打扮，\n歡慶海的恩澤，歡欣鼓舞，\n切肉的人、搬運的人，\n來來往往，絡繹不絕，\n重要的人物們感謝海的恩澤，並向其膜拜，\n磨刀的人等等，黑壓壓一片，佈滿海灘。\n我看到這景象，非常高興。\n「啊，真想快點到那裡，\n哪怕只分到一點也好。」\n心裡這樣想著，一邊叫著「萬歲！萬歲！」\n一邊\n石礫中，鏘啦鏘啦；\n木片中，鏘啦鏘啦。\n走啊走，走到近處一看，\n卻完全出乎我的意料，\n我原以為是鯨魚擱淺，\n其實是海灘上有一處狗的便所，\n堆著一座巨大的糞山，\n我把它當成了鯨魚。\n我原以為人們歡慶海的恩澤，歡欣鼓舞，\n切肉搬運，\n其實是烏鴉們\n啄食糞便，將糞便撥來撥去，\n往那邊飛，往這邊飛。\n我非常生氣。\n「眼光昏花的沒用傢伙，\n眼光昏花的壞傢伙，\n尾巴下面臭烘烘的傢伙，\n尾巴下面腐爛的傢伙，\n屁股裡流出松脂的