In [216]:
from google import genai
from google.genai import types
import base64
import json
import os
from dotenv import load_dotenv

from pydantic import BaseModel, Field

import unicodedata
import re

In [217]:
# Load variables from a .env file (python-dotenv), then read the project name
# that is handed to the client as its `project`.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

The story IDs to process, from 1 to 13, matching the 13 Ainu Kamuy Yukar translated by Chiri Yukie. Yukar IDs start at 1; Chiri's Preface is ID 0. Both `start_at` and `end_at` below are inclusive.

In [218]:
# Inclusive range of story ids handled by the batch loop.
start_at, end_at = 8, 8

In [219]:
# Structured-output schema: this class is passed as `response_schema` in the
# generation config, and its field names are the keys of the JSON reply.
# NOTE(review): the class docstring and the Field descriptions are presumably
# exported into the schema sent to the model — treat their wording as part of
# the prompt and confirm before editing.
class Translation(BaseModel):
    """The translation, and the comments"""
    # The translated text; the batch loop reads this key from the parsed reply.
    translation: str = Field(description="The translation")
    # The model's comments on the translation.
    comment: str = Field(description="Comments on the translation")

In [220]:
# System instruction attached to every request through the generation config.
system_instruction_prompt = (
    "You are a professional translator. "
    "You know Japanese, English and Chinese. "
    "You can translate Japanese into either Chinese or English."
)

# Vertex AI client for the project named in the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# temperature and top_p are both 0; the reply is requested as JSON following
# the Translation schema; every listed harm category gets threshold "OFF".
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [221]:
# Instruction placed in front of each Japanese story. It has three lines; the
# first line ends with a space before its newline, exactly as originally written.
poetic_translation_prompt = (
    "Translate the following text from Japanese to Chinese. "
    "The original text is a Japanese translation of a Ainu chant, sung by Ainu god telling his story. \n"
    "Use story-telling and poetic tone. Keep the original Japanese meaning accurately. "
    "Use modern Chinese. Display in Traditional Chinese.\n"
    "If a term cannot be translated, keep the original language."
)

#descriptive_translation_prompt = "Translate the following text from Japanese to Chinese. Keep the original meanings. Display in Traditional Chinese."

In [222]:
# Load the contents page of the Japanese translation (NFKC-normalised) and
# extract the Japanese titles from it.
with open("Chiri_Japanese_Translation/content.txt", "r", encoding="utf8") as f:
    japanese_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is split into
# lines and used as the list of titles.
s = japanese_content.split('\n\n')
japanese_titles = s[1].split('\n')

In [223]:
# Load the contents page of the original Ainu text (NFKC-normalised) and
# extract the original titles from it.
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as f:
    ainu_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is split into
# lines and used as the list of titles.
s = ainu_content.split('\n\n')
ainu_titles = s[1].split('\n')

In [224]:
# Load the Markdown template that the Chinese translations are rendered into.
# The template text is NFKC-normalised right after it is read.
with open("templates/raw_output_md_template_v2", "r", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [225]:
def generate(client: genai.Client,generate_content_config :types.GenerateContentConfig,model :str, /,input_text :str, prompt :str):
    """Send ``prompt`` plus ``input_text`` to the model and return the streamed reply as one string.

    The prompt and the input text are joined with a blank line into a single
    user message. Every chunk of streamed text is echoed to stdout as it
    arrives, so progress is visible while the cell runs.

    Args:
        client: Client whose ``models.generate_content_stream`` is called.
        generate_content_config: Passed through unchanged as the request ``config``.
        model: Model name passed through unchanged as ``model``.
        input_text: Text placed after the prompt.
        prompt: Instruction text placed before ``input_text``.

    Returns:
        The concatenated text of all streamed chunks that carried text.
    """
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=f"{prompt}\n\n{input_text}")],
        )
    ]

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # `chunk.text` can be None for a chunk that carries no text part;
        # skipping it avoids printing "None" and a TypeError on concatenation.
        chunk_text = chunk.text
        if not chunk_text:
            continue
        print(chunk_text, end="")
        pieces.append(chunk_text)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Build the key used in output file names from a title.

    The key is ``<first word>_<quoted word>``: the first whitespace-separated
    word of ``title``, an underscore, and the last word that starts with ``“``
    with every ``“`` and ``”`` removed. When no word starts with ``“`` the part
    after the underscore is empty. A title without any word raises IndexError.
    """
    words = title.split()
    quoted_words = [word for word in words if word.startswith('“')]

    if quoted_words:
        # Only the last matching word counts; strip both curly quotes from it.
        quote_free = quoted_words[-1].translate({ord('“'): None, ord('”'): None})
    else:
        quote_free = ""

    return f"{words[0]}_{quote_free}"



In [226]:
# process the translation in batch mode

for song_no in range(start_at, end_at + 1):
    # Read and NFKC-normalise the Japanese text of this story. The handle is
    # closed right after reading instead of staying open (and being shadowed)
    # during the model call and the output writes.
    with open(f"Chiri_Japanese_Translation/story_translation_{song_no}.txt", "r", encoding="utf8") as f:
        japanese_story = unicodedata.normalize('NFKC', f.read())

    poetic_translation = generate(client,generate_content_config,model,input_text=japanese_story,prompt=poetic_translation_prompt)

    # The reply is parsed as JSON; only its 'translation' entry is used below.
    poetic_translation_dict = json.loads(poetic_translation)

    #descriptive_translation = generate(client,generate_content_config,model,input_text=japanese_story,prompt=descriptive_translation_prompt)

    # Ainu titles are indexed from 0 for story 1, Japanese titles by the story id itself.
    # NOTE(review): for song_no == 0 the Ainu index becomes -1 and silently
    # selects the last title — confirm how the preface should be handled.
    ainu_title = ainu_titles[song_no - 1]
    japanese_title = japanese_titles[song_no]
    chinese_translation = poetic_translation_dict['translation']

    md_output = md_template.format(translated_language="Chinese", ainu_title=ainu_title,
                                   poetic_prompt=poetic_translation_prompt,
                                   japanese_title=japanese_title, input_japanese=japanese_story,
                                   output_poetic=chinese_translation)

    md_name_part = get_output_file_name_key(ainu_title)

    data_dict = dict(ainu_title=ainu_title, japanese_title=japanese_title,
                     input_text=japanese_story, chinese_translation=chinese_translation)

    json_path = f"Initial_LLM_prompts_and_translations_main_text/Chinese_Translation_JSON/{song_no}_{md_name_part}.json"
    md_path = f"Initial_LLM_prompts_and_translations_main_text/Chinese_Translation/{song_no}_{md_name_part}_to_Chinese.md"

    # Create missing output folders so a finished translation is not lost to
    # a FileNotFoundError after the model call has already been made.
    for output_path in (json_path, md_path):
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with open(json_path, "w", encoding="utf8") as f:
        json.dump(data_dict, f, ensure_ascii=False, indent=4)

    with open(md_path, "w", encoding="utf8") as f:
        f.write(md_output)



{
  "comment": "This is a translation of an Ainu chant, sung by an Ainu god telling his story. I have tried to maintain the poetic tone and storytelling style while accurately conveying the original meaning in modern, Traditional Chinese.",
  "translation": "海神親自吟唱的歌謠「アトイカ\nトマトマキ クントテアシ フム フム!」\n\nアトイカ トマトマキ クントテアシ フムフム\n長兄，六位兄長，長姊，六位姊姊。\n短兄，六位兄長，短姊，六位姊姊。\n他們養育著我，而我呢？\n在堆滿寶物之處搭建高床，於那高床之上，\n只是靜靜地雕刻刀鞘，\n日復一日，以此為生。\n每日清晨，兄長們便\n背起箭筒，與姊姊們一同出門，\n到了傍晚，總是面帶疲色地\n空手而歸。姊姊們明明也很疲憊，卻仍要準備餐食，為我擺上飯菜，\n自己也用過餐後，收拾完畢，\n兄長們便又忙碌地製作弓箭。\n待箭筒裝滿，大家因為疲憊不堪，\n入睡後便發出震天鼾聲。\n隔天，天色未亮，\n眾人便起身，姊姊們準備餐食，為我擺上飯菜，\n大家用過餐後，又背起箭筒，\n出門而去。傍晚時分，又\n面帶疲色地空手而歸，\n姊姊們準備餐食，兄長們製作弓箭，\n日復一日，年復一年，總是重複著相同的事情。\n某日，兄長們和姊姊們又\n背起箭筒，出門而去。\n我本在雕刻寶物，不久便\n起身站上高床，手持金弓\n金箭，走出戶外。\n放眼望去，大海遼闊平靜，\n鯨魚們在海的東邊、海的西邊，\n嬉戲玩耍。此時，\n海的東邊，長姊和六位姊姊手牽著手，圍成一個圓圈，\n短姊和六位姊姊，則將鯨魚趕入圈中，\n長兄和六位兄長，短兄和六位兄長，\n瞄準圈中的鯨魚射箭，箭矢\n從鯨魚的下方穿過，從鯨魚的上方掠過。\n他們每日每日，都在做著這樣的事情。\n我看到大海中央，\n巨大的鯨魚和牠的孩子，正在海面上上下下地\n嬉戲玩耍，\n便從遠處搭上金弓金箭，\n瞄準射去，一箭\n同時貫穿了鯨魚母子。\n於是，我將其中一條鯨魚從中間斬斷，\n將一半拋入姊姊們的圈中。\n然後，將另一條半的鯨魚\n