In [78]:
from google import genai
from google.genai import types
import base64
import json
import os
from dotenv import load_dotenv

from pydantic import BaseModel, Field

import unicodedata
import re

In [79]:
# Load settings from a local .env file (if any) into the environment, then pick up
# the project identifier that is later handed to the genai client.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

The IDs of the stories to translate, from 1 to 13, corresponding to the 13 Ainu Kamuy Yukar translated by Chiri Yukie. Yukar IDs start at 1; Chiri's Preface is 0. Both `start_at` and `end_at` are inclusive.

In [None]:
# First and last story ID to translate; the batch loop treats both ends as inclusive.
start_at, end_at = 4, 4

In [81]:
# Structured-output schema: this class is passed as `response_schema` in the
# generation config, and the batch loop reads the `translation` and `comment`
# keys from the parsed JSON reply.
# NOTE(review): pydantic normally copies the class docstring and the Field
# descriptions into the generated JSON schema, so they are likely visible to the
# model — treat them as prompt text and edit with care.
class Translation(BaseModel):
    """The translation, and the comments"""
    # The translated text.
    translation: str = Field(description="The translation")
    # Free-form remarks about the translation.
    comment: str = Field(description="Comments on the translation")

In [82]:
# Role description attached to every request as the system instruction.
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English. You can also translate Chinese into English, and English into Chinese."""

# Client that talks to the model through Vertex AI, using the project read from the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# Request settings shared by every call:
#   * temperature / top_p of 0 — presumably chosen for repeatable output
#   * the reply must be JSON that follows the Translation schema
#   * every listed harm category has its threshold set to "OFF"
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [83]:
# Task prompt: `generate` places this text, followed by a blank line, in front of
# each story's English text before sending it to the model.
# NOTE(review): the first sentence is grammatically awkward ("is translation from
# Japanese which is a Japanese translation of a Ainu chant"). It is left untouched
# because any change to this string changes what the model receives.
translation_prompt = """Translate the following text from English to Chinese. The original text is translation from Japanese which is a Japanese translation of a Ainu chant, sung by Ainu god telling his story. 
Use story-telling and poetic tone. Keep the original meaning. Use modern Chinese. Display in Traditional Chinese. Only allow Chinese and English characters.
If a term cannot be translated, keep the original language."""

# Disabled alternative prompt (Japanese -> Chinese); nothing in this notebook uses it.
#descriptive_translation_prompt = "Translate the following text from Japanese to Chinese. Keep the original meanings. Display in Traditional Chinese."

In [84]:
# Load the contents page of the Japanese translation and pull out the story titles.
with open("Chiri_Japanese_Translation/content.txt", "r", encoding="utf8") as f:
    # NFKC folds compatibility characters (e.g. full-width forms) into their
    # canonical equivalents so later string handling sees one consistent form.
    japanese_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is read as one title per line.
s = japanese_content.split('\n\n')
japanese_titles = s[1].split('\n')

In [85]:
# Load the contents page of the original Ainu text and pull out the story titles.
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as f:
    # NFKC folds compatibility characters into their canonical equivalents.
    ainu_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is read as one title per line.
s = ainu_content.split('\n\n')
ainu_titles = s[1].split('\n')

In [86]:
# Load the Markdown template that each Chinese translation is rendered into
# before being written out as a .md file.
with open("templates/raw_output_md_template_cross_lingual", "r", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [87]:
def generate(client: genai.Client, generate_content_config: types.GenerateContentConfig, model: str, /, input_text: str, prompt: str) -> str:
    """Send ``prompt`` followed by ``input_text`` to the model and return the reply text.

    The request is a single user message whose text is ``prompt``, a blank line,
    then ``input_text``. The reply is streamed: each piece of text is echoed to
    stdout as it arrives and the concatenation of all pieces is returned.

    Args:
        client: Client used to issue the streaming request (positional-only).
        generate_content_config: Generation settings passed through as ``config``
            (positional-only).
        model: Model name passed through to the request (positional-only).
        input_text: The text to be processed, placed after the prompt.
        prompt: The instruction placed before ``input_text``.

    Returns:
        The full streamed reply as one string; empty if no text was received.
    """
    user_message = types.Content(
        role="user",
        parts=[types.Part.from_text(text=f"{prompt}\n\n{input_text}")],
    )

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=[user_message],
        config=generate_content_config,
    ):
        piece = chunk.text
        # A streamed chunk can arrive without any text (its `text` is then None);
        # skip it so we neither print "None" nor fail on str + None.
        if not piece:
            continue
        print(piece, end="")
        pieces.append(piece)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Build the ``<first word>_<quoted word>`` key used in story file names.

    The title is split on whitespace. The key is the first word, an underscore,
    and the last word that starts with a left curly quote (“), with every curly
    quote character (“ and ”) stripped from it. If no word starts with “, the
    part after the underscore is empty.

    Raises:
        IndexError: if ``title`` is empty or contains only whitespace.
    """
    words = title.split()

    # Only words that open with a left curly quote are candidates; the last one wins.
    quoted_words = [word for word in words if word.startswith('“')]
    if quoted_words:
        quoted_part = quoted_words[-1].translate(str.maketrans('', '', '“”'))
    else:
        quoted_part = ""

    return f"{words[0]}_{quoted_part}"



In [88]:
# Batch run: for every selected story, translate its English text into Chinese
# and write the result both as JSON and as rendered Markdown.

for song_no in range(start_at, end_at + 1):

    # Story IDs start at 1 while ainu_titles is a 0-based list.
    ainu_title = ainu_titles[song_no - 1]
    md_name_part = get_output_file_name_key(ainu_title)

    # Read the story's English translation; the file is closed again before the
    # (slow) model call and before any output file is opened.
    with open(f"Initial_LLM_prompts_and_translations_main_text/English_Translation_JSON/{song_no}_{md_name_part}.json", "r", encoding="utf8") as f:
        data_dict = json.load(f)

    poetic_translation = generate(client,generate_content_config,model,input_text=data_dict['english_translation'],prompt=translation_prompt)

    # The model is configured to answer in JSON with 'translation' and 'comment' keys.
    poetic_translation_dict = json.loads(poetic_translation)

    # The prompt recorded in the Markdown is the one actually sent to the model
    # (`translation_prompt`); the previously referenced `poetic_translation_prompt`
    # is not defined anywhere in this notebook.
    # NOTE(review): japanese_titles is indexed by the story ID itself, unlike
    # ainu_titles — presumably its first entry is the Preface (ID 0); confirm
    # against Chiri_Japanese_Translation/content.txt.
    md_output = md_template.format(translated_language="Chinese", original_language="English", ainu_title=ainu_title,
                            poetic_prompt=translation_prompt,
                            japanese_title=japanese_titles[song_no],
                            input_text = data_dict['english_translation'],
                            output_poetic=poetic_translation_dict['translation'])

    data_dict['chinese_translation'] = poetic_translation_dict['translation']
    data_dict['comment'] = poetic_translation_dict['comment']
    # Rename the inherited 'input_text' key to 'japanese_translation'
    # (raises KeyError if the input JSON has no 'input_text').
    data_dict['japanese_translation'] = data_dict.pop('input_text')

    with open(f"cross_lingual_LLM_prompts_and_translations_main_text/Chinese_Translation_JSON/{song_no}_{md_name_part}.json", "w", encoding="utf8") as f:
        json.dump(data_dict, f, ensure_ascii=False, indent=4)

    with open(f"cross_lingual_LLM_prompts_and_translations_main_text/Chinese_Translation/{song_no}_{md_name_part}_to_Chinese.md", "w", encoding="utf8") as f:
        f.write(md_output)



{
  "comment": "Translated from English to Chinese (Traditional). The translation aims to capture the poetic and storytelling tone of the original text, while maintaining accuracy and readability in modern Chinese.",
  "translation": "貓頭鷹神之歌：\n「銀色的露珠落下，落在我身旁。」\n\n我唱著歌，「銀色的露珠落下，落在我身旁，金色的露珠落下，落在我身旁，」沿著河流降落，經過一個人類的村莊。我向下看去，看見曾經貧窮的人變得富有，曾經富有的人變得貧窮。\n\n孩子們在海灘上玩著玩具弓箭。當我經過他們頭頂，唱著，「銀色的露珠落下，落在我身旁，金色的露珠落下，落在我身旁，」他們在我身下奔跑，呼喊著：\n\n「一隻美麗的鳥！是神明的鳥！快，射下來！誰先射中神明的鳥，誰就是真正的英雄，真正的強者！」\n\n新富人家的孩子們，用金色的弓和金色的箭，瞄準我。我躲避著他們的金色箭矢，忽上忽下地飛翔。在他們之中，有一個孩子與眾不同，他拿著簡單的木弓和木箭。我看得出他很貧窮，甚至從他的衣著就能看出來。然而，在他的眼中，我看到了偉大的血統，一種與眾不同的精神。他也用他那簡陋的弓箭瞄準我。\n\n新富人家的孩子們嘲笑著說：「多麼愚蠢啊，窮孩子！這隻鳥，這隻神明的鳥，甚至不會注意到我們的金色箭矢。牠怎麼會在意你那根腐朽的木箭呢？」\n\n他們踢打、推搡那個窮孩子，但他毫不在意，專注地看著我。看到這一幕，我感到深深的憐憫。\n\n唱著，「銀色的露珠落下，落在我身旁，金色的露珠落下，落在我身旁，」我在天空中緩慢地盤旋。那個窮孩子一腳在前，一腳在後，咬著嘴唇，全神貫注，然後鬆開了他的箭。那支小小的箭優美地飛向我，我伸出手，抓住了它。\n\n旋轉著，我在風中起舞，然後降落。孩子們競相奔跑，揚起陣陣塵土。當我落在地上時，那個窮孩子第一個跑到我身邊，將我捧在手中。\n\n新富人家的孩子們跑了過來，用侮辱的言語淹沒他，推打著他。「可惡的孩子，窮孩子！你竟然敢搶先拿走我們想要的東西！」那個窮孩子用身體保護著我，緊緊地將我抱在胸前。\n\n掙扎著，他終於掙脫了束縛，跑了起來，