In [78]:
from google import genai
from google.genai import types
import base64
import json
import os
from dotenv import load_dotenv

from pydantic import BaseModel, Field

import unicodedata
import re

In [79]:
# Pull settings from the local .env file into the process environment,
# then read the project name that is later passed to the genai client.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

Story IDs run from 1 to 13, matching the 13 Ainu kamuy yukar translated by Chiri Yukie. Yukar IDs start at 1; Chiri's preface is ID 0.

In [80]:
# First and last story numbers to translate; the batch loop treats both as inclusive.
start_at, end_at = 2, 13

In [81]:
# Schema for the model's structured reply; it is passed as `response_schema`
# in the generation config, and the batch loop reads the 'translation' key
# from the parsed JSON.
# NOTE(review): pydantic may expose the class docstring and the Field
# descriptions in the generated schema, so treat them as runtime text --
# confirm before rewording them.
class Translation(BaseModel):
    """The translation, and the comments"""
    # The translated text itself.
    translation: str = Field(description="The translation")
    # Free-form remarks about the translation; not used by the batch loop.
    comment: str = Field(description="Comments on the translation")

In [82]:
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English."""

# Client that talks to Vertex AI in the project read from the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# Request configuration shared by every call: JSON output described by the
# Translation schema, every listed harm category switched "OFF", and the
# translator persona supplied as the system instruction.
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [83]:
# Instruction sent ahead of each Japanese story. The same text is also embedded
# in the generated Markdown, so the wording is reproduced exactly as written
# (including the trailing space at the end of the first line).
poetic_translation_prompt = (
    "Translate the following text from Japanese to English. The original text is a Japanese translation of a Ainu chant, sung by Ainu god telling his story. \n"
    "Use story-telling and poetic tone. Keep the original Japanese meaning accurately. Use modern and simple English.\n"
    "If a term cannot be translated, keep the original language."
)

#descriptive_translation_prompt = "Translate the following text from Japanese to Chinese. Keep the original meanings. Display in Traditional Chinese."

In [84]:
# Load the contents page of the Japanese translation and pull out the story titles.
with open("Chiri_Japanese_Translation/content.txt", "r", encoding="utf8") as f:
    japanese_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is split into
# one title per line.
s = japanese_content.split('\n\n')
japanese_titles = s[1].split('\n')

In [85]:
# Load the contents page of the original Ainu text and pull out the story titles.
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as f:
    ainu_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is split into
# one title per line.
s = ainu_content.split('\n\n')
ainu_titles = s[1].split('\n')

In [86]:
# Load the Markdown template that the batch loop fills in (via str.format)
# to produce one Markdown file per translated story.
with open("templates/raw_output_md_template_v2", "r", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [87]:
def generate(client: genai.Client, generate_content_config: types.GenerateContentConfig, model: str, /, input_text: str, prompt: str) -> str:
    """Send one user message to the model and return the streamed reply as a single string.

    The prompt and the input text are joined with a blank line and sent as a
    single user turn. Every non-empty text chunk is echoed to stdout as it
    arrives and appended to the returned string.

    Args:
        client: genai client used to issue the streaming request (positional-only).
        generate_content_config: configuration passed as `config` to the request (positional-only).
        model: model name passed to the request (positional-only).
        input_text: text placed after the prompt.
        prompt: instruction placed before the input text.

    Returns:
        The concatenation of all text chunks received from the stream.
    """
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=f"{prompt}\n\n{input_text}")],
        )
    ]

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # A streamed chunk may carry no text; skip it rather than printing
        # "None" and failing on the string concatenation.
        text = chunk.text
        if not text:
            continue
        print(text, end="")
        pieces.append(text)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Build the file-name key "<first word>_<quoted word>" from a title.

    The title is split on whitespace. The key starts with the first token,
    followed by an underscore and the last token that begins with an opening
    curly quote (“), with all curly quotes removed. If no token begins with
    “, the part after the underscore is empty.
    """
    tokens = title.split()

    # Only tokens that open with “ are candidates; the last one wins.
    quoted_tokens = [token for token in tokens if token.startswith('“')]
    if quoted_tokens:
        quoted_part = quoted_tokens[-1].translate(str.maketrans('', '', '“”'))
    else:
        quoted_part = ""

    return f"{tokens[0]}_{quoted_part}"



In [88]:
# Batch mode: translate every story from start_at to end_at (inclusive) and
# write one JSON file and one Markdown file per story.

for song_no in range(start_at, end_at + 1):
    # Read the story inside its own `with` so the handle is closed before the
    # model call and before the output files are opened.
    with open(f"Chiri_Japanese_Translation/story_translation_{song_no}.txt", "r", encoding="utf8") as story_file:
        japanese_story = unicodedata.normalize('NFKC', story_file.read())

    # Look the titles up before calling the model so a bad index fails early.
    # NOTE(review): the Ainu list is indexed with song_no - 1 while the Japanese
    # list is indexed with song_no -- presumably the Japanese contents page has
    # one extra leading entry; confirm against the content files.
    ainu_title = ainu_titles[song_no - 1]
    japanese_title = japanese_titles[song_no]

    poetic_translation = generate(client, generate_content_config, model, input_text=japanese_story, prompt=poetic_translation_prompt)

    # The reply is parsed as JSON; only its 'translation' entry is used below.
    poetic_translation_dict = json.loads(poetic_translation)
    english_translation = poetic_translation_dict['translation']

    #descriptive_translation = generate(client,generate_content_config,model,input_text=japanese_story,prompt=descriptive_translation_prompt)

    md_output = md_template.format(
        translated_language="English",
        ainu_title=ainu_title,
        poetic_prompt=poetic_translation_prompt,
        japanese_title=japanese_title,
        input_japanese=japanese_story,
        output_poetic=english_translation,
    )

    md_name_part = get_output_file_name_key(ainu_title)

    data_dict = {
        "ainu_title": ainu_title,
        "japanese_title": japanese_title,
        "input_text": japanese_story,
        "english_translation": english_translation,
    }

    with open(f"Initial_LLM_prompts_and_translations_main_text/English_Translation_JSON/{song_no}_{md_name_part}.json", "w", encoding="utf8") as json_file:
        json.dump(data_dict, json_file, ensure_ascii=False, indent=4)

    with open(f"Initial_LLM_prompts_and_translations_main_text/English_Translation/{song_no}_{md_name_part}_to_English.md", "w", encoding="utf8") as md_file:
        md_file.write(md_output)



{
  "comment": "Translated from Japanese to English, maintaining a storytelling and poetic tone. Terms that are difficult to translate directly are kept in their original Japanese. The translation aims for accuracy while using modern and simple English.",
  "translation": "The Fox's Own Song: \"Towatowato\"\n\nTowatowato,\nOne day, I went to the shore to gather food.\nAmong the stones, chara-chara,\nAmong the driftwood, chara-chara.\nAs I went, I looked ahead and saw\nA whale had washed ashore.\nThe people were all dressed in finery,\nRejoicing in the bounty of the sea, dancing in celebration,\nSome cutting the meat, others carrying it away.\nThe important ones offered thanks and prayers to the sea.\nSome sharpened their knives; the beach was black with people.\nSeeing this, I was overjoyed.\n\"Ah, I must hurry and get there,\nI want to receive even a little bit!\" I thought,\nAnd shouting, \"Banzai! Banzai!\"\nAmong the stones, chara-chara,\nAmong the driftwood, chara-chara.\nI went c