In [1]:
from google import genai
from google.genai import types
import base64
import json
import os
from dotenv import load_dotenv

from pydantic import BaseModel, Field

import unicodedata
import re

In [2]:
# Load variables from a local .env file into the process environment, then
# read the project name that is later handed to the GenAI client
# (None when PROJECT_NAME is not set).
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

The story ID runs from 1 to 13, matching the 13 Ainu Kamuy Yukars translated by Chiri Yukie. Yukar IDs start at 1; Chiri's Preface is ID 0.

In [3]:
# Story-id bounds. Both are 0, which is the id of Chiri's preface.
# NOTE(review): presumably the first and last story id to process; neither
# name is read by the cells visible in this notebook — confirm before relying on them.
start_at, end_at = 0, 0

In [4]:
# Structured-output schema: this model is passed as `response_schema` in the
# generation config, and the reply is later parsed with json.loads and read
# through the keys "translation" and "comment".
# NOTE(review): pydantic includes the class docstring and the Field
# descriptions in the generated JSON schema, so they presumably reach the
# model as part of the request — treat them as prompt text and confirm
# before rewording them.
class Translation(BaseModel):
    """The translation, and the comments"""
    # The translated text itself.
    translation: str = Field(description="The translation")
    # Free-form remarks from the model about its translation.
    comment: str = Field(description="Comments on the translation")

In [5]:
# Persona sent to the model as the system instruction of every request.
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English. You can also translate Chinese into English, and English into Chinese."""

# Vertex AI client bound to the project name read from the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# Generation settings:
#   * temperature and top_p are both 0,
#   * the reply must be JSON shaped like the Translation model,
#   * the safety threshold is "OFF" for each harm category listed below
#     (same categories, same order as a hand-written list would have).
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [6]:
# Instruction placed in front of the Chinese prologue text. It is three lines
# long; note that the first line ends with a space before its line break.
poetic_translation_prompt = (
    "Translate the following text from Chinese to English. "
    "The original text is translation from Japanese which is a prologue. \n"
    "Keep the original meaning. Use modern and simple English.\n"
    "If a term cannot be translated, keep the original language."
)

# Unused alternative prompt, kept for reference:
#descriptive_translation_prompt = "Translate the following text from Japanese to Chinese. Keep the original meanings. Display in Traditional Chinese."

In [7]:
# Load the contents page of the Japanese translation, NFKC-normalised.
with open("Chiri_Japanese_Translation/content.txt", "r", encoding="utf8") as f:
    japanese_content = unicodedata.normalize('NFKC', f.read())

# Sections of the contents page are separated by a blank line; the second
# section is taken as the list of Japanese titles, one title per line.
s = japanese_content.split('\n\n')
japanese_titles = s[1].split('\n')

In [8]:
# Load the contents page of the original Ainu text, NFKC-normalised.
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as f:
    ainu_content = unicodedata.normalize('NFKC', f.read())

# Sections of the contents page are separated by a blank line; the second
# section is taken as the list of Ainu titles, one title per line.
s = ainu_content.split('\n\n')
ainu_titles = s[1].split('\n')

In [9]:
# Load the Markdown template used to render the translation report.
# The text is NFKC-normalised and later filled in with str.format.
with open("templates/raw_output_md_template_cross_lingual", "r", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [10]:
def generate(client: genai.Client,generate_content_config :types.GenerateContentConfig,model :str, /,input_text :str, prompt :str) -> str:
    """Send `prompt` followed by `input_text` to the model and return the reply text.

    The prompt and the input text are joined with one blank line into a single
    user message. The reply is streamed: every text piece is echoed to stdout
    as it arrives and all pieces are concatenated into the return value.

    Args:
        client: GenAI client used to issue the request (positional-only).
        generate_content_config: Settings passed to the API as `config`
            (positional-only).
        model: Model name passed to the API (positional-only).
        input_text: The text to work on; placed after the prompt.
        prompt: The instruction; placed before the input text.

    Returns:
        The complete response text (all streamed pieces joined in order).
    """
    full_prompt_part = types.Part.from_text(text=f"{prompt}\n\n{input_text}")

    contents = [
        types.Content(role="user", parts=[full_prompt_part]),
    ]

    pieces = []

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        piece = chunk.text
        # `text` is optional on a streamed chunk: a chunk without a text part
        # yields None, which must neither be printed as "None" nor be
        # concatenated to a str (that would raise TypeError).
        if piece is None:
            continue
        print(piece, end="")
        pieces.append(piece)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Build the output file-name key ``<first word>_<quoted word>`` from a title.

    The title is split on whitespace. Its first word becomes the prefix. The
    last word that begins with a left curly quote (“) supplies the suffix,
    with both curly quote characters (“ and ”) removed; when no word begins
    with that quote the suffix is empty and the key ends with "_".

    Raises:
        IndexError: if the title contains no words at all.
    """
    words = title.split()

    # Only words that open with a left curly quote are candidates; the last
    # candidate wins.
    quoted_words = [word for word in words if word.startswith('“')]
    if quoted_words:
        suffix = quoted_words[-1].replace('“', '').replace('”', '')
    else:
        suffix = ""

    return words[0] + "_" + suffix



In [11]:
# Translate Chiri Yukie's prologue from its existing Chinese translation into
# English, then save the result as JSON (data) and as Markdown (rendered from
# the template).

# Read the input and close the file immediately, so that no handle is kept
# open (or shadowed by the writers below) during the model call.
with open("Initial_LLM_prompts_and_translations_main_text/Chinese_Translation_JSON/Chiri_Yukie_prologue.json", "r", encoding="utf8") as f:
    data_dict = json.load(f)

# The generation config requests a JSON reply shaped like the Translation
# model, so the raw reply is parsed into a dict with the keys "translation"
# and "comment".
translation = generate(client,generate_content_config,model,input_text=data_dict['chinese_translation'],prompt=poetic_translation_prompt)

translation_dict = json.loads(translation)

#descriptive_translation = generate(client,generate_content_config,model,input_text=japanese_story,prompt=descriptive_translation_prompt)

md_output = md_template.format(translated_language="English", original_language="Chinese", ainu_title="",
                            poetic_prompt=poetic_translation_prompt,
                            japanese_title=japanese_titles[0],
                            input_text = data_dict['chinese_translation'],
                            output_poetic=translation_dict['translation'])

# Keep the English result next to the source data it was produced from.
data_dict['english_translation'] = translation_dict['translation']
data_dict['comment'] = translation_dict['comment']

english_json_path = "cross_lingual_LLM_prompts_and_translations_main_text/English_Translation_JSON/Chiri_Yukie_prologue.json"
# NOTE(review): this file holds the Chinese -> English translation, yet its
# name ends in "_to_Chinese"; the name is kept unchanged here — confirm
# whether "_from_Chinese" was intended.
english_md_path = "cross_lingual_LLM_prompts_and_translations_main_text/English_Translation/Chiri_Yukie_prologue_to_Chinese.md"

# Make sure the output folders exist, so a missing directory cannot discard
# the model result that has just been obtained.
for output_path in (english_json_path, english_md_path):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(english_json_path, "w", encoding="utf8") as f:
    json.dump(data_dict, f, ensure_ascii=False, indent=4)

with open(english_md_path, "w", encoding="utf8") as f:
    f.write(md_output)



{
  "comment": "Translated from Chinese to English, maintaining the original meaning and using modern, simple English. Terms that could not be directly translated were kept in their original language.",
  "translation": "Prologue\n\nLong ago, this vast Hokkaido was the free land of our ancestors. Like innocent children, they were embraced by beautiful nature, living leisurely and happily. They were truly the darlings of nature, such fortunate people!\n\nIn winter, they kicked away the thick snow covering the forests, braving the freezing cold to hunt bears across mountains and fields. In summer, accompanied by the songs of white gulls on the breezy, green-waved sea, they floated on leaf-like canoes, fishing all day long. In the flower-filled spring, they basked in the gentle sunlight, celebrating with the ever-singing birds, picking Fuki (蜂斗菜) and mugwort. In the autumn of fiery red leaves, they parted the wind-blown, neatly aligned Susuki (芒草), and until the bonfires for salmon fishin