In [1]:
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from pydantic import BaseModel, Field
import json

import unicodedata
import re

from pydantic import BaseModel, Field

In [2]:
# Load variables from a local .env file (if any) into the environment, then read
# the value that is later passed as `project` when the genai client is created.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

Story IDs run from 1 to 13, matching the 13 Ainu Kamuy Yukar translated by Chiri Yukie. Yukar IDs start at 1; Chiri's Preface is ID 0. Set `start_at` and `end_at` below to the first and last story ID to process (both inclusive).

In [3]:
# First and last story id to process; the main loop treats both ends as inclusive.
start_at, end_at = 1, 13

In [4]:
# Structured-output schema: this class is passed as `response_schema` in the
# generation config, and the main loop reads the two fields back from the
# model's JSON reply by these exact names.
# NOTE(review): the docstring and the Field descriptions are presumably exported
# into the schema sent to the model, so treat them as prompt text — confirm
# before rewording them.
class Translation(BaseModel):
    """The updated translation, and the comments for the update"""
    # Improved translation text; saved by the main loop as `updated_english_translation`.
    updated_translation: str = Field(description="The updated translation")
    # The model's remarks about the update; saved by the main loop as `comment`.
    comment: str = Field(description="Comments on the translation update")

In [5]:
# System prompt shared by every request. It is runtime text sent to the model,
# so the wording is kept exactly as it was.
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English. You can also translation Chinese into English, and English into Chinese."""

# Vertex AI client bound to the project read from the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# temperature=0 / top_p=0 are the lowest sampling settings; the reply is
# requested as JSON matching the Translation schema. Every listed harm category
# has its threshold set to "OFF".
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [6]:
# Prompt template for the reflection step: the model is shown the Japanese text
# and two candidate English translations, asked to compare them, and asked to
# produce an improved translation. The placeholders {japanese_text},
# {english_translation_1} and {english_translation_2} are filled with str.format
# in the main loop.
# NOTE(review): the first sentence reads "footnotes a Ainu chant" (probably meant
# "footnotes of an Ainu chant"). The text is left verbatim because it is sent to
# the model; rewording it may change the generated output.
reflection_prompt = """You are translating the following Japanese text into English. The original text is a Japanese translation of footnotes a Ainu chant. 
You have 2 versions of English translations at hand.

Here are your tasks:

1. Compare the two English translations with the original Japanese text. List out the Pros and Cons of the two Translations.
2. Choose a better translation. Accuracy of meaning is the most important criterion. Easy to understand is the second.
3. Based on the better translation, translate the Japanese text into English again, incorporating the Pros of the two translations.

Keep the original Japanese meaning accurately. Use modern and simple English for easy understanding. 
If a term cannot be translated, keep the original language.
For the text which are not in Japanese, keep the original form. 

This is the Japanese text.
{japanese_text}

This is the English Translation 1.
{english_translation_1}

This is the English Translation 2.
{english_translation_2}
"""


In [7]:
def generate(client: genai.Client,generate_content_config :types.GenerateContentConfig,model :str, /, prompt :str):
    """Stream a single-turn completion for ``prompt`` and return the full text.

    Each text chunk is echoed to stdout as it arrives, so progress is visible
    in the notebook, and the chunks are concatenated into the return value.

    Args:
        client: Client used to issue the streaming request (positional-only).
        generate_content_config: Settings forwarded as ``config`` (positional-only).
        model: Model name forwarded to the API (positional-only).
        prompt: Text sent as the single user message.

    Returns:
        The concatenation of all text chunks received from the stream.
    """
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=f"{prompt}")],
        )
    ]

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        chunk_text = chunk.text
        # A streamed chunk can arrive without any text part, in which case
        # `.text` is None; skip it instead of printing "None" or raising a
        # TypeError on concatenation.
        if not chunk_text:
            continue
        print(chunk_text, end="")
        pieces.append(chunk_text)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Build the file-name key for a story title.

    The key is ``<first word>_<quoted word>``. ``<quoted word>`` is the last
    whitespace-separated token that starts with a left curly quote (“), with
    both curly quote characters stripped. It is empty when no token starts
    with that quote.

    Args:
        title: The story title line.

    Returns:
        The key used in the per-story input and output file names.
    """
    tokens = title.split()
    head = tokens[0]

    # Only tokens that open with “ are candidates; when several exist the last one wins.
    quoted_tokens = [token for token in tokens if token.startswith('“')]
    tail = quoted_tokens[-1].replace('“', '').replace('”', '') if quoted_tokens else ""

    return head + "_" + tail




In [8]:
# Load the Markdown template that the main loop fills in (via str.format) to
# produce each story's .md report. The text is NFKC-normalised on load.
with open("templates/iter1_output_md_template", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [9]:
# Load the contents page of the original Ainu text (NFKC-normalised) so the
# story titles can be looked up by position.
with open("original_Ainu_text/content.txt", encoding="utf8") as f:
    ainu_content = unicodedata.normalize('NFKC', f.read())

# Sections are separated by a blank line; the second section is taken as the
# title list, one title per line.
s = re.split(r'\n\n', ainu_content)
ainu_titles = re.split(r'\n', s[1])

In [10]:
# Reflection pass: for every selected story, ask the model to compare the two
# existing English translations with the Japanese text and produce an improved
# one, then save the result both as JSON and as a Markdown report.

# Create the output folders up front so the first write cannot fail on a fresh checkout.
updated_json_dir = "Updated_LLM_prompts_and_translations_footnotes/English_Translation_JSON"
updated_md_dir = "Updated_LLM_prompts_and_translations_footnotes/English_Translation"
os.makedirs(updated_json_dir, exist_ok=True)
os.makedirs(updated_md_dir, exist_ok=True)

for song_no in range(start_at, end_at + 1):

    # The title list is indexed from 0, while the loop's story ids are offset by one.
    md_name_part = get_output_file_name_key(ainu_titles[song_no - 1])
    file_stem = f"{song_no}_{md_name_part}"

    # Translation 1: the initial prompt run.
    with open(f"Initial_LLM_prompts_and_translations_footnotes/English_Translation_JSON/{file_stem}.json", "r", encoding="utf8") as f:
        data_dict = json.load(f)

    # Translation 2: the cross-lingual prompt run.
    with open(f"cross_lingual_LLM_prompts_and_translations_footnotes/English_Translation_JSON/{file_stem}.json", "r", encoding="utf8") as f:
        data_dict_2 = json.load(f)

    reflection_prompt_formatted = reflection_prompt.format(
        japanese_text=data_dict['japanese_translation'],
        english_translation_1=data_dict['english_translation'],
        english_translation_2=data_dict_2['english_translation'],
    )

    updated_translation = generate(client, generate_content_config, model, prompt=reflection_prompt_formatted)

    # The reply is requested as JSON with `updated_translation` and `comment` keys.
    updated_translation_dict = json.loads(updated_translation)

    data_dict['english_translation_1'] = data_dict['english_translation']
    data_dict['english_translation_2'] = data_dict_2['english_translation']

    # The Chinese translation is not part of the English output; drop it when
    # present, without failing on input files that never had it.
    data_dict.pop('chinese_translation', None)

    data_dict['updated_english_translation'] = updated_translation_dict['updated_translation']
    data_dict['comment'] = updated_translation_dict['comment']

    md_output = md_template.format(
        translated_language="English",
        ainu_title=data_dict['ainu_title'],
        formatted_prompt=reflection_prompt_formatted,
        japanese_title=data_dict['japanese_title'],
        input_japanese=data_dict['japanese_translation'],
        output=f"{data_dict['updated_english_translation']}\n\n{data_dict['comment']}",
    )

    with open(f"{updated_json_dir}/{file_stem}.json", "w", encoding="utf8") as f:
        json.dump(data_dict, f, ensure_ascii=False, indent=4)

    with open(f"{updated_md_dir}/{file_stem}.md", "w", encoding="utf8") as f:
        f.write(md_output)



{
  "comment": "Here's a comparison of the two English translations, a selection of the better one, and a revised translation incorporating the strengths of both:\n\n**Analysis of Translations:**\n\n**Translation 1:**\n\n*   **Pros:**\n    *   More verbose, providing slightly more context in certain explanations.\n    *   Clear and easy to understand.\n*   **Cons:**\n    *   Sometimes a bit too wordy, which can reduce clarity.\n\n**Translation 2:**\n\n*   **Pros:**\n    *   More concise and direct.\n    *   Good flow and readability.\n*   **Cons:**\n    *   Can be slightly less detailed in explanations compared to Translation 1.\n\n**Choice of Better Translation:**\n\nTranslation 2 is slightly better due to its conciseness and readability. Accuracy is similar in both, but Translation 2 presents the information more efficiently.\n\n**Revised Translation (incorporating Pros from both):**",
  "updated_translation": "(1) In the past, when boys grew up a little, they were given small bows a