In [1]:
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from pydantic import BaseModel, Field
import json

import unicodedata
import re

from pydantic import BaseModel, Field

In [2]:
# load_dotenv() runs first so that values it adds to the environment are
# visible to the lookup below. project_name is None when PROJECT_NAME is unset.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

In [3]:
class Translation(BaseModel):
    """The updated translation, and the comments for the update"""
    # This model is passed as `response_schema` in the generation config, so it
    # describes the JSON object the model is asked to return.
    # NOTE(review): pydantic normally surfaces the class docstring and the Field
    # descriptions in the generated schema, which would make them part of what
    # the model sees — confirm before rewording any of them.

    # The revised translation text.
    updated_translation: str = Field(description="The updated translation")
    # The model's critique / rationale for the revision.
    comment: str = Field(description="Comments on the translation update")

In [4]:
# Persona text supplied to the model as the system instruction.
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English."""

# Vertex AI client bound to the project read from the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

model = "gemini-2.0-flash-001"

# Request settings shared by every call in this notebook:
#   * temperature and top_p are both 0 (presumably to keep output as repeatable as possible),
#   * the reply must be JSON shaped like the Translation model,
#   * the threshold is set to "OFF" for each of the four harm categories listed.
generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_mime_type='application/json',
    response_schema=Translation,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [5]:
# Prompt template for the reflection step. It is filled with str.format using
# the named placeholders {japanese_text}, {chinese_translation} and
# {english_translation}; the result is sent to the model as the user message.
# NOTE(review): "originial" below is a typo for "original". It is left as-is
# because every character of this string is model input; fixing it may change
# the generated output, so do it deliberately.
reflection_prompt = """You are translating the following Japanese text into English. You have an initial Chinese translation and an initial English translation.

Criticise the English translation based on the Japanese text. Compare the English translation with the Chinese translation.

Improve the English translation. Keep the originial Japanese meanings.

This is the Japanese text.
{japanese_text}

This is the initial Chinese translation.
{chinese_translation}

This is the English translation.
{english_translation}
"""


In [6]:
def generate(client: genai.Client, generate_content_config: types.GenerateContentConfig, model: str, /, prompt: str) -> str:
    """Stream one single-turn completion and return the concatenated text.

    The prompt is sent as a single ``user`` message. Each piece of text is
    echoed to stdout as soon as it arrives (without a trailing newline) and
    also collected for the return value.

    Args:
        client: Client whose ``models.generate_content_stream`` is called.
            Positional-only.
        generate_content_config: Passed through as ``config``. Positional-only.
        model: Model name passed through as ``model``. Positional-only.
        prompt: Text of the user message; may be given by keyword.

    Returns:
        All streamed text joined in arrival order ("" if nothing was streamed).
    """
    contents = [
        types.Content(
            role="user",
            # str() keeps the old f-string behaviour of stringifying the prompt.
            parts=[types.Part.from_text(text=str(prompt))],
        )
    ]

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        chunk_text = chunk.text
        # A streamed chunk can carry no text (the SDK exposes `.text` as
        # optional). Skipping it avoids `str + None` raising TypeError and
        # a literal "None" being echoed into the output.
        if not chunk_text:
            continue
        print(chunk_text, end="")
        pieces.append(chunk_text)

    return "".join(pieces)




In [7]:
# Load the prologue record produced by the Chinese pass. Later cells read the
# keys japanese_translation, updated_chinese_translation, english_translation,
# ainu_title and japanese_title from it.
with open(
    "updated_translations_main_text_iter1/Chinese_Translation_JSON/Chiri_Yukie_prologue.json",
    mode="r",
    encoding="utf8",
) as f:
    data_dict = json.loads(f.read())

In [8]:
# Markdown template used when writing this notebook's English result to a
# Markdown file. The text is NFKC-normalised as it is read.
with open("templates/iter1_output_md_template", mode="r", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [9]:
# Fill the reflection template from the loaded record. The Japanese source text
# is taken from the record's 'japanese_translation' key.
reflection_prompt_formatted = reflection_prompt.format(
    japanese_text=data_dict['japanese_translation'],
    chinese_translation=data_dict['updated_chinese_translation'],
    english_translation=data_dict['english_translation'],
)

In [10]:
# Run the reflection prompt through the model. generate() echoes the streamed
# text to the cell output and returns the full text.
updated_translation = generate(
    client,
    generate_content_config,
    model,
    prompt=reflection_prompt_formatted,
)

{
  "comment": "The English translation is generally good, but it could be improved to better reflect the nuances and poetic tone of the original Japanese text, as well as align more closely with the Chinese translation in certain areas. Here's a breakdown:\n\n*   **\"realm of freedom\" vs. \"自由的天地\"**: The English phrase is accurate, but \"free and open land\" or \"land of freedom and independence\" might better capture the sense of unrestricted living implied in the Japanese and Chinese.\n*   **\"nature's darlings\" vs. \"大自然的寵兒\"**: The English is fine, but \"favorites of nature\" or \"children of nature\" might be closer to the original meaning and the Chinese translation.\n*   **\"leaf-like boats\" vs. \"像樹葉般的小船\"**: This is accurate, but \"boats like leaves\" might sound more natural in English.\n*   **\"crimson autumn\"**: While evocative, \"autumn of red leaves\" might be more literal and align better with the imagery.\n*   **\"parted the fields of pampas grass, whose ears sway

In [11]:
# Decode the model's JSON reply, then show the revised translation followed by
# the critique, each on its own print call so embedded newlines render.
updated_translation_dict = json.loads(updated_translation)
for field_name in ('updated_translation', 'comment'):
    print(updated_translation_dict[field_name])

## Preface

Long ago, this vast land of Hokkaido was the free and open land for our ancestors. Like innocent children, they were embraced by the beautiful nature, living leisurely and happily. Truly, they were favorites of nature, such blessed people they were.

In winter, they kicked through the deep snow covering the forests, braving the freezing air that chilled the earth, crossing mountain after mountain to hunt bears. In summer, they sailed their boats like leaves on the green waves where cool breezes swam, the songs of white gulls their companions, fishing all day long. In the blossoming spring, they basked in the soft sunlight, spending their days singing with the eternally chirping birds, gathering butterburs and mugwort. In the autumn of red leaves, they parted the pampas grass, its ears heavy in the autumn wind, until the bonfires for catching salmon faded into the evening. With the calls of deer echoing through the valleys, they dreamt beneath the round moon. Ah, what a joyo

In [12]:
# Attach the model's revision and its critique to the loaded record so both are
# saved alongside the original fields.
data_dict['updated_english_translation'] = updated_translation_dict['updated_translation']
data_dict['update_comment'] = updated_translation_dict['comment']

# Render the Markdown report: the revised translation followed by the critique.
md_output = md_template.format(
    translated_language="English",
    ainu_title=data_dict['ainu_title'],
    formatted_prompt=reflection_prompt_formatted,
    japanese_title=data_dict['japanese_title'],
    input_japanese=data_dict['japanese_translation'],
    output=f"{data_dict['updated_english_translation']}\n\n{data_dict['update_comment']}",
)

json_output_path = "updated_translations_main_text_iter1/English_Translation_JSON/Chiri_Yukie_prologue.json"
md_output_path = "updated_translations_main_text_iter1/English_Translation/Chiri_Yukie_prologue.md"

# Create the output folders up front: open(..., "w") does not create missing
# directories, and failing here would throw away the model call's result.
for output_path in (json_output_path, md_output_path):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

# ensure_ascii=False keeps the Japanese/Chinese text human-readable in the file.
with open(json_output_path, "w", encoding="utf8") as f:
    json.dump(data_dict, f, ensure_ascii=False, indent=4)

with open(md_output_path, "w", encoding="utf8") as f:
    f.write(md_output)


