In [1]:
from google import genai
from google.genai import types
import base64
import json
import os
from dotenv import load_dotenv

import unicodedata
import re

In [2]:
# Load environment variables via python-dotenv, then read the project
# identifier that is handed to the genai client further down.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

The story ID ranges from 1 to 13, corresponding to the 13 Ainu Kamuy Yukar translated by Chiri Yukie. Yukar IDs start at 1; Chiri's Preface is ID 0.

In [3]:
# First and last story numbers handled by the batch loop (both inclusive).
start_at, end_at = 1, 13

In [4]:
# System instruction sent with every request made through this configuration.
system_instruction_prompt = """You are a professional translator. You know Japanese, English and Chinese. You can translate Japanese into either Chinese or English."""

model = "gemini-2.0-flash-001"

# Vertex AI client for the project named in the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

generate_content_config = types.GenerateContentConfig(
    temperature=0,
    top_p=0,
    max_output_tokens=8192,
    response_modalities=["TEXT"],
    # Every harm category listed below has its threshold set to OFF.
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="OFF")
        for harm_category in (
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_HARASSMENT",
        )
    ],
    system_instruction=[types.Part.from_text(text=system_instruction_prompt)],
)

In [5]:
# Instruction placed in front of each footnote text before it is sent to the
# model. The wording is part of the request, so it is kept verbatim.
translation_prompt = (
    "Translate the following text from Japanese to Chinese. "
    "The original text is footnote. "
    "Keep the original meanings. "
    "For the text which are not in Japanese, keep the original form. "
    "Display in Traditional Chinese."
)


In [6]:
# Load the contents page of the Japanese translation (NFKC-normalised).
# The second blank-line-separated section is taken as the list of translated
# titles, one per line.
with open("Chiri_Japanese_Translation/content.txt", "r", encoding="utf8") as f:
    japanese_content = unicodedata.normalize('NFKC', f.read())

s = japanese_content.split('\n\n')
japanese_titles = s[1].split('\n')

In [7]:
# Load the contents page of the original Ainu text (NFKC-normalised).
# The second blank-line-separated section is taken as the list of original
# titles, one per line.
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as f:
    ainu_content = unicodedata.normalize('NFKC', f.read())

s = ainu_content.split('\n\n')
ainu_titles = s[1].split('\n')

In [8]:
# Load the Markdown template (NFKC-normalised) that the batch loop fills in
# with str.format to produce each output file.
with open("templates/raw_output_footnote_md_template", "r", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [9]:
def generate(client: genai.Client,generate_content_config :types.GenerateContentConfig,model :str, /,input_text :str, prompt :str) -> str:
    """Stream a model response for ``prompt`` followed by ``input_text``.

    The prompt and the input text are joined with a blank line and sent as a
    single user message. Each streamed piece of text is echoed to stdout as
    it arrives and the concatenation of all pieces is returned.

    Args:
        client: genai client used to issue the request (positional-only).
        generate_content_config: generation config passed through unchanged
            (positional-only).
        model: model name passed through unchanged (positional-only).
        input_text: the text to be processed.
        prompt: the instruction placed before ``input_text``.

    Returns:
        The full response text, or an empty string if no chunk carried text.
    """
    text_full_prompt = types.Part.from_text(text=f"{prompt}\n\n{input_text}")

    contents = [
        types.Content(role="user", parts=[text_full_prompt]),
    ]

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        piece = chunk.text
        # A streamed chunk may carry no text part, in which case .text is
        # None; skip it instead of printing "None" or failing on str + None.
        if not piece:
            continue
        print(piece, end="")
        pieces.append(piece)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Derive the output file-name key from a story title.

    The key is the first whitespace-separated word of ``title``, an
    underscore, and the last word that begins with a left curly quote (“)
    with all curly quotes (“ and ”) removed. When no word begins with “,
    the part after the underscore is empty.

    Raises:
        IndexError: if ``title`` contains no non-whitespace characters.
    """
    words = title.split()
    first_word = words[0]

    quoted_words = [word for word in words if word.startswith('“')]
    if quoted_words:
        # The last matching word wins, as with sequential overwriting.
        quoted_part = quoted_words[-1].replace('“', '').replace('”', '')
    else:
        quoted_part = ""

    return f"{first_word}_{quoted_part}"



In [10]:
# process the translation in batch mode

# Make sure the destination folder exists before any model call is made, so a
# missing directory cannot fail the run after a translation was generated.
output_dir = "LLM_prompts_and_raw_translations_footnotes/Chinese_Translation"
os.makedirs(output_dir, exist_ok=True)

for song_no in range(start_at, end_at + 1):
    # Read and normalise the footnotes; the input file is closed before the
    # (slow) model call instead of being held open across it.
    with open(f"Chiri_footnotes/footnote_story_{song_no}.txt", "r", encoding="utf8") as f:
        japanese_text = unicodedata.normalize('NFKC', f.read())

    footnote_translation = generate(client,generate_content_config,model,input_text=japanese_text,prompt=translation_prompt)

    # ainu_titles is indexed from 0 for story 1, while japanese_titles is
    # indexed by the story number itself.
    # NOTE(review): presumably japanese_titles[0] is the preface - confirm
    # against the two content.txt files.
    ainu_title = ainu_titles[song_no - 1]

    # The prompt asks for Chinese and the file goes to the Chinese folder, so
    # the template must be labelled "Chinese" (it was previously "English").
    md_output = md_template.format(translated_language="Chinese", ainu_title=ainu_title,
                                   prompt=translation_prompt,
                                   japanese_title=japanese_titles[song_no], input_japanese = japanese_text,
                                   output=footnote_translation)

    md_name_part = get_output_file_name_key(ainu_title)

    with open(f"{output_dir}/{song_no}_{md_name_part}_to_Chinese.md", "w", encoding="utf8") as f:
        f.write(md_output)



好的，以下是日文文獻的翻譯，將日文翻譯成繁體中文，其餘部分保持不變：

(1) 以前男孩子稍微長大一點，就會製作小弓箭給他。孩子們就用它來射樹木或鳥類等作為目標來玩耍，不知不覺中就精通了弓箭的技術。
ak......是弓術, shinot 是遊戯, ponai 是小矢.

(2) shiktumorke......眼神。
想了解一個人的本性時，看他的眼神最清楚，如果稍微左顧右盼，就會被訓斥。

(3) achikara......「骯髒」的意思。

(4) 鳥或野獸被人射落，是因為想要人製作的箭，說是為了取箭。

(5) kotankorkamui......擁有國家或村莊的神。
山裡有 nupurikorkamui......擁有山的神（熊）和 nupuripakorkamui......擁有山東方的神（狼）等，貓頭鷹的地位僅次於熊和狼。
kotankorkamui 不像山神、山東方的神那樣粗暴慌張。因此平時總是沉著冷靜，總是閉著眼睛，除非有非常重大的事情才會睜開眼睛。

(6) eharkiso......左邊的座位。

(7) eshiso......右邊的座位。
家中央有爐灶，有窗戶的那一側是上座，從上座看過去，右邊是 eshiso，左邊是 harkiso。坐在上座的僅限男子。如果是客人等，身份比主人低的人會避免坐在上座。右邊的座位一定是主人夫婦並排坐著。右座的下一個是左座，西側（靠近門口）的座位是末座。

(8) hayokpe 冑（頭盔）。
鳥或野獸在山裡的時候，人類的眼睛看不見，但各自都有像人類一樣的家，大家都過著和人類一樣的生活，到人類的村莊來的時候，會戴上頭盔。鳥或野獸的屍體是頭盔，本體是眼睛看不見的，但在屍體的耳朵和耳朵之間。

(9) otuipe......斷了尾巴的傢伙。
像狗一樣斷了尾巴的短尾巴不太受重視。
會用 wenpe......壞傢伙, otuipe......斷了尾巴的傢伙來辱罵極其無用的人。

(10) chikashnukar. 神非常喜歡某個人的時候，在完全意想不到的地方，賜予這個人很大的幸福，這個人就會說 ikashnukar an 來表達喜悅。

(11) apehuchi......火的老婦。火神是家中最尊貴的神，一定是老奶奶。山神或海神，以及其他各種神明像貓頭鷹一樣成為客人來到家裡時，由這位 apehuchi 主持