In [1]:
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from pydantic import BaseModel, Field
import json

import unicodedata
import re

In [2]:
# Load variables from a local .env file (if any) into the process environment,
# then look up the project name that is later handed to genai.Client(project=...).
# The lookup yields None when PROJECT_NAME is not set.
load_dotenv()
project_name = os.environ.get('PROJECT_NAME')

In [3]:
# Client routed through Vertex AI, using the project name read from the environment.
client = genai.Client(vertexai=True, project=project_name, location="us-central1")

# Model name passed to every generation request in this notebook.
model = "gemini-2.0-flash-001"

# Sampling settings shared by all requests: both temperature and top_p are zero
# (presumably to keep the extraction output as repeatable as possible).
generate_content_config = types.GenerateContentConfig(temperature=0, top_p=0)

In [4]:
# Read the contents page of the Japanese translation and pull the Japanese titles out of it.
with open("Chiri_Japanese_Translation/content.txt", "r", encoding="utf8") as f:
    japanese_content = unicodedata.normalize('NFKC', f.read())

# Sections of the page are separated by a blank line; the second section is
# split into lines, and those lines are used as the list of titles.
s = japanese_content.split('\n\n')
japanese_titles = s[1].split('\n')

In [5]:
# Instructions placed in front of each raw Markdown transcript when asking the
# model to pull out just the translation text.
chinese_extraction_prompt = (
    "The provided content is a Chinese translation of the Ainu original text in Markdown. "
    "Please extract the Chinese translation from the last output."
)
english_extraction_prompt = (
    "The provided content is an English translation of the Ainu original text in Markdown. "
    "Please extract the English translation from the last output."
)

In [6]:
def generate(client: genai.Client,generate_content_config :types.GenerateContentConfig,model :str, /,input_text :str, prompt :str) -> str:
    """Stream a single-turn completion and return the concatenated text.

    The prompt and the input text are joined with a blank line and sent as one
    user message. Every streamed piece of text is echoed to stdout as it
    arrives, so progress is visible in the notebook.

    Args:
        client: Client whose ``models.generate_content_stream`` is called.
        generate_content_config: Passed through unchanged as ``config``.
        model: Model name passed through unchanged as ``model``.
        input_text: Text placed after the prompt in the user message.
        prompt: Instruction placed before the input text.

    Returns:
        The text of all streamed chunks, concatenated in arrival order.
    """
    text_full_prompt = types.Part.from_text(text=f"{prompt}\n\n{input_text}")

    contents = [
        types.Content(role="user", parts=[text_full_prompt]),
    ]

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        piece = chunk.text
        # A streamed chunk may carry no text part, in which case ``text`` is
        # None; skip it instead of printing "None" and failing on concatenation.
        if not piece:
            continue
        print(piece, end="")
        pieces.append(piece)

    return "".join(pieces)

def get_output_file_name_key(title :str):
    """Build the output-file name key for a story title.

    The title is split on whitespace. The key is the first token, an
    underscore, and then the last token that starts with a left curly quote
    (with both kinds of curly quote removed). When no token starts with a left
    curly quote, the part after the underscore is empty.

    Raises:
        IndexError: if the title contains no tokens at all.
    """
    tokens = title.split()

    # Only the last quoted token matters; earlier ones are overridden.
    quoted_tokens = [token for token in tokens if token.startswith('“')]
    if quoted_tokens:
        suffix = quoted_tokens[-1].replace('“', '').replace('”', '')
    else:
        suffix = ""

    return tokens[0] + "_" + suffix



In [7]:
# process the translation in batch mode
def _read_normalized(path: str) -> str:
    """Read a UTF-8 text file and return its contents NFKC-normalized."""
    with open(path, "r", encoding="utf8") as f:
        return unicodedata.normalize('NFKC', f.read())


# Each source file is read and closed before the model is called, so no file
# handle stays open for the duration of a streaming request.
japanese_story = _read_normalized("Chiri_Japanese_Translation/prologue.txt")

md_chinese_text = _read_normalized("LLM_prompts_and_raw_translations_main_text/Chinese_Translation/Chiri_Yukie_prologue_to_Chinese.md")
chinese_translation_extraction = generate(client,generate_content_config,model,input_text=md_chinese_text,prompt=chinese_extraction_prompt)
# Drop any Markdown code-fence markers from the model's answer.
chinese_translation_extraction = chinese_translation_extraction.replace('```', '')

md_english_text = _read_normalized("LLM_prompts_and_raw_translations_main_text/English_Translation/Chiri_Yukie_prologue_to_English.md")
english_translation_extraction = generate(client,generate_content_config,model,input_text=md_english_text,prompt=english_extraction_prompt)
english_translation_extraction = english_translation_extraction.replace('```', '')

# One record per story; for the prologue the Ainu title is left empty and the
# first entry of japanese_titles is used as the Japanese title.
data_dict = {
    'ainu_title': "",
    'japanese_title': japanese_titles[0],
    'japanese_translation': japanese_story,
    'chinese_translation': chinese_translation_extraction,
    'english_translation': english_translation_extraction,
}

# ensure_ascii=False keeps the non-ASCII text readable in the JSON file.
with open("raw_translations_main_text_json/Chiri_Yukie_prologue.json", "w", encoding="utf8") as f:
    json.dump(data_dict, f, ensure_ascii=False, indent=4)



序

在很久以前,這片廣闊的北海道,是我們祖先自由的天地。他們像天真爛漫的孩童一般,被美麗的大自然擁抱,悠閒快樂地生活著,真是大自然的寵兒,多麼幸福的人們啊!

冬日裡,他們踢開覆蓋林野的厚厚積雪,不畏冰凍天地的寒氣,翻山越嶺地獵熊;夏日裡,他們在涼風吹拂、綠波蕩漾的海面上,與白鷗的歌聲為伴,漂浮在如樹葉般的小舟上,終日捕魚;在鮮花盛開的春天,他們沐浴在柔和的陽光下,與永遠歌唱的小鳥一同歡度時光,採摘蜂斗菜和艾蒿;在紅葉似火的秋天,他們撥開被狂風吹拂、穗頭整齊的芒草,直到深夜捕鮭魚的篝火熄滅,在山谷間鹿鳴聲聲的陪伴下,對著皎潔的明月編織夢想。啊,多麼快樂的生活啊!和平的境地,如今都已是往昔,夢想破滅數十年,這片土地發生了急劇的變化,山野變成村莊,村莊變成城鎮,逐漸地開墾發展。

太古時代的自然風貌不知何時已漸漸消失,曾經在田野山邊快樂生活著的許多人民,他們的去向又在何方?僅僅殘存的我們同族,對於不斷前進的世態,只能驚訝地睜大眼睛。而且從那雙眼睛裡,昔日人們被一舉一動宗教觀念所支配的美麗靈魂的光輝已經消失,充滿著不安和不滿,變得遲鈍黯淡,看不清前方的道路,不得不依靠別人的慈悲,多麼可悲的姿態,啊,即將滅亡的事物......那就是現在我們的名字,我們擁有著多麼悲傷的名字啊!

在很久以前,幸福的我們的祖先,一定連一點點都無法想像,自己的這片故鄉最終會變成如此淒慘的模樣吧。

時間不斷流逝,世界無限發展。在激烈的競爭場裡,即使是現在的我們,正暴露著敗北的醜態,但總有一天,如果能出現兩三位堅強的人,與不斷前進的時代並肩前行的日子,也終將到來吧。那真的是我們迫切的願望,是我們日夜祈禱的事情。

但是......我們敬愛的祖先,為了在日常生活中互相溝通而使用的眾多語言,那些被說舊了、被遺留下來的許多美麗的詞語,難道也要全部無情地,隨著即將滅亡的弱者一同消失嗎?啊,那是多麼令人悲痛和惋惜的事情啊!

身為阿伊努人,在阿伊努語中成長的我,在下雨的夜晚,下雪的夜晚,每當有空閒的時候,便聚集在一起,將我們的祖先所講述的各種故事中,極小的一部分,用拙劣的筆觸記錄下來。

如果能讓了解我們的許多人閱讀到這些文字,我將與我們的同族祖先一同,感到無比的喜悅,無上的幸福。

大正十一年三月一日

知里幸惠
## Preface

Long ago, this vast land of Hokka