In [1]:
import os
import httpx
import json

import unicodedata
import re

from typing import Dict

The story ID ranges from 1 to 13, corresponding to the 13 Ainu Kamuy Yukar translated by Chiri Yukie. Yukar IDs start at 1; ID 0 is Chiri's preface.

In [2]:
# First and last story ID to process; both ends are included by the processing loop.
# IDs 1-13 are the yukar, ID 0 is the preface.
start_at, end_at = 1, 13

In [3]:
# Load the contents page of the Japanese translation (NFKC-normalised) and pull
# out the Japanese story titles.
with open("Chiri_Japanese_Translation/content.txt", "r", encoding="utf8") as f:
    japanese_content = unicodedata.normalize('NFKC', f.read())

# Sections of the contents page are separated by a blank line; the second
# section is taken as the title list, one title per line.
# NOTE(review): unlike ainu_titles, no placeholder is prepended here, so the
# list presumably already carries the preface at index 0 - confirm.
s = japanese_content.split('\n\n')
japanese_titles = s[1].split('\n')

In [4]:
# Load the contents page of the original Ainu text (NFKC-normalised) and pull
# out the original story titles.
with open("original_Ainu_text/content.txt", "r", encoding="utf8") as f:
    ainu_content = unicodedata.normalize('NFKC', f.read())

# Sections of the contents page are separated by a blank line; the second
# section is taken as the title list, one title per line. An empty string is
# placed at index 0 so the yukar titles start at index 1
# (presumably because the preface has no Ainu title - confirm).
s = ainu_content.split('\n\n')
ainu_titles = ["", *s[1].split('\n')]

In [5]:
# Load the Markdown template that format_markdown() fills in for each story,
# applying the same NFKC normalisation as the other text inputs.
with open("templates/updated_footnotes_md_template", "r", encoding="utf8") as f:
    md_template = unicodedata.normalize('NFKC', f.read())

In [6]:
def output_data(
    japanese_title: str,
    ainu_title: str,
    japanese_text: str,
    chinese_translation: Dict,
    english_translation: Dict,
    eng_to_chi_translation: Dict,
    chi_to_eng_translation: Dict,
    updated_translation_zh: Dict,
    updated_translation_en: Dict,
):
    """Collect the titles, the source text and every translation variant in one dict.

    Each argument is stored unchanged under a key equal to its parameter name,
    in the order of the parameter list.

    Returns:
        The assembled dictionary.
    """
    return {
        'japanese_title': japanese_title,
        'ainu_title': ainu_title,
        'japanese_text': japanese_text,
        'chinese_translation': chinese_translation,
        'english_translation': english_translation,
        'eng_to_chi_translation': eng_to_chi_translation,
        'chi_to_eng_translation': chi_to_eng_translation,
        'updated_translation_zh': updated_translation_zh,
        'updated_translation_en': updated_translation_en,
    }

def get_output_file_name_key(title: str) -> str:
    """Build the file-name key for a story from its title.

    The key is ``<first token>_<quoted token>`` where:

    * ``<first token>`` is the first whitespace-separated token of ``title``;
    * ``<quoted token>`` is the last token that starts with a left curly
      quote (“), with every “ and ” removed. It is empty when no token
      starts with “.

    An empty or whitespace-only title (such as the ``""`` placeholder stored at
    index 0 of ``ainu_titles``) has no tokens; it yields ``"_"`` instead of
    raising ``IndexError``.

    Args:
        title: The story title.

    Returns:
        The key used inside the output file names.
    """
    tokens = title.split()
    # Guard against a title without any token so indexing cannot fail.
    leading_part = tokens[0] if tokens else ""

    quoted_part = ""
    for token in tokens:
        if token.startswith('“'):
            # A later match overrides an earlier one, so the last quoted token wins.
            quoted_part = token.replace('“', '').replace('”', '')

    return leading_part + "_" + quoted_part

def format_markdown(md_template, target_language, ainu_titles,japanese_titles,song_no, translation_score,updated_translation):
    """Fill the Markdown report template for one story and one target language.

    Args:
        md_template: Template text with named ``str.format`` placeholders.
        target_language: Value for the ``target_language`` placeholder.
        ainu_titles: Ainu titles, indexed by ``song_no``.
        japanese_titles: Japanese titles, indexed by ``song_no``.
        song_no: Index of the story in both title lists.
        translation_score: Mapping with a ``'translation_score'`` sequence
            (its first two entries are read) plus ``'better_translation'``
            and ``'better_translation_comment'``.
        updated_translation: Mapping with ``'title'`` and ``'translation'``.

    Returns:
        The template with every placeholder substituted.
    """
    fields = {
        'ainu_title': ainu_titles[song_no],
        'japanese_title': japanese_titles[song_no],
        'translated_title': updated_translation['title'],
        'target_language': target_language,
        # The two prompt placeholders are always rendered empty.
        'system_prompt': '',
        'formatted_prompt': '',
    }

    # Placeholder stem in the template -> key inside each scored candidate.
    per_candidate = (
        ('score_accuracy', 'score_accuracy'),
        ('score_understanding', 'score_easy_understanding'),
        ('weighted_score', 'weighted_score'),
        ('translation', 'text'),
        ('comment', 'comment'),
        ('score_poetic', 'score_poetic_flow'),
    )
    scored = translation_score['translation_score']
    for position, candidate in enumerate((scored[0], scored[1]), start=1):
        for stem, key in per_candidate:
            fields[f'{stem}_{position}'] = candidate[key]

    fields['better_choice'] = translation_score['better_translation']
    fields['better_comment'] = translation_score['better_translation_comment']
    fields['output'] = updated_translation['translation']

    return md_template.format(**fields)

In [None]:
for song_no in range(start_at,end_at+1):
    print (f"Processing Song {song_no}")

    with open(f"Chiri_footnotes/footnote_story_{song_no}.txt", "r", encoding="utf8") as f:
        japanese_story = f.read()
        japanese_story = unicodedata.normalize('NFKC', japanese_story)
    
    session = f"http://localhost:8000/apps/translation_agent_adk/users/default_user_{song_no}/sessions/default_user_session_{song_no}"

    r = httpx.post(session)

    input_data = {
        "app_name": "translation_agent_adk",
        "user_id": f"default_user_{song_no}",
        "session_id": f"default_user_session_{song_no}",
        "new_message": {
            "role": "user",
            "parts": [{
            "text": "This is the text to be translated. Text type is FOOTNOTE: \n" + japanese_story
            }]
        }
    }

    input_json = json.dumps(input_data, ensure_ascii=False)
    async with httpx.AsyncClient() as client:
        response = await client.post(
            url="http://0.0.0.0:8000/run",
            data=input_json,
            timeout=None,
        )

    r = httpx.delete(session)

    response_json = response.json()
    english_translation = json.loads(response_json[0]['content']['parts'][0]['text'])['translation']
    chinese_translation = json.loads(response_json[1]['content']['parts'][0]['text'])['translation']
    eng_to_chi_translation = json.loads(response_json[2]['content']['parts'][0]['text'])['translation']
    chi_to_eng_translation = json.loads(response_json[3]['content']['parts'][0]['text'])['translation']
    translation_score_zh = json.loads(response_json[4]['content']['parts'][0]['text'])
    translation_score_en = json.loads(response_json[6]['content']['parts'][0]['text'])
    updated_translation_zh = json.loads(response_json[5]['content']['parts'][0]['text'])
    updated_translation_en = json.loads(response_json[7]['content']['parts'][0]['text'])


    output_dict = output_data(japanese_title=japanese_titles[song_no], ainu_title=ainu_titles[song_no], 
                            japanese_text=japanese_story, chinese_translation=chinese_translation,
                            english_translation=english_translation, eng_to_chi_translation=eng_to_chi_translation,
                            chi_to_eng_translation=chi_to_eng_translation, updated_translation_zh=updated_translation_zh,
                            updated_translation_en=updated_translation_en)

    md_name_part = get_output_file_name_key(ainu_titles[song_no])

    with open(f"AgenticTranslationOutput_adk_footnotes/json/{song_no}_{md_name_part}.json","w", encoding="utf8") as f:
        json.dump(output_dict, f, ensure_ascii=False, indent=4)

    with open(f"AgenticTranslationOutput_adk_footnotes/raw/{song_no}_{md_name_part}.json","w", encoding="utf8") as f:
        f.write(response.text)
    
    md_output_zh = format_markdown(md_template, "Chinese", ainu_titles,japanese_titles,song_no, translation_score_zh, updated_translation_zh)

    with open(f"AgenticTranslationOutput_adk_footnotes/markdown_Chinese/{song_no}_{md_name_part}.md","w", encoding="utf8") as f:
        f.write(md_output_zh)

    md_output_en = format_markdown(md_template, "English", ainu_titles,japanese_titles,song_no, translation_score_en, updated_translation_en)

    with open(f"AgenticTranslationOutput_adk_footnotes/markdown_English/{song_no}_{md_name_part}.md","w", encoding="utf8") as f:
        f.write(md_output_en)

Processing Song 1


TypeError: format_markdown() missing 2 required positional arguments: 'translation_score' and 'updated_translation'