In [16]:
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from llm import generate_string, openai_llm

class CostarPrompt:
    """Represents the CO-STAR framework prompt structure."""

    def __init__(self, context=None, objective=None, style=None, tone=None, audience=None, response=None):
        self.context = context
        self.objective = objective
        self.style = style
        self.tone = tone
        self.audience = audience
        self.response = response

    def __str__(self):
        costar_prompt = ""
        if self.context:
            costar_prompt += "# CONTEXT #\n" + self.context + "\n"
        if self.objective:
            costar_prompt += "# OBJECTIVE #\n" + self.objective + "\n"
        if self.style:
            costar_prompt += "# STYLE #\n" + self.style + "\n"
        if self.tone:
            costar_prompt += "# TONE #\n" + self.tone + "\n"
        if self.audience:
            costar_prompt += "# AUDIENCE #\n" + self.audience + "\n"
        if self.response:
            costar_prompt += "# RESPONSE #\n" + self.response + "\n"
        return costar_prompt

    def __repr__(self):
        return self.__str__()


class Translator:
    def __init__(self, show_prompt=False):
        self.show_prompt = show_prompt
        self.last_prompts = {}

    # -----------------------------------------------------------
    # STEP 1: Section Header Translation
    # -----------------------------------------------------------
    def translate_section_header(self, source_text: str, source_lang: str, target_lang: str):
        """Translate a short section header term."""
        costar_prompt = CostarPrompt(
            context=f"You are a Translator for a bank, translating financial terminology from {source_lang} to {target_lang}.",
            objective=f"Translate the given term '{source_text}' from {source_lang} to {target_lang}.",
            audience="Your audience is the bank's investment report senior editor.",
            response=f"Output just the translated term of '{source_text}' in {target_lang}. Do not include explanations or any additional text."
        )

        llm = openai_llm(temperature=0)
        result = generate_string(
            llm, str(costar_prompt), {}, show_prompt=self.show_prompt, system_prompt_only=True
        )
        return result.content

    # -----------------------------------------------------------
    # STEP 2: Full Translation
    # -----------------------------------------------------------
    def translate(self, source_text: str, source_lang: str, target_lang: str):
        """Perform initial translation in the tone of financial reports."""
        costar_prompt = CostarPrompt(
            context=f"You are a Translator for a bank, translating financial text such as investment reports from {source_lang} to {target_lang}.",
            objective=f"Translate the text '{source_text}' from {source_lang} to {target_lang}. Tone should match the style of a financial report.",
            audience="Your audience is the bank's investment report senior editor.",
            response=f"Output just the translation of '{source_text}' in {target_lang}, with no explanation or introduction."
        )

        llm = openai_llm(temperature=0)
        result = generate_string(
            llm, str(costar_prompt), {}, show_prompt=self.show_prompt, system_prompt_only=True
        )
        return result.content

    # -----------------------------------------------------------
    # STEP 3: Refinement (After Editor Suggestions)
    # -----------------------------------------------------------
    def refine_translation(self, source_text: str, initial_translated_text: str, improvements: str,
                           source_lang: str, target_lang: str):
        """Refine translation based on editor's improvement suggestions."""
        costar_prompt = CostarPrompt(
            context=f"You are a Translator for a bank, refining translations from {source_lang} to {target_lang} based on senior editor feedback.",
            objective=f"Given the initial translation:\n'{initial_translated_text}'\nand editor's comments:\n'{improvements}'\nrework the translation accordingly.",
            audience="Your audience is the bank's investment report senior editor.",
            response=f"Output the final refined translation in {target_lang} only — no explanations, just the text."
        )

        llm = openai_llm(temperature=0)
        result = generate_string(
            llm, str(costar_prompt), {}, show_prompt=self.show_prompt, system_prompt_only=True
        )
        return result.content

    # -----------------------------------------------------------
    # STEP 4: Editor Comments Generation
    # -----------------------------------------------------------
    def editor_comments(self, source_text: str, translated_text: str, target_lang: str):
        """Generate feedback from senior editor on translation quality."""
        costar_prompt = CostarPrompt(
            context="You are a senior linguistic expert that specializes in financial text translation.",
            objective=f"""
            Based on the translated text below produced by a junior translator, provide constructive comments to improve the output.

            ### Source Text ###
            {source_text}

            ### Translated Text ###
            {translated_text}

            When writing your suggestions, pay attention to whether there are ways to improve the translation's:
            (i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
            (ii) fluency (by applying {target_lang} grammar, spelling, and punctuation rules, and ensuring there are no unnecessary repetitions unless in report headers),
            (iii) style (by ensuring the translations reflect the tone of a financial report),
            (iv) terminology (by ensuring terminology use is consistent with the financial domain in {target_lang}).
            """,
            audience="Your audience is the bank's translator who will revise the translation based on your comments.",
            response="Provide your improvement suggestions in concise bullet points."
        )

        llm = openai_llm(temperature=0)
        suggestion = generate_string(
            llm, str(costar_prompt), {}, show_prompt=self.show_prompt, system_prompt_only=True
        )
        return suggestion.content


ImportError: cannot import name 'generate_string' from 'llm' (unknown location)

In [None]:
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolNode
from langchain.schema import BaseOutputParser
# from translator import Translator
import pandas as pd


# === TOOL: terminology lookup ==================================================
def terminology_lookup(term: str) -> str | None:
    """Lookup financial terminology from terminology.csv"""
    try:
        df = pd.read_csv("terminology.csv")
        match = df[df['ENGLISH'].str.lower() == term.lower()]
        if not match.empty:
            return match.iloc[0]['CHINESE']
    except Exception as e:
        print(f"Terminology lookup failed: {e}")
    return None


# === AGENT: refinement agent ===================================================
class RefinementAgent:
    def __init__(self):
        self.translator = Translator(show_prompt=False)

    def __call__(self, state: dict) -> dict:
        """
        Executes the Refinement step.
        Input state should include:
        {
            "source_text": "...",
            "initial_translation": "...",
            "target_lang": "Chinese",
            "source_lang": "English"
        }
        """
        source_text = state["source_text"]
        initial_translation = state["initial_translation"]
        source_lang = state.get("source_lang", "English")
        target_lang = state.get("target_lang", "Chinese")

        # 1️⃣ Get editor-style comments automatically
        editor_feedback = self.translator.editor_comments(
            source_text, initial_translation, target_lang
        )

        # 2️⃣ Attempt terminology enhancement
        words = source_text.split()
        for w in words:
            translated_term = terminology_lookup(w)
            if translated_term:
                initial_translation = initial_translation.replace(w, translated_term)

        # 3️⃣ Refine translation based on the feedback
        final_translation = self.translator.refine_translation(
            source_text=source_text,
            initial_translated_text=initial_translation,
            improvements=editor_feedback,
            source_lang=source_lang,
            target_lang=target_lang,
        )

        # 4️⃣ Return new graph state
        return {
            **state,
            "editor_feedback": editor_feedback,
            "refined_translation": final_translation,
        }


In [14]:
translated_text = direct_translate(
    source_text="PBT Margin increased by 15% in Q2 compared to Q1.",
    source_lang="English",
    target_lang="Mandarin"
)


print(translated_text)
# → Bank pusat menaikkan kadar faedah sebanyak 25 mata asas.


content='PBT利润率在第二季度相比于第一季度增加了15%。' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 75, 'total_tokens': 90, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_560af6e559', 'id': 'chatcmpl-CVh3pqbAWb5091gyKD8lnLTfoz8fj', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='run--050fe826-bc82-4f2b-bd98-00c007031d20-0' usage_metadata={'input_tokens': 75, 'output_tokens': 15, 'total_tokens': 90, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
PBT利润率在第二季度相比于第一季度增加了15%。


In [4]:
translated_text

'中央银行将利率上调了25个基点.'

In [8]:
# tools/terminology_lookup.py
import pandas as pd

# Example CSV (financial terminology)
# term,official_translation
# Emerging Markets,Pasaran Muncul
# AAPL,Apple Inc.

TERMINOLOGY_FILE = "terminology.csv"

def terminology_lookup(term: str) -> str | None:
    df = pd.read_csv(TERMINOLOGY_FILE)
    row = df[df["term"].str.lower() == term.lower()]
    if not row.empty:
        return row.iloc[0]["official_translation"]
    return None


In [10]:
from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from langgraph.graph import StateGraph, END
from langgraph.graph.message import add_messages

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)

# State definition for LangGraph
from typing import TypedDict, List

class RefinementState(TypedDict):
    source_text: str
    direct_translation: str
    target_language: str
    locale: str
    refined_output: str
    terminology_calls: List[dict]

In [11]:
def refinement_node(state: RefinementState):
    source = state["source_text"]
    translated = state["direct_translation"]
    target_lang = state["target_language"]
    locale = state["locale"]

    # Step 1: Ask LLM what terms might need checking
    analysis_prompt = (
        "You are a financial language expert.\n"
        "Identify any financial terms or tickers in the text that may need "
        "official terminology lookup.\n"
        "Return them as a comma-separated list only."
    )
    analysis_response = llm.invoke([
        SystemMessage(content=analysis_prompt),
        HumanMessage(content=translated)
    ])
    terms_to_check = [t.strip() for t in analysis_response.content.split(",") if t.strip()]

    terminology_calls = []
    replacements = {}

    # Step 2: Lookup each term
    for term in terms_to_check:
        translation = terminology_lookup(term)
        terminology_calls.append({"term": term, "translation": translation})
        if translation:
            replacements[term] = translation

    # Step 3: Ask LLM to refine the text with these corrections
    refinement_prompt = (
        f"You are a localization expert. Rewrite the translation for {target_lang} ({locale}) readers.\n"
        "Improve flow and readability, but preserve original meaning.\n"
        "Incorporate the following official term translations:\n"
        f"{replacements}\n"
        "Output only the refined translation."
    )

    refinement_response = llm.invoke([
        SystemMessage(content=refinement_prompt),
        HumanMessage(content=translated)
    ])

    state["refined_output"] = refinement_response.content.strip()
    state["terminology_calls"] = terminology_calls
    return state


In [12]:
# build graph
graph = StateGraph(RefinementState)
graph.add_node("refine", refinement_node)
graph.set_entry_point("refine")
graph.set_finish_point("refine")

app = graph.compile()


In [None]:
init_state = {
    "source_text": "The fund invests primarily in Emerging Markets like Malaysia and Indonesia.",
    "direct_translation": "PBT利润率在第二季度相比于第一季度增加了15%.",
    "target_language": "Mandarin",
    "locale": "MY",
}

final_state = app.invoke(init_state)
print(final_state["refined_output"])
print(final_state["terminology_calls"])


Dana ini melabur terutamanya di Pasaran Baru Muncul seperti Malaysia dan Indonesia.
[{'term': 'Pasaran Baru Muncul', 'translation': None}]
