In [None]:
# Upgrade pip itself first (notebook shell magic).
!pip install -U pip

# Install / upgrade the packages this notebook uses.
!pip install -U pandas lxml tqdm transformers huggingface_hub tokenizers sentencepiece safetensors odfpy

In [None]:
import os
import re
import gc
import sys
import glob
import html
import logging
import warnings
from typing import Optional, List, Tuple, Dict, Any

import pandas as pd
from lxml import etree
from tqdm import tqdm

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# ================== Suppress noisy output ==================
# setdefault() keeps any value that is already present in the environment.
# NOTE(review): `transformers` is imported above, before these variables are
# set; settings that a library only reads at import time may not take effect
# here — confirm, or move this section above the imports.
os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
os.environ.setdefault("PYTHONWARNINGS", "ignore")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")

# Silence Python warnings and raise the level of the listed loggers (and the
# root logger) to ERROR.
warnings.filterwarnings("ignore")
for name in ["transformers", "huggingface_hub", "urllib3", "accelerate", "bitsandbytes", "torch"]:
    logging.getLogger(name).setLevel(logging.ERROR)
logging.getLogger().setLevel(logging.ERROR)
# Library-specific verbosity hooks; each one is best-effort and any failure
# (missing module, changed API) is deliberately ignored.
try:
    from transformers.utils import logging as hf_logging
    hf_logging.set_verbosity_error()
except Exception:
    pass
try:
    from huggingface_hub.utils import logging as hub_logging
    hub_logging.set_verbosity(hub_logging.ERROR)
except Exception:
    pass
try:
    from tokenizers import logging as tk_logging
    tk_logging.set_verbosity_error()
except Exception:
    pass

# ================== Global settings ==================
# The first three are unconditional assignments and override existing values;
# two of them repeat keys already handled by setdefault() above.
os.environ["TRANSFORMERS_NO_TF"] = "1"
os.environ["BITSANDBYTES_NOWELCOME"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:128")

# ---- Parameters (each can be overridden through an environment variable) ----
# Multiplier applied to the prompt token count when sizing max_new_tokens.
NEWTOK_RATIO     = float(os.getenv("NEWTOK_RATIO", "1.4"))

# Boolean flags: "1", "true", "yes", "y" or "on" (case-insensitive) enable them.
DEBUG_SHOW_CHAT  = os.getenv("DEBUG_SHOW_CHAT", "0").strip().lower() in ("1","true","yes","y","on")
DEBUG_PROMPTS    = os.getenv("DEBUG_PROMPTS", "1").strip().lower() in ("1","true","yes","y","on")
PRINT_RAW_ZH     = os.getenv("PRINT_RAW_ZH", "0").strip().lower() in ("1","true","yes","y","on")
PROMPT_MAX_CHARS = int(os.getenv("PROMPT_MAX_CHARS", "0"))  # 0 = no truncation

# Copy API_KEY into each of these environment variable names when it is truthy.
# NOTE(review): API_KEY is not defined in the visible part of this file;
# presumably an earlier cell sets it — confirm.
for k in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
    if API_KEY:
        os.environ[k] = API_KEY

# ================== 小工具 ==================
def dprint(msg: str = ""):
    """Write ``msg`` to stderr (flushed immediately) when DEBUG_PROMPTS is on."""
    if not DEBUG_PROMPTS:
        return
    print(msg, file=sys.stderr, flush=True)

def _truncate(s: str) -> str:
    """Cut ``s`` to PROMPT_MAX_CHARS characters and append a truncation marker.

    The string is returned unchanged when PROMPT_MAX_CHARS is 0 or when the
    string is not longer than the limit.
    """
    limit = PROMPT_MAX_CHARS
    if not limit or len(s) <= limit:
        return s
    return s[:limit] + "\n...[truncated]..."

def to_one_line(s: str) -> str:
    """Render ``s`` on a single line for log output.

    Carriage returns are dropped; newlines and tabs are shown as the literal
    two-character sequences ``\\n`` and ``\\t``. ``None`` or an empty value
    gives an empty string.
    """
    escapes = str.maketrans({"\r": None, "\n": "\\n", "\t": "\\t"})
    return (s or "").translate(escapes)

# Probe for the optional quantization config class. _HAS_BNB records whether
# `BitsAndBytesConfig` could be imported from transformers; any import failure
# simply disables the quantized loading paths.
try:
    from transformers import BitsAndBytesConfig
    _HAS_BNB = True
except Exception:
    _HAS_BNB = False

def _pick_dtype() -> torch.dtype:
    if not torch.cuda.is_available():
        return torch.float32
    cap_major = torch.cuda.get_device_capability(0)[0]
    return torch.bfloat16 if cap_major >= 8 else torch.float16

# ================== LLM 載入 ==================
def load_llm(model_name: str, fallback_name: str):
    """Load a causal LM and its tokenizer, preferring quantized variants.

    For ``model_name`` and then ``fallback_name`` the loader tries 4-bit,
    8-bit and finally unquantized weights. The quantized attempts are only
    made when ``BitsAndBytesConfig`` is importable and CUDA is available;
    otherwise they would be identical to the unquantized load and are skipped.

    Args:
        model_name: Primary Hugging Face repo id / path.
        fallback_name: Repo id / path tried after every attempt on
            ``model_name`` has failed. Skipped when empty or equal to
            ``model_name``.

    Returns:
        ``(tokenizer, model, name)`` where ``name`` is the repo that loaded.

    Raises:
        RuntimeError: When every attempt fails; chained to the last error.
    """
    can_quantize = _HAS_BNB and torch.cuda.is_available()

    def _from_pretrained(factory, name: str, hf_tok, **kwargs):
        # Newer transformers releases take `token=`; older ones only know
        # `use_auth_token=` and raise TypeError on the new keyword.
        try:
            return factory.from_pretrained(name, token=hf_tok, **kwargs)
        except TypeError:
            return factory.from_pretrained(name, use_auth_token=hf_tok, **kwargs)

    def _load(name: str, quant: Optional[str]):
        hf_tok = os.environ.get("HF_TOKEN", None)
        # trust_remote_code must be passed for the tokenizer too: repos that
        # ship a custom tokenizer otherwise stop at an interactive
        # "Do you wish to run the custom code? [y/N]" prompt on every load.
        tok = _from_pretrained(
            AutoTokenizer, name, hf_tok,
            use_fast=True,
            trust_remote_code=True,
        )
        tok.padding_side = "left"
        if tok.pad_token_id is None:
            tok.pad_token_id = tok.eos_token_id

        qcfg = None
        if can_quantize and quant == "4bit":
            qcfg = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=_pick_dtype(),
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
            )
        elif can_quantize and quant == "8bit":
            qcfg = BitsAndBytesConfig(load_in_8bit=True)

        mdl = _from_pretrained(
            AutoModelForCausalLM, name, hf_tok,
            torch_dtype=_pick_dtype(),
            device_map=("auto" if torch.cuda.is_available() else None),
            low_cpu_mem_usage=True,
            quantization_config=qcfg,
            attn_implementation="sdpa",
            trust_remote_code=True,
        )
        mdl.eval()
        if getattr(mdl.generation_config, "pad_token_id", None) is None:
            mdl.generation_config.pad_token_id = tok.pad_token_id
        return tok, mdl

    quant_modes = ("4bit", "8bit", None) if can_quantize else (None,)
    names = [model_name]
    if fallback_name and fallback_name != model_name:
        names.append(fallback_name)

    last_err = None
    for name in names:
        for quant in quant_modes:
            try:
                tok, mdl = _load(name, quant)
                return tok, mdl, name
            except Exception as e:
                last_err = e
                # Report the failure instead of hiding it, then release
                # whatever the failed attempt allocated before the next try.
                print(f"[警告] 載入模型 {name}（quant={quant}）失敗：{e}", file=sys.stderr)
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
    raise RuntimeError(f"載入模型失敗：{last_err}") from last_err

# Load the tokenizer and model once at module level; the third return value
# (name of the repo that actually loaded) is discarded.
# NOTE(review): MODEL and FALLBACK_MODEL are not defined in the visible part
# of this file; presumably an earlier cell sets them — confirm.
tokenizer, model, _ = load_llm(MODEL, FALLBACK_MODEL)

# Print a short summary of the runtime environment and the loaded model.
print("CUDA available:", torch.cuda.is_available())
print("GPU count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Current GPU:", torch.cuda.get_device_name(0))
print("Model repo:", getattr(model.config, "_name_or_path", None))
try:
    # dtype of the first parameter; StopIteration means there are no parameters.
    print("Model dtype:", getattr(next(iter(model.parameters())), "dtype", "n/a"))
except StopIteration:
    print("Model dtype: n/a")
# Up to eight keys of the device map, or a placeholder when the model has none.
print("Device map keys (sample):", list(getattr(model, "hf_device_map", {"(no map)": "..."}))[:8])

# ================== 英文檢測 & HTML 過濾 ==================
_EN2 = re.compile(r"[A-Za-z]")
def english_letter_count(s: Optional[str]) -> int:
    """Count the ASCII letters (A-Z, a-z) in ``s``; 0 for ``None`` or empty."""
    return sum(1 for _ in _EN2.finditer(s)) if s else 0

def clean_visible_text(s: str) -> str:
    """Return the visible text of ``s``.

    HTML entities are unescaped first, then anything that looks like a tag
    (``<...>``) is removed and the result is stripped. ``None`` gives ``""``.
    """
    if s is None:
        return ""
    unescaped = html.unescape(s)
    without_tags = re.sub(r"<[^>]+>", "", unescaped)
    return without_tags.strip()

# ================== 詞庫（ODS） ==================
def load_lookup_from_ods(folder: str = "data") -> pd.DataFrame:
    """Build the glossary table from every ``*.ods`` file in ``folder``.

    Each spreadsheet must provide the columns ``英文名稱`` and ``中文名稱``;
    they are renamed to ``en`` / ``zh``. Files that cannot be read or lack
    the columns are reported on stderr and skipped.

    Returns:
        A DataFrame with columns ``en`` and ``zh``, without null rows or
        duplicates. It is empty when nothing usable was found.
    """
    ods_paths = sorted(glob.glob(os.path.join(folder, "*.ods")))
    if not ods_paths:
        print(f"[提示] 詞庫資料夾 {folder} 內找不到 .ods 檔案，將不提供 glossary。", file=sys.stderr)
        return pd.DataFrame({"en": [], "zh": []})

    wanted = ["英文名稱", "中文名稱"]
    frames = []
    for path in ods_paths:
        try:
            sheet = pd.read_excel(path, engine="odf")
            if not all(col in sheet.columns for col in wanted):
                print(f"[略過] {path} 缺少『英文名稱/中文名稱』欄位", file=sys.stderr)
                continue
            pair = sheet[wanted].copy()
            pair.columns = ["en", "zh"]
            frames.append(pair)
        except Exception as e:
            print(f"[警告] 無法讀取 {path}: {e}", file=sys.stderr)

    if not frames:
        print("[提示] 未取得任何有效的詞庫資料。", file=sys.stderr)
        return pd.DataFrame({"en": [], "zh": []})

    merged = pd.concat(frames, ignore_index=True)
    merged = merged.dropna(subset=["en", "zh"])
    merged = merged.drop_duplicates()
    return merged.reset_index(drop=True)

# ================== LCS / Glossary ==================
def soft_norm(s: str) -> str:
    """Normalize a term for fuzzy lookup.

    The text is lower-cased, every run of whitespace or ``/ _ - . :`` becomes
    a single space, and leading/trailing spaces are removed.
    """
    lowered = s.lower()
    collapsed = re.sub(r'[\s/_\-.:]+', ' ', lowered)
    return collapsed.strip()

_GLOSSARY_FILTER_PAT = re.compile(
    r'(</?[A-Za-z][^>]*>|&lt;/?[A-Za-z][^&]*?&gt;|%L\d+|%\d+|%n|\{\d+\}|&(?:[A-Za-z]+|#\d+|#x[0-9A-Fa-f]+);)',
    flags=re.IGNORECASE
)
def _clean_for_glossary(text: str) -> str:
    return _GLOSSARY_FILTER_PAT.sub(' ', text)

class LCSMatcher:
    """Glossary matcher that looks up word n-grams of a sentence.

    English terms are indexed by their ``soft_norm`` form. For a sentence,
    every window of consecutive tokens (longest windows first) is looked up
    in that index to collect ``{english term: Chinese term}`` hints.
    """

    _TOKEN_RE = re.compile(r"[A-Za-z0-9]+(?:[\/_\.:\-][A-Za-z0-9]+)*")
    # A Chinese entry listing alternatives (separators, or a standalone "或")
    # is too ambiguous to hand to the model as a hint.
    _AMBIGUOUS_ZH_RE = re.compile(r"[；;、/]|(?:^|[^一-龥])或([^一-龥]|$)")

    def __init__(self, lookup_df: pd.DataFrame):
        """Index ``lookup_df`` (columns ``en`` and ``zh``) by normalized term.

        The first row seen for a normalized key wins.
        """
        self.lookup = lookup_df.copy()
        self.soft_index: Dict[str, Tuple[str, str]] = {}
        self.max_soft_len = 1
        if self.lookup.empty:
            return

        # Spreadsheet cells can be parsed as numbers; coerce to str so that
        # soft_norm's .lower() cannot fail on a non-string cell.
        self.lookup["en_soft"] = self.lookup["en"].map(lambda v: soft_norm(str(v)))
        for en, zh, key in zip(self.lookup["en"], self.lookup["zh"], self.lookup["en_soft"]):
            if pd.isna(en) or pd.isna(zh) or not key:
                continue  # nothing usable to index
            self.soft_index.setdefault(key, (str(en), str(zh)))
        # Longest term, in words, bounds the window size tried per sentence.
        self.max_soft_len = max((len(k.split()) for k in self.lookup["en_soft"]), default=1)

    def build_glossary_sentence_first(self, text: str, *, limit: int = 8) -> Dict[str, str]:
        """Collect up to ``limit`` glossary hints for ``text``.

        Tags, placeholders and entities are removed before tokenizing.
        Longer phrases are matched before shorter ones, and ambiguous Chinese
        entries are skipped.

        Returns:
            Mapping of the glossary's English term to its Chinese term.
        """
        if not self.soft_index or not text:
            return {}
        toks = self._TOKEN_RE.findall(_clean_for_glossary(text).lower())
        glossary: Dict[str, str] = {}
        for width in range(min(len(toks), self.max_soft_len), 0, -1):
            if len(glossary) >= limit:
                break
            for start in range(len(toks) - width + 1):
                key = soft_norm(" ".join(toks[start:start + width]))
                entry = self.soft_index.get(key)
                if entry is None or entry[0] in glossary:
                    continue
                en, zh = entry
                if self._AMBIGUOUS_ZH_RE.search(str(zh)):
                    continue
                glossary[en] = zh
                if len(glossary) >= limit:
                    break
        return glossary

# ================== Prompt ==================
# System prompt for the first translation attempt: translate the English text
# into zh-Hant (Taiwan), keep tags / entities / placeholders exactly as in the
# source, and answer in the form <zh>...</zh>.
# NOTE(review): the "HTML/XML tags" bullet opens a parenthesis "(e.g., ..."
# that is never closed in the prompt text.
SYS_PROMPT = (
    "You are translating software UI strings into zh-Hant (Taiwan).\n"
    "Translate ONLY the human-readable English text. Do NOT change, translate, add, "
    "remove, re-order, or add spaces around ANY of the following:\n"
    "• HTML/XML tags and their attributes (e.g., <b>, </p>, <a href=\"...\">\n"
    "• HTML/XML entities: &NAME; (e.g., &amp;, &lt;, &gt;, &nbsp;), numeric (&#123;), hex (&#x1A;)\n"
    "• Qt/printf-style placeholders & format specifiers: %n, %1, %2, %L1, {0}, {1}, {2}, "
    "%#, %s, %d, %f, %.2f, %.*f, %%\n"
    "Never invent or output artificial placeholders like @@0@@, @@1@@, or empty braces {}.\n"
    "Keep ALL of those tokens EXACTLY as in SOURCE (same spelling, case, order, and spacing).\n"
    "If a string mixes text and tags/placeholders, translate only the plain English words and "
    "leave tags/placeholders untouched and in the same positions.\n"
    "Do NOT echo the source or the hints and do NOT add explanations.\n"
    "Reply in this exact format:\n"
    "<zh>{translation}</zh>\n"
    "If the source is empty, reply <zh></zh>.\n"
)

# Stricter variant: SYS_PROMPT plus an explicit requirement that the output
# contain CJK characters and that very short UI labels are always translated.
STRICT_SYS_PROMPT = SYS_PROMPT + (
    "\nIMPORTANT:\n"
    "- The output MUST contain Chinese characters (CJK). If you leave any English letters "
    "outside the allowed tokens/tags/placeholders, that is a failure.\n"
    "- For very short UI labels, ALWAYS translate (e.g., Error→錯誤, Request→請求, Response→回應, Tools→工具).\n"
)

def apply_chat_template_strict(tokenizer, user_prompt: str) -> str:
    """Render the strict system prompt plus ``user_prompt`` as a chat string.

    The tokenizer's own chat template is used with a generation prompt
    appended. If rendering fails for any reason, a plain
    ``SYSTEM: / USER: / ASSISTANT:`` layout is returned instead.
    """
    conversation = [
        {"role": "system", "content": STRICT_SYS_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    try:
        rendered = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True,
        )
    except Exception:
        # Best-effort fallback for tokenizers without a usable chat template.
        rendered = "".join((
            "SYSTEM:\n",
            STRICT_SYS_PROMPT.strip(),
            "\n\nUSER:\n",
            user_prompt,
            "\n\nASSISTANT:",
        ))
    return rendered

def build_user_prompt(source_text: str, hints: Dict[str, str]) -> str:
    """Compose the user message: source text, glossary hints, reply format.

    Args:
        source_text: String to translate; ``None`` is rendered as an empty line.
        hints: English-to-Chinese glossary hints; ``(none)`` is shown when empty.

    Returns:
        The newline-joined prompt text.
    """
    if hints:
        hint_lines = [f"- {en} -> {zh}" for en, zh in hints.items()]
    else:
        hint_lines = ["(none)"]
    parts = [
        "SOURCE:",
        "" if source_text is None else source_text,
        "",
        "HINTS (do not copy, just reference):",
        *hint_lines,
        "",
        "Reply ONLY with one line: <zh>{translation}</zh>",
    ]
    return "\n".join(parts)

def apply_chat_template(tokenizer, user_prompt: str) -> str:
    """Render the default system prompt plus ``user_prompt`` as a chat string.

    The tokenizer's own chat template is used with a generation prompt
    appended. If rendering fails for any reason, a plain
    ``SYSTEM: / USER: / ASSISTANT:`` layout is returned instead.
    """
    conversation = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    try:
        rendered = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True,
        )
    except Exception:
        # Best-effort fallback for tokenizers without a usable chat template.
        rendered = "".join((
            "SYSTEM:\n",
            SYS_PROMPT.strip(),
            "\n\nUSER:\n",
            user_prompt,
            "\n\nASSISTANT:",
        ))
    return rendered

# ================== 後處理 ==================
_ZH_TAG_RE = re.compile(r"<zh>(.*?)</zh>", flags=re.S)
META_LINE_PAT = re.compile(r"^(?:- |\u2022)?\s*(?:原文|譯文|翻譯|Translation|Original)\s*[:：]", flags=re.IGNORECASE)

def drop_meta_lines(s: str) -> str:
    kept = []
    for ln in s.splitlines():
        if META_LINE_PAT.search(ln): continue
        kept.append(ln)
    return "\n".join(kept).strip()

def strip_label_prefixes(src: str, out_s: str) -> str:
    """Remove one leading label such as ``譯文：`` or ``Translation:`` from ``out_s``.

    The label is only removed when ``src`` has at least two ASCII letters or
    starts (after stripping) with ``<`` or ``&lt;``; otherwise ``out_s`` is
    returned untouched.
    """
    applies = english_letter_count(src) >= 2 or src.strip().startswith(("<", "&lt;"))
    if not applies:
        return out_s
    return re.sub(r"^(?:原文|譯文|翻譯|Translation|Original|請求|回應|說明)\s*[:：]\s*", "", out_s)

# Patterns for spans that have to survive translation untouched.
_ENC_TAG_RE       = re.compile(r"&lt;/?[A-Za-z][^&]*?&gt;")          # entity-encoded tag
_TAG_RE           = re.compile(r"</?[^>]+?>")                        # raw tag
_ENTITY_RE2       = re.compile(r"&(?:[A-Za-z]+|#[0-9]+|#x[0-9A-Fa-f]+);")  # HTML entity
_PLACEHOLDER_RE_1 = re.compile(r"%(?:L\d+|\d+|n)")                   # %L1, %1, %n
_PLACEHOLDER_RE_2 = re.compile(r"\{\d+\}")                           # {0}
_GLOB_RE          = re.compile(r"(?<!\w)(?:\*|\?)[A-Za-z0-9._-]+")   # *.ext / ?name

def strip_added_spans_not_in_src(src: str, out_s: str) -> str:
    """Delete tag/entity/placeholder/glob spans of ``out_s`` absent from ``src``.

    A span is kept only when the exact same text occurs as a match of any of
    the patterns in ``src``. Patterns are applied to ``out_s`` one after
    another, in a fixed order.
    """
    patterns = (_ENC_TAG_RE, _TAG_RE, _ENTITY_RE2, _PLACEHOLDER_RE_1, _PLACEHOLDER_RE_2, _GLOB_RE)
    allowed = {m.group(0) for pat in patterns for m in pat.finditer(src)}

    def _keep_if_allowed(match):
        span = match.group(0)
        return span if span in allowed else ""

    cleaned = out_s
    for pat in patterns:
        cleaned = pat.sub(_keep_if_allowed, cleaned)
    return cleaned

def trim_quotes(s: str) -> str:
    """Strip runs of quote characters at the very start or end of ``s``.

    Curly quotes, the straight double quote and corner brackets are handled;
    surrounding whitespace is removed afterwards.
    """
    edge_quotes = r'^(?:[“”"「」])+|(?:[“”"「」])+$'
    unquoted = re.sub(edge_quotes, "", s)
    return unquoted.strip()

def enforce_zh_only_preserve_tags(text: str) -> str:
    """Strip ASCII letters and unexpected symbols while keeping locked spans.

    Tags (raw or entity-encoded), HTML entities and ``%n`` / ``%1`` / ``%L1`` /
    ``{0}`` placeholders are copied through verbatim and stay in position.
    In the text between them, ASCII letters and any character outside the
    allowed CJK / punctuation / digit ranges are removed and whitespace runs
    collapse to one space. The final result is stripped.

    The text is split around the locked spans rather than swapping them for
    temporary marker strings: a marker that contains letters would itself be
    destroyed by the letter-removal step and could never be restored.

    Args:
        text: Candidate translation; falsy input is returned unchanged.

    Returns:
        The filtered text with locked spans intact.
    """
    if not text:
        return text

    locked = re.compile("|".join(
        f"(?:{pat.pattern})"
        for pat in (_ENC_TAG_RE, _TAG_RE, _ENTITY_RE2, _PLACEHOLDER_RE_1, _PLACEHOLDER_RE_2)
    ))

    def _scrub(chunk: str) -> str:
        chunk = re.sub(r"[A-Za-z]+", "", chunk)
        chunk = re.sub(r"[^\s\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF\u3000-\u303F\uFF00-\uFFEF\u2000-\u206F0-9\uFF10-\uFF19\u0021-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]", "", chunk)
        return re.sub(r"\s+", " ", chunk)

    pieces = []
    cursor = 0
    for m in locked.finditer(text):
        pieces.append(_scrub(text[cursor:m.start()]))
        pieces.append(m.group(0))  # locked span: copied through untouched
        cursor = m.end()
    pieces.append(_scrub(text[cursor:]))
    return "".join(pieces).strip()

# === Token 修復 ===
LOCKED_RE_STR = r"(?:&lt;/?[A-Za-z][^&]*?&gt;|</?[^>]+?>|&(?:[A-Za-z]+|#[0-9]+|#x[0-9A-Fa-f]+);|%(?:L\d+|\d+|n)|\{\d+\}|(?<!\w)(?:\*|\?)[A-Za-z0-9._-]+)"
LOCKED_RE = re.compile(LOCKED_RE_STR)
ATAT_TOKEN_RE = re.compile(r"@@\d+@@")

def _collect_locked_tokens(src: str) -> list[str]:
    return [m.group(0) for m in LOCKED_RE.finditer(src)]

def _prefix_locked_concat(src: str) -> str:
    out, pos = [], 0
    while True:
        m = LOCKED_RE.match(src, pos)
        if not m: break
        out.append(m.group(0))
        pos = m.end()
    return "".join(out)

def _suffix_locked_concat(src: str) -> str:
    out = []
    pos = len(src)
    # 尋找連續尾段 token
    while True:
        last = None
        for mm in LOCKED_RE.finditer(src):
            if mm.end() == pos:
                last = mm
        if not last: break
        out.append(last.group(0))
        pos = last.start()
        if pos == 0 or not LOCKED_RE.match(src, pos - 1):
            break
    out.reverse()
    return "".join(out)

def repair_tokens_from_source(src: str, text: str) -> str:
    """Restore the locked tokens of ``src`` in a translated ``text``.

    Steps, in order:
      1. Each ``@@<n>@@`` marker in ``text`` is replaced by the next source
         token (or removed once the source tokens run out).
      2. Spans that do not occur in ``src`` are stripped.
      3. The source's leading / trailing token runs are re-attached when
         ``text`` does not already start / end with them.
      4. Source tokens that are still missing are appended, in source order.

    Step 4 compares occurrence counts, so a token that is already present is
    not appended a second time.

    Args:
        src: Original string whose tokens are authoritative.
        text: Candidate translation; falsy input is returned unchanged.

    Returns:
        ``text`` with the source tokens repaired.
    """
    if not text:
        return text
    tokens = _collect_locked_tokens(src)

    pending = iter(tokens)
    text = ATAT_TOKEN_RE.sub(lambda _m: next(pending, ""), text)
    text = strip_added_spans_not_in_src(src, text)

    pref = _prefix_locked_concat(src)
    suf = _suffix_locked_concat(src)
    if pref and not text.startswith(pref):
        text = pref + text
    if suf and not text.endswith(suf):
        text = text + suf

    # Count what the text already contains, then append only the shortfall.
    present: Dict[str, int] = {}
    for m in LOCKED_RE.finditer(text):
        present[m.group(0)] = present.get(m.group(0), 0) + 1
    missing = []
    for tok in tokens:
        if present.get(tok, 0) > 0:
            present[tok] -= 1
        else:
            missing.append(tok)
    return text + "".join(missing)

# === 若變成空或無中文 → 回退原文 ===
# Any CJK ideograph (unified, extension A, or compatibility block).
_CJK_RE = re.compile(r"[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF]")
def _looks_nonsense(src: str, s: str) -> bool:
    """Tell whether candidate ``s`` is unusable as a translation of ``src``.

    True when the visible text is ``{}``, consists only of punctuation /
    brackets / whitespace (or is empty), is a bare ``{n}`` that does not
    occur in ``src``, or when ``s`` contains no CJK character at all.
    """
    visible = clean_visible_text(s)
    probe = visible or ""
    return bool(
        visible == "{}"
        or re.fullmatch(r"[\s\{\}\[\]\(\)《》〈〉「」『』、，。．…—\-–~!@#$%^&*_|\\:;\"'`,.*?/]*", probe)
        or (re.fullmatch(r"\{\d*\}", probe) and (visible not in src))
        or not _CJK_RE.search(s or "")
    )

def extract_zh_or_fallback(src: str, model_out: str) -> str:
    """Turn raw model output into a translation, or fall back to ``src``.

    The ``<zh>...</zh>`` payload is used when present, otherwise the whole
    output. The candidate then goes through the clean-up chain (meta lines,
    quotes, label prefixes, foreign spans, letter filtering, artificial
    markers, token repair). ``src`` is returned when the model produced
    nothing or the final candidate looks like nonsense.
    """
    if not model_out:
        return src

    tagged = _ZH_TAG_RE.search(model_out)
    candidate = tagged.group(1) if tagged else model_out
    candidate = trim_quotes(drop_meta_lines(candidate.strip()))

    # Steps that need the source string to decide what may stay.
    for src_aware_step in (strip_label_prefixes, strip_added_spans_not_in_src):
        candidate = src_aware_step(src, candidate)

    candidate = ATAT_TOKEN_RE.sub("", enforce_zh_only_preserve_tags(candidate))
    if "{}" in candidate and "{}" not in src:
        # Empty braces invented by the model.
        candidate = candidate.replace("{}", "")

    candidate = repair_tokens_from_source(src, candidate)
    return src if _looks_nonsense(src, candidate) else candidate

# ================== 後備字典（在重試仍失敗時使用） ==================
# Hand-written translations used as a last resort when the model fails to
# translate a string even after the strict retry. Keys are the English source
# with whitespace runs collapsed to single spaces.
UI_FALLBACK_DICT = {
    "Error": "錯誤",
    "Request": "請求",
    "Response": "回應",
    "API Request / Response": "API 請求 / 回應",
    "Tools": "工具",
    "Open Model": "開啟模型",
    "Input parameters": "輸入參數",
    "Outputs": "輸出",
    "There is no active layer.": "沒有活動圖層。",
    "Help author: {0}": "說明作者：{0}",
    "Algorithm author: {0}": "演算法作者：{0}",
    "Algorithm version: {0}": "演算法版本：{0}",
    "Could not prepare selected algorithm.": "無法準備所選的演算法。",
    # "圖層" (layer); the previous value contained the typo "圖屠".
    "Error adding processed features back into the layer.": "將處理後的要素加回圖層時發生錯誤。",
    "Generating prepared API file (please wait)…": "正在產生預先準備的 API 檔案（請稍候）…",
    "Add Model to Toolbox…": "新增模型至工具箱…",
    "Add Script to Toolbox…": "新增腳本至工具箱…",
    "Add script(s)": "新增腳本",
    "Processing models (*.model3 *.MODEL3)": "處理模型 (*.model3 *.MODEL3)",
    "Processing scripts (*.py *.PY)": "處理腳本 (*.py *.PY)",
    "The selected file does not contain a valid model": "選取的檔案不包含有效的模型",
    "Model with the same name already exists": "已有相同名稱的模型",
    "There is already a model file with the same name. Overwrite?": "已有同名的模型檔案。是否覆寫？",
    "Rasterize mesh dataset": "網格資料集光柵化",
}

def dict_fallback_translate(src: str) -> Optional[str]:
    """Look ``src`` up in UI_FALLBACK_DICT.

    The lookup key is ``src`` stripped, with whitespace runs collapsed to one
    space. A hit is passed through ``repair_tokens_from_source`` before being
    returned; ``None`` is returned when there is no entry.
    """
    key = re.sub(r"\s+", " ", (src or "").strip())
    canned = UI_FALLBACK_DICT.get(key)
    if canned is None:
        return None
    return repair_tokens_from_source(src, canned)

# ================== 翻譯（batch ） ==================
# Words the model is barred from generating.
# NOTE(review): several of these (e.g. 翻譯, 說明, 提示) are ordinary words that
# a correct UI translation may need — confirm the ban is intended.
BAD_WORDS = ["assistant","user","system","以下","翻譯","說明","根據","提示","僅供參考"]
def build_bad_words_ids(tokenizer) -> List[List[int]]:
    """Encode every BAD_WORDS entry (no special tokens) into token-id lists.

    Words that encode to an empty id list are left out.
    """
    return [
        ids
        for word in BAD_WORDS
        if (ids := tokenizer.encode(word, add_special_tokens=False))
    ]

@torch.inference_mode()
def generate_texts(tokenizer, model, prompts: List[str]) -> List[str]:
    """Greedy-decode one completion per prompt, batched, with OOM back-off.

    The whole batch is tried with progressively smaller ``max_new_tokens``
    caps. If every batched attempt runs out of GPU memory, the prompts are
    generated one at a time, and a prompt that still fails yields ``""``.

    Args:
        tokenizer: Tokenizer matching ``model``; it is switched to left
            padding and given a pad token if it has none.
        model: Causal LM exposing ``generate``.
        prompts: Fully rendered prompt strings.

    Returns:
        Decoded new text (prompt excluded) for each prompt, in input order.
    """
    if not prompts:
        return []

    tokenizer.padding_side = "left"
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
    bad_words_ids = build_bad_words_ids(tokenizer)

    def _run(ps: List[str], max_new_tokens_cap: int):
        # MAX_TOKENS bounds the tokenized input length; a tokenizer limit is
        # honoured only when it is a real (not sentinel-sized) value.
        max_ctx = getattr(tokenizer, "model_max_length", MAX_TOKENS)
        enc = tokenizer(
            ps,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=min(MAX_TOKENS, max_ctx if max_ctx and max_ctx < 10**7 else MAX_TOKENS),
        )
        # Put inputs where the model's first parameters live; fall back to
        # the default CUDA device when the model does not expose `.device`.
        target = getattr(model, "device", None)
        if target is not None:
            enc = {k: v.to(target) for k, v in enc.items()}
        elif torch.cuda.is_available():
            enc = {k: v.cuda() for k, v in enc.items()}
        else:
            enc = dict(enc.items())

        input_ids = enc["input_ids"]
        # Real prompt length per row. The attention mask is preferred because
        # comparing against pad_token_id miscounts when pad and EOS share an id.
        mask = enc.get("attention_mask")
        if mask is not None:
            prompt_lens = mask.sum(dim=1).tolist()
        else:
            prompt_lens = (input_ids != tokenizer.pad_token_id).sum(dim=1).tolist()
        wanted_per_row = [max(MIN_TOKENS, int(l * NEWTOK_RATIO)) for l in prompt_lens]
        batch_max_new = min(max(wanted_per_row), max_new_tokens_cap)

        gen = model.generate(
            **enc,
            max_new_tokens=batch_max_new,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
            no_repeat_ngram_size=6,
            bad_words_ids=bad_words_ids if bad_words_ids else None,
            use_cache=True,
            return_dict_in_generate=True,
            output_scores=False,
        )
        seq = gen.sequences
        # Every returned row starts with the full padded prompt, so the new
        # tokens begin at the padded width for all rows. Slicing at a row's
        # unpadded length would leak prompt text for shorter rows.
        prompt_width = input_ids.shape[1]
        return [
            tokenizer.decode(seq[i, prompt_width:], skip_special_tokens=True).strip()
            for i in range(seq.size(0))
        ]

    # Whole batch first, shrinking the generation cap after each OOM.
    for cap in (MAX_TOKENS, max(64, MAX_TOKENS // 2), 32):
        try:
            return _run(prompts, cap)
        except torch.cuda.OutOfMemoryError:
            torch.cuda.empty_cache(); gc.collect()

    # Last resort: one prompt at a time.
    results = []
    for pr in prompts:
        ok = False
        for cap in (max(64, MAX_TOKENS // 2), 32):
            try:
                results.extend(_run([pr], cap))
                ok = True
                break
            except torch.cuda.OutOfMemoryError:
                torch.cuda.empty_cache(); gc.collect()
        if not ok:
            results.append("")
    return results

# ================== .ts 解析 ==================
def clean_visible_text_for_ts(raw: str) -> str:
    """Reduce a .ts string to its visible text for log output.

    Entities are unescaped, raw ``<...>`` tags are removed, then any tag that
    is still entity-encoded (``&lt;...&gt;``) is removed, and the result is
    stripped. ``None`` gives ``""``.
    """
    if raw is None:
        return ""
    text = html.unescape(raw)
    for tag_pattern in (r"<[^>]+>", r"&lt;[^&]+?&gt;"):
        text = re.sub(tag_pattern, "", text)
    return text.strip()

def collect_ts_targets_use_source(root: etree._Element) -> Tuple[list, list, list]:
    """Collect the nodes to fill in, paired with their ``<source>`` text.

    Messages with ``numerus="yes"`` are processed first: each existing
    ``<numerusform>`` under ``<translation>`` becomes a target (one is
    created when none exists). All other messages contribute their
    ``<translation>`` element. Missing ``<translation>`` elements are created.
    Messages whose source text is empty are skipped in both passes, so no
    empty prompt is ever sent for translation.

    Args:
        root: Root element of the parsed .ts document (modified in place
            when elements have to be created).

    Returns:
        Three parallel lists ``(targets, sources, kinds)`` where ``kinds``
        holds ``"numerusform"`` or ``"translation"``.
    """
    targets, sources, kinds = [], [], []

    # numerus="yes": write back into <numerusform>, source taken from <source>.
    for msg in root.xpath("//message[@numerus='yes']"):
        src_text = (msg.findtext("./source") or "").strip()
        if not src_text:
            continue  # same rule as for regular messages below
        trans = msg.find("./translation")
        if trans is None:
            trans = etree.SubElement(msg, "translation")
        forms = trans.findall("./numerusform")
        if not forms:
            forms = [etree.SubElement(trans, "numerusform")]
        for form in forms:
            targets.append(form)
            sources.append(src_text)
            kinds.append("numerusform")

    # Regular messages: write back into <translation>, source from <source>.
    for msg in root.xpath("//message[not(@numerus='yes')]"):
        src = msg.find("./source")
        if src is None:
            continue
        source_text = (src.text or "").strip()
        if not source_text:
            continue
        trans = msg.find("./translation")
        if trans is None:
            trans = etree.SubElement(msg, "translation")
        targets.append(trans)
        sources.append(source_text)
        kinds.append("translation")

    return targets, sources, kinds

# ================== 批次翻譯 + 寫回 ==================
def _translate_with_retry(src_text: str, out_text: str, hints: Dict[str, str]) -> str:
    """Resolve one translation: post-process, strict retry, dictionary fallback.

    A result counts as translated when it differs from ``src_text`` and
    contains a CJK character. If the first model output does not qualify, the
    string is generated once more with the strict prompt. If that fails too,
    the fallback dictionary is consulted. When nothing works the first
    post-processed result is returned.
    """
    zh = extract_zh_or_fallback(src_text, out_text)
    if zh != src_text and _CJK_RE.search(zh or ""):
        return zh

    strict_user_prompt = build_user_prompt(src_text, hints) + (
        "\n\nExamples:\n"
        "- Error -> 錯誤\n- Request -> 請求\n- Response -> 回應\n- Tools -> 工具\n"
    )
    strict_chat_prompt = apply_chat_template_strict(tokenizer, strict_user_prompt)
    out2 = generate_texts(tokenizer, model, [strict_chat_prompt])[0]
    zh2 = extract_zh_or_fallback(src_text, out2)
    if zh2 != src_text and _CJK_RE.search(zh2 or ""):
        return zh2

    # Strict retry failed as well: try the small hand-written dictionary.
    return dict_fallback_translate(src_text) or zh


def translate_ts_and_write(
    ts_in: str,
    ts_out: str,
    ods_folder: str = "data",
    *,
    hint_terms_cap: int = 8,
    batch_size: Optional[int] = None,
):
    """Translate every message of a Qt .ts file and write the result.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        ts_in: Path of the .ts file to read.
        ts_out: Path of the .ts file to write.
        ods_folder: Folder with the ``*.ods`` glossary spreadsheets.
        hint_terms_cap: Maximum number of glossary hints per string.
        batch_size: Prompts per generation batch; the module-level ``BATCH``
            is used when omitted. Values below 1 are raised to 1.
    """
    tree = etree.parse(ts_in)
    root = tree.getroot()

    lookup_df = load_lookup_from_ods(ods_folder)
    matcher = LCSMatcher(lookup_df) if not lookup_df.empty else None

    targets, sources, kinds = collect_ts_targets_use_source(root)
    total = len(targets)
    if total == 0:
        print("沒有需要翻譯的項目。")
        return

    print(f"[TS] 待翻譯節點：{total}（translation 與 numerusform 皆以 <source> 為翻譯來源）")

    bs = max(1, batch_size or BATCH)

    for i in tqdm(range(0, total, bs), desc="Batch translating"):
        seg_targets = targets[i:i + bs]
        seg_sources = sources[i:i + bs]
        seg_kinds   = kinds[i:i + bs]

        # Glossary hints are matched against the visible text only.
        hints_list: List[Dict[str, str]] = [
            matcher.build_glossary_sentence_first(clean_visible_text(s), limit=hint_terms_cap)
            if matcher is not None else {}
            for s in seg_sources
        ]
        prompts: List[str] = [
            apply_chat_template(tokenizer, build_user_prompt(s, h))
            for s, h in zip(seg_sources, hints_list)
        ]

        # Debug output (only assembled when debugging is switched on).
        if DEBUG_PROMPTS:
            for j, (src, cp, hints, kind) in enumerate(zip(seg_sources, prompts, hints_list, seg_kinds)):
                idx = i + j + 1
                dprint(f"\n=== [PROMPT {idx}/{total}] ({kind}) ===")
                dprint(f"SOURCE: {to_one_line(src)}")
                dprint("HINTS: " + (" | ".join([f"{en} -> {zh}" for en, zh in hints.items()]) if hints else ""))
                if DEBUG_SHOW_CHAT:
                    dprint("RENDERED_CHAT_PROMPT:")
                    dprint(_truncate(to_one_line(cp)))

        # Inference.
        outs = generate_texts(tokenizer, model, prompts)

        # Post-process and write back.
        for node, src_text, out_text, hints in zip(seg_targets, seg_sources, outs, hints_list):
            zh = _translate_with_retry(src_text, out_text, hints)

            node.text = zh
            # Drop the `type` attribute from the <translation> element.
            # NOTE(review): this also happens when `zh` fell back to the
            # English source — confirm that such entries should lose `type`.
            if node.tag == "translation":
                node.attrib.pop("type", None)
            elif node.tag == "numerusform":
                parent = node.getparent()
                if parent is not None and parent.tag == "translation":
                    parent.attrib.pop("type", None)

            if DEBUG_PROMPTS:
                if not PRINT_RAW_ZH:
                    en_print = clean_visible_text_for_ts(src_text)
                    zh_print = clean_visible_text_for_ts(zh)
                else:
                    en_print, zh_print = src_text, zh
                dprint(f"{to_one_line(en_print)} -> {to_one_line(zh_print)}")

                # Optional warning when source tokens are still missing.
                miss_src_tokens = [t for t in _collect_locked_tokens(src_text) if t not in zh]
                if miss_src_tokens:
                    dprint(f"[WARN] missing tokens -> {' | '.join(miss_src_tokens)}")

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    tree.write(ts_out, encoding="utf-8", xml_declaration=True, pretty_print=True)
    print(f"✅ 已輸出翻譯檔案：{ts_out}")

# ================== 執行 ==================
# Entry point: translate INPUT_FILENAME into OUTPUT_FILENAME using the
# glossary spreadsheets in ODS_DIR.
# NOTE(review): INPUT_FILENAME, OUTPUT_FILENAME, ODS_DIR and BATCH are not
# defined in the visible part of this file; presumably an earlier cell sets
# them — confirm.
if __name__ == "__main__":
    translate_ts_and_write(
        ts_in=INPUT_FILENAME,
        ts_out=OUTPUT_FILENAME,
        ods_folder=ODS_DIR,
        hint_terms_cap=8,
        batch_size=BATCH,
    )


The repository THUDM/glm-4-9b-chat contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/THUDM/glm-4-9b-chat .
 You can inspect the repository content at https://hf.co/THUDM/glm-4-9b-chat.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y
The repository THUDM/glm-4-9b-chat contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/THUDM/glm-4-9b-chat .
 You can inspect the repository content at https://hf.co/THUDM/glm-4-9b-chat.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y


2025-08-26 07:55:30.685481: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756194930.707929     234 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756194930.715253     234 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1756194930.733861     234 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1756194930.733885     234 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1756194930.733887     234 computation_placer.cc:177] computation placer alr

The repository THUDM/glm-4-9b-chat contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/THUDM/glm-4-9b-chat .
 You can inspect the repository content at https://hf.co/THUDM/glm-4-9b-chat.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y


Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]

CUDA available: True
GPU count: 1
Current GPU: NVIDIA GeForce RTX 3090
Model repo: THUDM/glm-4-9b-chat
Model dtype: torch.bfloat16
Device map keys (sample): ['transformer']
[TS] 待翻譯節點：2711（translation 與 numerusform 皆以 <source> 為翻譯來源）


Batch translating:   0%|          | 0/170 [00:00<?, ?it/s]
=== [PROMPT 1/2711] (numerusform) ===
SOURCE: Showing {0} - {1} of %n result(s)
HINTS: 

=== [PROMPT 2/2711] (numerusform) ===
SOURCE: Showing {0} - {1} of %n result(s)
HINTS: 

=== [PROMPT 3/2711] (translation) ===
SOURCE: <html><body><h2>Algorithm description</h2>
HINTS: algorithm -> 演算法

=== [PROMPT 4/2711] (translation) ===
SOURCE: Active layer is not a vector layer.
HINTS: vector -> 向量

=== [PROMPT 5/2711] (translation) ===
SOURCE: Active layer is not editable (and editing could not be turned on).
HINTS: 

=== [PROMPT 6/2711] (translation) ===
SOURCE: Selected algorithm and parameter configuration are not compatible with in-place modifications.
HINTS: algorithm -> 演算法 | parameter -> 參數

=== [PROMPT 7/2711] (translation) ===
SOURCE: Could not prepare selected algorithm.
HINTS: algorithm -> 演算法

=== [PROMPT 8/2711] (translation) ===
SOURCE: Error adding processed features back into the layer.
HINTS: 

=== [PROMPT 9/2711] (tr