# In [None]:  (notebook cell marker left over from export)
import os
import time
import random
import pandas as pd
from openai import OpenAI

# API client used by call_llm for chat-completion requests. Both the
# environment-variable name given to os.getenv and the base_url are blank
# placeholders in this file.
client = OpenAI(
    api_key=os.getenv(""),
    base_url="",
)

# Model identifier passed as `model=` on every request (blank placeholder).
MODEL_NAME = ""

# Content of the system message that build_messages puts first in every
# conversation; as written it holds only a newline.
SYSTEM_PROMPT = """
"""

def build_messages(project_id, content):
    """Assemble the two-message chat payload for one project description.

    Args:
        project_id: Identifier interpolated into the user message.
        content: Description text; surrounding whitespace is trimmed.

    Returns:
        A list holding the system message (SYSTEM_PROMPT) followed by the
        user message containing the project ID and the trimmed description.
    """
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_text = f"项目编号：{project_id}\n文本描述：{content.strip()}"
    user_message = {"role": "user", "content": user_text}
    return [system_message, user_message]

def call_llm(project_id, content, max_retries=3):
    """Request a chat completion for one description, retrying on failure.

    Args:
        project_id: Identifier forwarded to build_messages.
        content: Description text forwarded to build_messages.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        A ``(text, usage)`` tuple: the stripped content of the first choice
        and the ``usage`` object of the completion. ``(None, None)`` when
        every attempt failed (or ``max_retries`` is not positive).
    """
    messages = build_messages(project_id, content)
    for attempt in range(max_retries):
        try:
            completion = client.chat.completions.create(
                model=MODEL_NAME,
                messages=messages,
            )
            reply = completion.choices[0].message.content
            if reply is None:
                # Report a missing body explicitly instead of letting
                # `.strip()` fail with an opaque AttributeError.
                raise ValueError("模型返回内容为空")
            return reply.strip(), completion.usage
        except Exception as e:
            # Retry boundary: any failure of this attempt is reported and,
            # if attempts remain, retried after a backoff.
            print(f"⚠️ API调用失败（第{attempt+1}次）：{e}")
            if attempt < max_retries - 1:
                # Exponential backoff capped at 10 s with ±20 % jitter.
                # Skipped after the final attempt, where no retry follows.
                time.sleep(min(2 ** attempt, 10) * random.uniform(0.8, 1.2))
    return None, None

def parse_response(text):
    """Extract the known ``key：value`` fields from a model reply.

    Each line is split at its first colon, which may be the full-width
    ``：`` or the ASCII ``:`` (models do not always keep the full-width
    form). Lines whose key is not one of the expected field names are
    ignored. If a key appears more than once, the last occurrence wins.

    Args:
        text: Raw reply text; ``None`` or an empty string yields all-blank
            fields.

    Returns:
        A dict with the six expected keys, each mapped to the parsed value
        or ``""`` when the reply did not contain it.
    """
    keys = ["项目编号", "视觉关键词", "听觉关键词", "嗅觉关键词", "味觉关键词", "触觉关键词"]
    result = {k: "" for k in keys}
    if not text:
        return result

    for line in text.splitlines():
        # Position of the earliest separator of either kind on this line.
        cut_points = [pos for pos in (line.find("："), line.find(":")) if pos != -1]
        if not cut_points:
            continue
        cut = min(cut_points)
        key = line[:cut].strip()
        if key in result:
            # Only the first separator splits; later colons stay in the value.
            result[key] = line[cut + 1:].strip()
    return result

def process_txt_to_excel(txt_path, input_root, output_root):
    """Run one text file through the model and save the result as .xlsx.

    The output file mirrors the input's directory layout: it is written
    under ``output_root`` at the same relative directory, with the same
    base name and an ``.xlsx`` extension. The name of the directory that
    directly contains the text file is used as the project ID.

    Args:
        txt_path: Path of the text file to process.
        input_root: Root directory that ``txt_path`` is made relative to.
        output_root: Root directory under which the Excel file is written.

    Returns:
        The ``usage`` object returned by call_llm on success, or ``None``
        when the model call produced no output.
    """
    filename = os.path.splitext(os.path.basename(txt_path))[0]
    rel_dir = os.path.dirname(os.path.relpath(txt_path, input_root))
    project_id = os.path.basename(rel_dir)

    out_dir = os.path.join(output_root, rel_dir)
    os.makedirs(out_dir, exist_ok=True)
    excel_path = os.path.join(out_dir, f"{filename}.xlsx")

    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
    # str.strip() would not remove and which would otherwise end up in both
    # the prompt and the Excel cell.
    with open(txt_path, "r", encoding="utf-8-sig") as f:
        content = f.read().strip()

    llm_output, usage = call_llm(project_id, content)
    if not llm_output:
        print(f"❌ 处理失败: {txt_path}")
        return None

    parsed = parse_response(llm_output)
    if not parsed.get("项目编号"):
        # The model did not echo the project ID; use the one derived from
        # the path rather than leaving the column blank.
        parsed["项目编号"] = project_id
    parsed["文本描述"] = content

    column_order = ["项目编号", "文本描述", "视觉关键词", "听觉关键词", "嗅觉关键词", "味觉关键词", "触觉关键词"]
    df = pd.DataFrame([parsed], columns=column_order)
    df.to_excel(excel_path, index=False)

    print(f"✅ 输出完成: {excel_path}")

    # Tolerate a missing usage object or missing/None counters so that cost
    # reporting cannot fail after the Excel file has already been written.
    prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
    completion_tokens = getattr(usage, "completion_tokens", 0) or 0
    # Prices per 1000 tokens, in the currency printed below (元).
    cost_input = prompt_tokens / 1000 * 0.0008
    cost_output = completion_tokens / 1000 * 0.002
    cost_total = cost_input + cost_output

    print(f"  本次文件Token统计：输入 {prompt_tokens}，输出 {completion_tokens}")
    print(f"  本次文件花费：输入 {cost_input:.6f} 元 + 输出 {cost_output:.6f} 元 = {cost_total:.6f} 元\n")

    return usage

def process_all_txts(input_folder, output_folder):
    """Process every .txt file under a folder and print token/cost totals.

    Walks ``input_folder`` recursively, hands each file whose name ends in
    ``.txt`` (case-insensitive) to process_txt_to_excel, and sums the
    prompt and completion token counts of the usage objects it returns.

    Args:
        input_folder: Root directory searched for text files.
        output_folder: Root directory passed on as the output location.
    """
    txt_files = []
    for root, _, files in os.walk(input_folder):
        for name in files:
            if name.lower().endswith(".txt"):
                txt_files.append(os.path.join(root, name))
    file_count = len(txt_files)
    print(f"共发现 {file_count} 个txt文件待处理")

    prompt_sum = 0
    completion_sum = 0
    for position, txt_path in enumerate(txt_files, start=1):
        print(f"[{position}/{file_count}] 正在处理：{txt_path}")
        usage = process_txt_to_excel(txt_path, input_folder, output_folder)
        if not usage:
            # Failed files contribute nothing to the totals.
            continue
        prompt_sum += getattr(usage, "prompt_tokens", 0)
        completion_sum += getattr(usage, "completion_tokens", 0)

    print("\n=== 统计结果 ===")
    print(f"总输入Token数: {prompt_sum}，约{prompt_sum/1000:.3f}千Token")
    print(f"总输出Token数: {completion_sum}，约{completion_sum/1000:.3f}千Token")

    # Prices per 1000 tokens, in the currency printed below (元).
    input_cost = prompt_sum / 1000 * 0.0008
    output_cost = completion_sum / 1000 * 0.002
    grand_total = input_cost + output_cost

    print(f"总计费用：输入 {input_cost:.6f} 元 + 输出 {output_cost:.6f} 元 = {grand_total:.6f} 元")

# Script entry point: process every .txt file under input_folder and write
# the results under output_folder. Both paths are blank placeholders here.
if __name__ == "__main__":
    input_folder = r""    # root directory searched for .txt files
    output_folder = r""       # root directory receiving the .xlsx files
    process_all_txts(input_folder, output_folder)
