In [1]:
import pandas as pd
import requests
from tqdm import tqdm

# ========== Configuration ==========
# Endpoint and credential used by call_gpt; both are blank placeholders to fill in.
API_URL = ""  # API URL goes here
API_KEY = ""  # API key goes here

# Workbook the results are written to, and the workbook the questions are read from.
OUTPUT_FILE = "GPT模型结果_new.xlsx"
INPUT_FILE = "合理用药数据集v4.1-测试用.xlsx"

# System prompt for each question type ("单选" = single choice, "多选" = multiple choice).
# The main loop looks prompts up by worksheet name, so the keys double as sheet names.
PROMPTS = dict(
    [
        ("单选", "以下是关于中药处方审核的单选题，请根据规则选择正确的选项。仅输出选项即可。"),
        ("多选", "以下是关于中药处方审核的多选题，请根据规则选择所有正确的选项。仅输出选项即可。"),
    ]
)

# ========== Generic API call helper ==========
def call_gpt(prompt: str, question: str, candidate_answers: str) -> str:
    """Send one question to the configured endpoint and return the reply text.

    Args:
        prompt: Text sent as the ``system`` message.
        question: Question text, embedded in the ``user`` message.
        candidate_answers: Option text, embedded in the ``user`` message
            after the question.

    Returns:
        The whitespace-stripped ``content`` of the first choice in the JSON
        response. If anything raises while sending the request or reading the
        response, the exception is printed and the literal string ``"Error"``
        is returned instead.
    """
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }
    user_message = f"题目：{question}\n选项：{candidate_answers}"
    request_body = {
        "model": "gpt-4o",
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_message},
        ],
        "max_tokens": 8,
        "temperature": 0,
    }

    try:
        response = requests.post(
            API_URL, headers=auth_headers, json=request_body, timeout=30
        )
        # Turn HTTP 4xx/5xx into an exception so it is handled below.
        response.raise_for_status()
        first_choice = response.json()["choices"][0]
        reply = first_choice["message"]["content"].strip()
    except Exception as exc:
        print(f"API Error: {exc}")
        return "Error"
    return reply

# ========== Main pipeline ==========
# For each question sheet: ask the model every question, reduce each reply to
# option letters, and write the sheet plus a "模型答案" column to OUTPUT_FILE.

# Must match the string call_gpt returns when a request fails.
API_ERROR_SENTINEL = "Error"
# Characters kept from a model reply: option letters and common separators.
ALLOWED_ANSWER_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ、，, "

# Context managers make sure the input workbook handle is released as well as
# the output workbook being saved.
with pd.ExcelFile(INPUT_FILE) as xls, pd.ExcelWriter(OUTPUT_FILE) as writer:
    for sheet in ["单选", "多选"]:
        if sheet not in xls.sheet_names:
            print(f"⚠️ 找不到工作表：{sheet}，跳过")
            continue

        df = pd.read_excel(xls, sheet_name=sheet)
        prompt = PROMPTS[sheet]

        model_answers = []
        failed_calls = 0
        # iterrows() is a generator without a length; passing total= lets tqdm
        # show a real progress bar and ETA instead of a bare counter.
        for _, row in tqdm(df.iterrows(), total=len(df), desc=f"{sheet} 处理中"):
            question = row["Question"]
            candidate_answers = row["Candidate answers"]
            ans = call_gpt(prompt, question, candidate_answers)

            if ans == API_ERROR_SENTINEL:
                # The sentinel must not go through the letter filter below:
                # "Error" would be reduced to "E" and recorded as if the model
                # had chosen option E. Record a blank answer instead.
                failed_calls += 1
                model_answers.append("")
                continue

            # Light cleanup: drop explanation text, keep only option letters
            # and separators, then normalise separators to "、".
            ans = "".join(c for c in ans if c in ALLOWED_ANSWER_CHARS)
            ans = ans.replace("，", "、").replace(",", "、").strip()
            model_answers.append(ans)

        if failed_calls:
            print(f"⚠️ {sheet}：{failed_calls} 次 API 调用失败，对应答案已留空")

        df["模型答案"] = model_answers
        df.to_excel(writer, sheet_name=sheet, index=False)

print("✅ 处理完成，结果已保存到：", OUTPUT_FILE)

单选 处理中: 297it [09:24,  1.90s/it]
多选 处理中: 31it [02:40,  5.17s/it]

✅ 处理完成，结果已保存到： GPT模型结果_new.xlsx





In [2]:
import pandas as pd
import re

# Cleaned answers go to a separate workbook, so the input workbook is not overwritten.
OUTPUT_FILE = "GPT模型结果_new_清洗后.xlsx"
# Workbook to clean (same filename the first cell writes its results to).
INPUT_FILE = "GPT模型结果_new.xlsx"

def clean_answer(ans):
    """Normalise a model answer to its option letters.

    The value is converted to ``str`` and upper-cased, every ASCII letter
    A-Z is collected in order of appearance, and the letters are joined with
    single spaces (a lone letter is returned as-is, no letters gives ``""``).
    Missing values (anything ``pd.isna`` reports as NA) also give ``""``.
    """
    if pd.isna(ans):
        return ""
    text = str(ans).upper()
    # Joining a one-element list yields just that element, so a single-letter
    # answer needs no special case.
    picked = [match.group() for match in re.finditer(r"[A-Z]", text)]
    return " ".join(picked)

# Read every sheet of INPUT_FILE, normalise the "模型答案" column where a sheet
# has one, and write all sheets to OUTPUT_FILE.
# The input workbook is opened as a context manager so its file handle is
# closed when the block ends instead of lingering in the kernel.
with pd.ExcelFile(INPUT_FILE) as xls, pd.ExcelWriter(OUTPUT_FILE) as writer:
    for sheet in xls.sheet_names:
        df = pd.read_excel(xls, sheet_name=sheet)
        if "模型答案" in df.columns:
            df["模型答案"] = df["模型答案"].apply(clean_answer)
        df.to_excel(writer, sheet_name=sheet, index=False)

print("清洗完成 →", OUTPUT_FILE)

清洗完成 → GPT模型结果_new_清洗后.xlsx
