In [None]:
import pandas as pd
import os

# Simplified version - build the target format directly.
#
# Every *.csv file in a folder is read, each row is flattened into a
# "question + lettered options" string plus its answer, and all rows are
# written to a single cmmlu_concat.csv file.

OPTION_LABELS = ('A', 'B', 'C', 'D')
RECORD_COLUMNS = ['question_with_options', 'answer', 'source_file']


def cell_text(row, column):
    """Return ``row[column]`` as text, or ``''`` if it is absent or missing.

    ``row`` may be a ``dict`` or a ``pandas.Series``. Missing values
    (None/NaN) map to the empty string so that the literal text ``'nan'``
    never ends up in a question or an answer.
    """
    if column not in row:
        return ''
    value = row[column]
    if pd.isna(value):
        return ''
    return str(value)


def build_record(row, source_file):
    """Turn one table row into an output record.

    Args:
        row: mapping with the optional keys 'Question', 'A'..'D', 'Answer'.
        source_file: name of the CSV file the row came from.

    Returns:
        A dict with the keys listed in ``RECORD_COLUMNS``. The question text
        is followed by one "<label>. <text>" line per non-blank option.
    """
    lines = [cell_text(row, 'Question')]
    for label in OPTION_LABELS:
        text = cell_text(row, label)
        # Whitespace-only options are treated as absent.
        if text.strip():
            lines.append(f"{label}. {text}")
    return {
        'question_with_options': "\n".join(lines),
        'answer': cell_text(row, 'Answer'),
        'source_file': source_file,
    }


def read_question_table(source):
    """Read one CSV (path or file-like object) with every cell kept as text.

    ``dtype=str`` together with ``keep_default_na=False`` switches off pandas'
    type inference: cells such as "NA", "null" or "None" stay as text instead
    of becoming NaN, and numeric-looking cells such as "1.50" or "007" keep
    their original spelling. Empty cells are read as ''.
    """
    return pd.read_csv(source, dtype=str, keep_default_na=False)


def collect_records(folder_path):
    """Build the records for every ``*.csv`` file directly inside a folder.

    Files are visited in sorted name order, because ``os.listdir`` gives no
    ordering guarantee and the output should be reproducible. One progress
    line is printed per processed file.

    Raises:
        OSError: if ``folder_path`` cannot be listed (e.g. it does not exist).
    """
    records = []
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.csv'):
            continue
        table = read_question_table(os.path.join(folder_path, filename))
        records.extend(
            build_record(row, filename) for row in table.to_dict('records')
        )
        print(f"处理完成: {filename}")
    return records


# True both in a notebook kernel and when run as a script, so the names below
# are still defined at module level; the guard only keeps a plain import of
# this code from touching the filesystem.
if __name__ == "__main__":
    folder_path = "../CMMLU/data/test"
    all_data = collect_records(folder_path)

    # Build the final DataFrame; explicit columns keep the CSV header even
    # when no rows were found.
    final_df = pd.DataFrame(all_data, columns=RECORD_COLUMNS)

    # Save as cmmlu_concat.csv (utf-8-sig writes a BOM at the start of the file).
    final_df.to_csv('cmmlu_concat.csv', index=False, encoding='utf-8-sig')

    print(f"\n处理完成！共{len(final_df)}道题目")
    print("最终格式: 问题(含选项) | 答案 | 来源文件")
    print("\n样例数据:")
    print("=" * 60)
    # Show the first two records as a quick sanity check.
    for _, sample in final_df.head(2).iterrows():
        print(f"问题:\n{sample['question_with_options']}")
        print(f"答案: {sample['answer']}")
        print(f"来源: {sample['source_file']}")
        print("-" * 60)

处理完成: agronomy.csv
处理完成: anatomy.csv
处理完成: ancient_chinese.csv
处理完成: arts.csv
处理完成: astronomy.csv
处理完成: business_ethics.csv
处理完成: chinese_civil_service_exam.csv
处理完成: chinese_driving_rule.csv
处理完成: chinese_food_culture.csv
处理完成: chinese_foreign_policy.csv
处理完成: chinese_history.csv
处理完成: chinese_literature.csv
处理完成: chinese_teacher_qualification.csv
处理完成: clinical_knowledge.csv
处理完成: college_actuarial_science.csv
处理完成: college_education.csv
处理完成: college_engineering_hydrology.csv
处理完成: college_law.csv
处理完成: college_mathematics.csv
处理完成: college_medical_statistics.csv
处理完成: college_medicine.csv
处理完成: computer_science.csv
处理完成: computer_security.csv
处理完成: conceptual_physics.csv
处理完成: construction_project_management.csv
处理完成: economics.csv
处理完成: education.csv
处理完成: electrical_engineering.csv
处理完成: elementary_chinese.csv
处理完成: elementary_commonsense.csv
处理完成: elementary_information_and_technology.csv
处理完成: elementary_mathematics.csv
处理完成: ethnology.csv
处理完成: food_science.csv
处理完成: genetics.