# 运行LBFE，得到论文特征集。特征集保存在目录output-path下，文件名为“运行时间-feature.csv”（例如 2024-01-01-12-00-feature.csv）

In [None]:
#coding:utf-8
import preprocess
import chat_with_LLM as cwl
import logger_config
import sys
from tqdm import tqdm
import CLM
import numpy as np
import copy
import pandas as pd
import re
import time
import LBFE
# Module-level logger used by LBFE_process(), CLM_process() and the feature constructor.
logger = logger_config.myLogger().get_logger()

In [None]:
def _store_scores(df, row, columns, scores):
    """Copy one group of scores into row ``row`` of ``df``, column by column.

    Nothing is written when ``scores`` is None. Scores are read by position,
    so a group shorter than ``columns`` raises IndexError (the caller logs it
    as a failed paper).
    """
    if scores is None:
        return
    for position, column in enumerate(columns):
        df.at[row, column] = scores[position]


def LBFE_process():
    """Evaluate every paper with LBFE and save the resulting feature table.

    Steps:
      1. Read the papers directory, output directory and API key from the
         configuration and list the paper files.
      2. Build the feature constructor, check the account balance and
         initialise the prompt.
      3. For each paper: read and clean it, run the four judgement groups,
         store the scores in a DataFrame and rewrite the CSV.
      4. Log the total token usage and the remaining balance.

    The CSV is written to ``<output-path>/<YYYY-mm-dd-HH-MM>-feature.csv``.
    A failure on a single paper is logged and the loop continues.

    Raises:
        SystemExit: with status 1 when the configuration cannot be read or
            the balance reported by ``check_balance()`` is negative.
    """
    import os  # local import: only this function needs path helpers

    logger.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    logger.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    # Data preparation
    logger.info("数据准备...")
    print("数据准备...")
    try:
        papers_path = preprocess.get_config('PATH', 'papers-path')
        output_path = preprocess.get_config('PATH', 'output-path')
        papers_file_path_list = preprocess.get_file_paths(papers_path)
        api_key = preprocess.get_config('API KEY', 'kimiapi-key')
        # Create the output directory up front: otherwise every to_csv below
        # would fail only after the tokens for a paper were already spent.
        if output_path:
            os.makedirs(output_path, exist_ok=True)
    except Exception as e:
        logger.error("数据准备失败，请检查配置文件！")
        print("数据准备失败，请检查配置文件！")
        logger.error(e)
        # Abort with a non-zero status so callers can tell this was a failure.
        sys.exit(1)
    logger.info("数据准备完成！")
    print("数据准备完成！")

    # Create the feature constructor
    fconstructor = LBFE.featrue_constructor(api_key=api_key, logger=logger)
    # Check the account balance before spending any tokens.
    # NOTE(review): a balance of exactly 0 passes this check - confirm that is intended.
    balance1 = fconstructor.check_balance()
    if balance1 < 0:
        logger.error("账户余额不足，请充值！")
        sys.exit(1)
    logger.info("账户余额为：{}".format(balance1))

    # Build the initial prompt
    logger.info("初始化prompt...")
    initial_prompt = fconstructor.prompt_init()
    logger.info("初始化prompt完成！")

    # Column names of the feature table, grouped the same way as the values
    # returned by four_judgement_process().
    # Completeness: one indicator per contest task.
    completeness_columns = ("群众留言分类任务", "热点问题挖掘任务", "答复意见评价任务")
    # Substantiality: three indicators per contest task.
    substantiality_columns = ("文本处理和特征提取", "群众留言分类", "多方法比较",
                              "热点度量方法", "提取热点问题", "度量方法完备性",
                              "答复意见评价", "合理论据支持", "问题分类评价")
    # Consistency indicators.
    consistency_columns = ("主题一致", "方法一致", "结论一致", "内容覆盖度", "摘要简洁性")
    # Writing-quality indicators.
    writing_columns = ("文字流畅性", "写作规范性", "论文逻辑性", "篇章结构合理性", "论点论据一致性")
    feature_list = (["论文名"] + list(completeness_columns) + list(substantiality_columns)
                    + list(consistency_columns) + list(writing_columns))

    df = pd.DataFrame(columns=feature_list)
    total_tokens = 0
    name = time.strftime("%Y-%m-%d-%H-%M", time.localtime()) + "-feature.csv"
    # os.path.join inserts the separator when the configured directory lacks one.
    feature_path = os.path.join(output_path, name)
    logger.info("开始评价竞赛论文...")
    print("开始评价竞赛论文...")

    for i, paper_path in enumerate(tqdm(papers_file_path_list)):
        # Strip the directory part for both "\" and "/" separators. This is
        # done before the try so the except branch can always name the paper.
        paper_name = re.split(r"[\\/]", str(paper_path))[-1]
        try:
            logger.info("{}:开始评价{}...".format(i+1, paper_name))
            paper_content = preprocess.read_file(paper_path)
            paper_content = preprocess.data_clean(paper_content)
            (feature_completeness, feature_substantiality, feature_consistency,
             feature_writing, cost_tokens) = fconstructor.four_judgement_process(
                paper_name, initial_prompt, paper_content)

            # Write the scores into row i; a group that came back as None is skipped.
            df.at[i, "论文名"] = paper_name
            _store_scores(df, i, completeness_columns, feature_completeness)
            _store_scores(df, i, substantiality_columns, feature_substantiality)
            _store_scores(df, i, consistency_columns, feature_consistency)
            _store_scores(df, i, writing_columns, feature_writing)

            # Rewrite the CSV after every paper, so the results gathered so
            # far are on disk even if a later paper fails.
            df.to_csv(feature_path, index=False, encoding='utf-8')
            logger.info("{}:{}消耗的token数为:{}！".format(i+1, paper_name, cost_tokens))
            total_tokens += cost_tokens
            logger.info("{}:{}评价完成！".format(i+1, paper_name))
        except Exception as e:
            # Best effort: log the failure and carry on with the next paper.
            logger.error("{}:{}评价失败！".format(i+1, paper_name))
            logger.error(e)

    logger.info("所有竞赛论文评价完成！")
    print("所有竞赛论文评价完成！")

    # Report token usage; the log message states the cost in yuan.
    price_per_token = 0.000022
    logger.info("总共消耗的token数为：{}。花费费用为：{}元。".format(total_tokens, total_tokens*price_per_token))
    # Check the balance again after the run
    balance2 = fconstructor.check_balance()
    if balance2 < 0:
        logger.error("账户余额不足，请充值！")
    else:
        logger.info("账户余额为：{}".format(balance2))
    
    
# Run the feature-extraction step defined above.
LBFE_process()

# 将上文提取得到的特征集作为输入，将特征值由文字档次转换为数值指标，得到“时间-level2number.csv”特征集

In [None]:
# Build a feature constructor at module level (the one created inside
# LBFE_process() is local to that function).
api_key=preprocess.get_config('API KEY','kimiapi-key')
fconstructor=LBFE.featrue_constructor(api_key=api_key,logger=logger)
# Placeholder: replace with the path of the feature CSV written by LBFE_process().
feature_path='上一步得到的Feature文件路径'
df_feature=pd.read_csv(feature_path)
# NOTE(review): the return value is discarded; presumably level2number saves
# the converted table itself - confirm against LBFE.
fconstructor.level2number(df_feature)

# 运行CLM，得到综合评价指数，输出文件保存在output-path下，文件名为“Comprehensive_Mark.csv”


In [None]:
def CLM_process(feature_path_number='上一步得到的运行时间+comprehensive-index.csv文件路径',
                output_file=r"./output/Comprehensive_Mark.csv"):
    """Run CLM on the numeric feature table and save the comprehensive result.

    Reads the numeric feature CSV, obtains attribute weights from
    ``comprehensive_judger.entropyWeight``, passes both to
    ``comprehensive_judger.topsis`` and writes the first returned table to
    ``output_file``.

    Args:
        feature_path_number: Path of the numeric feature CSV produced by the
            previous step. The default is a placeholder that must be replaced
            (or overridden by the caller) before the function can succeed.
        output_file: Destination CSV. Its directory is created when missing.

    Returns:
        None. The result is only written to ``output_file``.
    """
    import os  # local import: only needed to prepare the output directory

    logger.info("开始综合评价...")
    print("开始综合评价...")

    df_feature_number = pd.read_csv(feature_path_number)
    comprehensive_judge = CLM.comprehensive_judger()
    Attribute_Weight = comprehensive_judge.entropyWeight(df_feature_number)
    # topsis returns three values; only the first one is saved.
    Result_All, _, _ = comprehensive_judge.topsis(df_feature_number, Attribute_Weight)

    # Make sure the target directory exists so the computed result is not
    # lost to a missing-folder error at the very last step.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    Result_All.to_csv(output_file)

    logger.info("综合评价完成！")
    print("综合评价完成！")
    logger.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    logger.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
# Run the comprehensive-evaluation step defined above.
CLM_process()

# 最后根据综合评价指数，设计合理的综合评分方法，即可得到最终结果