In [10]:
import os
import requests
import json
import pandas as pd
import random
import time
import re
import sqlite3
import datetime
import nltk
from nltk.translate.meteor_score import meteor_score
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from sentence_transformers import SentenceTransformer, util

from dotenv import load_dotenv, find_dotenv
import warnings
 
# Ignore all warnings.
warnings.filterwarnings('ignore')

# Load the local/project environment variables.

# find_dotenv() searches for and locates the path of the .env file.
# load_dotenv() reads that .env file and loads its variables into the current runtime environment.
# If the variables are already set globally, this line has no effect.
_ = load_dotenv(find_dotenv())
# All four settings below are mandatory: os.environ[...] raises KeyError when a key is missing.
coze_api_key=os.environ["COZE_API_KEY"]
COZE_BOT_ID = os.environ["COZE_BOT_ID"]
# NOTE(review): the Python name ends in _ROLE but the environment key ends in _RULE —
# confirm which spelling is intended.
COZE_BOT_ID_ROLE = os.environ["COZE_BOT_ID_RULE"]
COZE_BOT_WORD = os.environ["COZE_BOT_WORD"]

# Load the pre-trained Sentence-BERT model.
# NOTE(review): this is an absolute, machine-specific path, and the recorded cell output says
# "No sentence-transformers model found ... Creating a new one with mean pooling" —
# confirm that the directory holds the intended model before trusting the similarity scores.
sbert_model = SentenceTransformer("/root/translation/model")

No sentence-transformers model found with name /root/translation/model. Creating a new one with mean pooling.


In [11]:
def send_request_word(personal_access_token, con_id, bot_id, user_id, query, custom_variables, timeout=120):
    """Send one non-streaming chat request, with word suggestions, to the Coze v2 chat API.

    Args:
        personal_access_token: Coze personal access token, sent as a Bearer token.
        con_id: Conversation id forwarded as ``conversation_id``.
        bot_id: Id of the bot that should answer.
        user_id: Identifier forwarded as ``user``.
        query: Text to send to the bot.
        custom_variables: Value forwarded to the bot as the ``suggestions``
            custom variable.
        timeout: Seconds to wait for the server (passed to ``requests.post``).
            Without a timeout a stalled connection blocks the notebook forever;
            pass ``None`` to restore the old wait-indefinitely behaviour.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        ValueError: If the response body is not valid JSON.
    """
    url = 'https://api.coze.cn/open_api/v2/chat'

    headers = {
        'Authorization': f'Bearer {personal_access_token}',
        'Content-Type': 'application/json',
        'Accept': '*/*',
        'Host': 'api.coze.cn',
        'Connection': 'keep-alive'
    }

    data = {
        'conversation_id': con_id,
        'bot_id': bot_id,
        'user': user_id,
        'query': query,
        'stream': False,
        'custom_variables':{
            'suggestions': custom_variables
        }
    }

    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    return response.json()

def get_word_suggestions(input_text, list_of_table):
    """Build "replace word X with Y" hints for vocabulary-table words found in a text.

    Args:
        input_text: Text to scan. It is lower-cased and split with
            ``nltk.word_tokenize``.
        list_of_table: List of rows whose first row is the header. The header
            must contain a ``Non-STE`` column; the first column supplies the
            word to replace and the third column its replacement.

    Returns:
        A list of lower-cased suggestion strings, one per distinct matching
        table row, ordered by first appearance of the word in ``input_text``.

    Notes:
        * Matching is done token by token, so table entries that contain a
          space (for example ``"have to"``) never match.
        * Part-of-speech filtering is not applied; the table's POS columns are
          ignored.
    """
    # Strip surrounding whitespace from every string cell before building the
    # frame; this avoids the deprecated DataFrame.applymap.
    cleaned_rows = [
        [cell.strip() if isinstance(cell, str) else cell for cell in row]
        for row in list_of_table[1:]
    ]
    df = pd.DataFrame(cleaned_rows, columns=list_of_table[0])
    # Lower-case the first column so it can be compared with lower-cased tokens.
    df.iloc[:, 0] = df.iloc[:, 0].str.lower()

    # Map each Non-STE word to the first table row that defines it, so every
    # token needs one dictionary lookup instead of a scan of the whole frame.
    first_row_for_word = {}
    for word, row in zip(df['Non-STE'].tolist(), df.values.tolist()):
        first_row_for_word.setdefault(word, tuple(row))

    tokens = nltk.word_tokenize(input_text.lower())
    matched_rows = [first_row_for_word[token] for token in tokens if token in first_row_for_word]

    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would return the suggestions in a different order from run to run.
    unique_rows = list(dict.fromkeys(matched_rows))

    return [
        ("replace word '" + row[0] + "' with '" + row[2] + "'").lower()
        for row in unique_rows
    ]

In [12]:
def call_coze_api(query,bot_id,con_id='123',user_id='zmx'):
    """Ask a Coze bot one question and return the parsed reply as a dict.

    The raw reply is fetched with ``send_request`` (using the module-level
    ``coze_api_key``) and reduced by ``parse_message_object``. When the parsed
    reply carries non-empty plugin output, the dict produced by
    ``plugin_text_process`` is merged into the result.
    """
    raw_reply = send_request(coze_api_key, con_id, bot_id, user_id, query)
    parsed = parse_message_object(raw_reply)
    plugin_payload = parsed["plugin"]
    if plugin_payload == "":
        # No plugin output: the parsed reply is already complete.
        return parsed
    parsed.update(plugin_text_process(plugin_payload))
    return parsed

def send_request(personal_access_token, con_id, bot_id, user_id, query, timeout=120):
    """Send one non-streaming chat request to the Coze v2 chat API.

    Args:
        personal_access_token: Coze personal access token, sent as a Bearer token.
        con_id: Conversation id forwarded as ``conversation_id``.
        bot_id: Id of the bot that should answer.
        user_id: Identifier forwarded as ``user``.
        query: Text to send to the bot.
        timeout: Seconds to wait for the server (passed to ``requests.post``).
            Without a timeout a stalled connection blocks the notebook forever;
            pass ``None`` to restore the old wait-indefinitely behaviour.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        ValueError: If the response body is not valid JSON.
    """
    url = 'https://api.coze.cn/open_api/v2/chat'

    headers = {
        'Authorization': f'Bearer {personal_access_token}',
        'Content-Type': 'application/json',
        'Accept': '*/*',
        'Host': 'api.coze.cn',
        'Connection': 'keep-alive'
    }

    data = {
        'conversation_id': con_id,
        'bot_id': bot_id,
        'user': user_id,
        'query': query,
        'stream': False
    }

    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    return response.json()

def parse_message_object(message_dict):
    """Reduce a Coze chat response to its final answer and plugin output.

    Args:
        message_dict: Decoded response body; its ``messages`` entry is expected
            to be a list of message dicts with ``type`` and ``content`` keys.

    Returns:
        A dict with two string values:
        ``answer`` - the stripped content of the last message whose type is
        ``answer`` (empty string if there is none);
        ``plugin`` - the JSON-encoded ``data`` of the last ``verbose`` message
        whose content is a JSON object with ``msg_type`` equal to
        ``stream_plugin_finish`` (empty string if there is none).
    """
    plugin_data = ""
    last_answer_content = ""

    # A missing or null "messages" entry is treated as "no messages".
    messages = message_dict.get('messages') or []

    for message in messages:
        if not isinstance(message, dict):
            # Malformed entry: nothing to read from it.
            continue

        message_type = message.get('type')
        content = message.get('content')
        if content is None:
            # Explicit null content would otherwise break .strip() / json.loads.
            content = ""

        # Later answers overwrite earlier ones, so the last one wins.
        if message_type == 'answer':
            last_answer_content = content

        if message_type == 'verbose':
            try:
                content_data = json.loads(content)
            except (json.JSONDecodeError, TypeError):
                # Content is not JSON: ignore this entry.
                continue
            # Valid JSON may still be a list, number or string; only objects
            # can carry msg_type / data.
            if isinstance(content_data, dict) and content_data.get('msg_type') == 'stream_plugin_finish':
                plugin_data = json.dumps(content_data.get('data'), ensure_ascii=False)

    result = {
        'plugin': plugin_data.strip(),
        'answer': last_answer_content.strip()
    }

    return result

def extract_bracket_content(text):
    """Return every shortest ``{...}`` span found in ``text``, braces included.

    Matching is non-greedy and does not cross line breaks, so for nested
    braces a span ends at the first closing brace.
    """
    return [found.group(1) for found in re.finditer(r'({.*?})', text)]

def plugin_text_process(ref_info):
    """Convert the plugin output string into a Python object (normally a dict).

    The escaped text is cleaned up, its first and last two characters are
    dropped, and the first ``{...}`` span is parsed as a Python literal.

    SECURITY: the text comes from a remote service. It used to be passed to
    ``eval``, which would execute any expression embedded in the reply. It is
    now parsed with ``ast.literal_eval``, which accepts literals only
    (dicts, lists, strings, numbers, ...).

    Args:
        ref_info: Plugin output as produced by ``parse_message_object``.

    Returns:
        The parsed literal.

    Raises:
        IndexError: If the cleaned text contains no ``{...}`` span.
        ValueError, SyntaxError: If the span is not a valid Python literal.
    """
    import ast  # local import: the file's import cell does not provide ast

    # Remove literal backslash-n sequences, then collapse the escaping layers:
    # a run of seven backslashes becomes one, remaining runs of three are dropped.
    text = ref_info.replace('\\n', '')
    text = text.replace('\\\\\\\\\\\\\\', '\\')
    text = text.replace('\\\\\\', '')
    # Drop the two leading and two trailing wrapper characters.
    text = text[2:-2]
    text = extract_bracket_content(text)[0]
    return ast.literal_eval(text)

def get_bot_direct_reply(api_response):
    """Unpack the JSON answer of a parsed bot reply into its five fields.

    The ``answer`` entry of ``api_response`` is decoded as JSON and the fields
    are returned as the tuple
    ``(answer, reflection, intermediate, classificationId, initial)``;
    a field missing from the decoded object is returned as ``""``.
    """
    decoded = json.loads(api_response.get('answer', ""))
    field_order = ('answer', 'reflection', 'intermediate', 'classificationId', 'initial')
    return tuple(decoded.get(field, "") for field in field_order)

In [13]:
# Compute the METEOR score and the sentence-level BLEU score
def calculate_metrics(reference, candidate):
    """Score a candidate translation against one reference translation.

    Args:
        reference: Reference (expert) sentence as a plain string.
        candidate: Candidate sentence as a plain string.

    Returns:
        A tuple ``(meteor, bleu)`` holding the METEOR score and the smoothed
        sentence-level BLEU score.
    """
    # Both metrics expect pre-tokenized input.
    reference_tokens = nltk.word_tokenize(reference)
    candidate_tokens = nltk.word_tokenize(candidate)
    score_m = meteor_score([reference_tokens], candidate_tokens)
    # Smoothing avoids a score of 0 when a higher-order n-gram has no match.
    smooth = SmoothingFunction().method1
    # sentence_bleu takes a LIST of tokenized references. Passing the raw
    # string made every character count as its own reference.
    sentence_bleu_score = sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smooth)
    return score_m, sentence_bleu_score

# Similarity metric based on a Sentence-BERT model
def calculate_metrics_sbert(reference, candidate, sbert_model):
    """Return the cosine similarity between the embeddings of two sentences.

    Both sentences are encoded with ``sbert_model.encode`` (reference first),
    and the single similarity value is returned via ``.item()``.
    """
    reference_vector, candidate_vector = (
        sbert_model.encode(sentence, convert_to_tensor=True)
        for sentence in (reference, candidate)
    )
    return util.pytorch_cos_sim(reference_vector, candidate_vector).item()

# 输出相似度得分

In [15]:
# Single-query demo: send one source sentence to the main bot, derive word-level
# suggestions from the vocabulary table, and let the word bot apply them if there are any.
query = "对左机翼外襟翼1#支臂整流罩区域内部做一般目视检"
expert = "Do a general visual inspection (internal) of the left outboard flap No.1 strut fairing."
# Word count printed below.
# NOTE(review): the original comment says this counts the words of `query`, but the code
# counts the whitespace-separated words of `expert` — confirm which one is intended.
query_word_count = len(expert.split())
api_response = call_coze_api(query,COZE_BOT_ID)
answer, reflection, intermediate, classificationId, initial = get_bot_direct_reply(api_response)
# Vocabulary table; the first row is the header (Non-STE word, its POS, STE replacement, its POS).
list_of_table=[["Non-STE","N-POS","STE","POS"],["acceptable ","","PERMITTED ",""],["alternate ","","ALTERNATIVE ",""],["any ","","None",""],["avoid ","VERB","PREVENT ","VERB"],["both ","","THE TWO ","NOUN"],["check ","VERB","CHECK ","NOUN"],["cover ","VERB","COVER ","NOUN"],["damage ","VERB","DAMAGE ","NOUN"],["ensure ","VERB","MAKE SURE ","VERB"],["fit ","VERB","INSTALL ","VERB"],["follow ","VERB","OBEY ","VERB"],["further ","","MORE ",""],["further ","","MORE ",""],["have to ","VERB","MUST ","VERB"],["insert ","VERB","PUT ","VERB"],["main ","","PRIMARY ",""],["may ","","CAN ","VERB"],["need ","VERB","NECESSARY ",""],["now ","","AT THIS TIME",""],["over ","PRT","ABOVE,ON,ALONG ","PRT"],["perform ","VERB","DO ","VERB"],["press ","VERB","PUSH ","VERB"],["reach ","VERB","GET ","VERB"],["repeat ","VERB","DO,AGAIN",""],["required ","VERB","NECESSARY ",""],["rotate ","VERB","TURN ","VERB"],["secure ","VERB","ATTACH ","VERB"],["shall ","","MUST ","VERB"],["should ","","MUST ","VERB"],["since ","","BECAUSE ",""],["test ","VERB","TEST ","NOUN"],["therefore ","","THUS,AS A RESULT",""]]
# Despite its name, custom_variables holds the list of word-replacement suggestions.
custom_variables=get_word_suggestions(answer,list_of_table)
print(f'问题：{query}')
print(f'字数：{query_word_count}')
# Print the fields returned by the main bot.
print(f'写作类别：{classificationId}')
print(f'初翻：{initial}')
print(f'中间结果：{intermediate}')
print(f'二次修改：{answer}')
print(f'规则修改意见：{reflection}')
# Only call the word bot when there is at least one suggestion; otherwise keep the answer as is.
if custom_variables != []:
    # The suggestions are passed to the bot as the string form of the Python list.
    result = send_request_word(coze_api_key,'123', COZE_BOT_WORD, 'zmx', answer, str(custom_variables))
    result = parse_message_object(result)
    result = result.get('answer', "")
    print(f'字词修改意见：{custom_variables}')
else:
    result = answer
print(f'最终结果：{result}')
# Compute METEOR, BLEU and Sentence-BERT scores (currently disabled).
#s1_m_1, s1_b_1 = calculate_metrics(expert, answer)
#s1_m_2, s1_b_2 = calculate_metrics(expert, intermediate)
#print(f'S1-B-1: {s1_b_1}')
#print(f'S1-M-1: {s1_m_1}')
#print(f'S1-B-2: {s1_b_2}')
#print(f'S1-M-2: {s1_m_2}')
#s1_s_1 = calculate_metrics_sbert(expert, answer, sbert_model)
#s1_s_2 = calculate_metrics_sbert(expert, intermediate, sbert_model)
#print(f'S1-S-1: {s1_s_1}')
#print(f'S1-S-2: {s1_s_2}')

问题：对左机翼外襟翼1#支臂整流罩区域内部做一般目视检
字数：14
写作类别：0
初翻：Perform a general visual inspection inside the area of the left wing's outboard flap 1# support fairing.
中间结果：Inspect the area inside the left wing's outboard flap 1# support fairing visually.
二次修改：Perform a visual inspection inside the area of the left wing's outboard flap 1# support fairing.
规则修改意见：["Rule 3.6: Change 'Perform a general visual inspection inside the area of the left wing's outboard flap 1# support fairing.' to 'Inspect the area inside the left wing's outboard flap 1# support fairing visually.'", "Rule 5.3: Change 'Perform a general visual inspection' to 'Inspect visually'.", "Rule 2.3: Add 'the' before 'area inside the left wing's outboard flap 1# support fairing visually'."]
字词修改意见：["replace word 'perform' with 'do'"]
最终结果：Do a visual inspection inside the area of the left wing's outboard flap 1# support fairing.


In [7]:
# Batch evaluation: translate every query of the test CSV, apply the word-level
# suggestions, score the result against the expert translation and save everything.
df = pd.read_csv('./tests/trans_test_1.csv', encoding='utf-8')
query_list = df['query'].tolist()

# Index of the first row to process; use 0 to process the whole file.
# WARNING: rows before START_INDEX are not processed, so their result columns are
# written out empty and the results file is overwritten (mode='w').
START_INDEX = 75

# Vocabulary table; the first row is the header. It does not change between
# queries, so it is defined once instead of on every iteration.
list_of_table=[["Non-STE","N-POS","STE","POS"],["acceptable ","","PERMITTED ",""],["alternate ","","ALTERNATIVE ",""],["any ","","None",""],["avoid ","VERB","PREVENT ","VERB"],["both ","","THE TWO ","NOUN"],["check ","VERB","CHECK ","NOUN"],["cover ","VERB","COVER ","NOUN"],["damage ","VERB","DAMAGE ","NOUN"],["ensure ","VERB","MAKE SURE ","VERB"],["fit ","VERB","INSTALL ","VERB"],["follow ","VERB","OBEY ","VERB"],["further ","","MORE ",""],["further ","","MORE ",""],["have to ","VERB","MUST ","VERB"],["insert ","VERB","PUT ","VERB"],["main ","","PRIMARY ",""],["may ","","CAN ","VERB"],["need ","VERB","NECESSARY ",""],["now ","","AT THIS TIME",""],["over ","PRT","ABOVE,ON,ALONG ","PRT"],["perform ","VERB","DO ","VERB"],["press ","VERB","PUSH ","VERB"],["reach ","VERB","GET ","VERB"],["repeat ","VERB","DO,AGAIN",""],["required ","VERB","NECESSARY ",""],["rotate ","VERB","TURN ","VERB"],["secure ","VERB","ATTACH ","VERB"],["shall ","","MUST ","VERB"],["should ","","MUST ","VERB"],["since ","","BECAUSE ",""],["test ","VERB","TEST ","NOUN"],["therefore ","","THUS,AS A RESULT",""]]

for i in range(START_INDEX, len(query_list)):
    query = query_list[i]
    expert = df.loc[i, 'expert']
    api_response = call_coze_api(query,COZE_BOT_ID)
    answer, reflection, intermediate, classificationId, initial = get_bot_direct_reply(api_response)
    if reflection != []:
        # Join the rule hints into one HTML-friendly string.
        reflection = '<br>'.join(str(item) for item in reflection)
        reflection = "规则识别：<br>"+reflection
    else:
        reflection = ""
    # Despite its name, custom_variables holds the list of word-replacement suggestions.
    custom_variables=get_word_suggestions(answer,list_of_table)
    if custom_variables != []:
        # Let the word bot apply the suggestions to the answer.
        result = send_request_word(coze_api_key,'123', COZE_BOT_WORD, 'zmx', answer, str(custom_variables))
        result = parse_message_object(result)
        result = result.get('answer', "")
        print(f'字词修改意见：{custom_variables}')
    else:
        result = answer
    print(f'问题：{query}')
    print(f'最终结果：{result}')
    print(f'进度：{i+1}/{len(query_list)}')
    # Store the outputs of this row.
    df.loc[i, 'answer'] = answer
    df.loc[i, 'reflection'] = reflection
    df.loc[i, 'intermediate'] = intermediate
    df.loc[i, 'classificationId'] = classificationId
    df.loc[i, 'initial'] = initial
    df.loc[i, 'result'] = result
    # A list cannot be assigned to a single cell through .loc (pandas treats it as
    # several values and fails unless it has exactly one element), so the
    # suggestions are stored as one "; "-separated string.
    df.loc[i, 'custom_variables'] = '; '.join(custom_variables)
    # METEOR and BLEU scores for the final result and the intermediate result.
    s1_m_1, s1_b_1 = calculate_metrics(expert, result)
    s1_m_2, s1_b_2 = calculate_metrics(expert, intermediate)
    df.loc[i, 'S1-B-1'] = s1_b_1
    df.loc[i, 'S1-B-2'] = s1_b_2
    df.loc[i, 'S1-M-1'] = s1_m_1
    df.loc[i, 'S1-M-2'] = s1_m_2
    # Sentence-BERT similarity.
    s1_s_1 = calculate_metrics_sbert(expert, result, sbert_model)
    df.loc[i, 'S1-S-1'] = s1_s_1
    if str(df.loc[i, 'intermediate']) != 'nan':
        s1_s_2 = calculate_metrics_sbert(expert, intermediate, sbert_model)
        df.loc[i, 'S1-S-2'] = s1_s_2

# Save the results to a separate file.
df.to_csv('./tests/result_trans_S1.csv', index=False, encoding='utf-8', mode='w')

问题：(2) 拆卸螺钉[2]、垫片[3]，并移除空勤氧气瓶安装托架[1]。
结果：Remove screw [2] and washer [3]. Take off the aircrew oxygen cylinder mounting bracket [1].
进度：76/129
问题：(1) 将空勤氧气瓶安装托架[1]移至安装位置，按定力25 lbf.in ～ 35 lbf.in ( 2.8 N.m ～ 4 N.m )，拧紧螺钉[2]、垫片[3]。
结果：Mount the aerial oxygen cylinder bracket [1] to the installation position. Tighten the screw [2] and washer [3] with a torque of 25 lbf.in to 35 lbf.in (2.8 N.m to 4 N.m).
进度：77/129
问题：(b) 将温度压力传感器[1]安装在带法兰盘的三通[2]的接头上；
结果：Mount the temperature and pressure sensor [1] onto the tee fitting [2] with its flange.
进度：78/129
问题：(b) 拆卸螺钉[4]、弹簧垫片[5]、钢垫片[6]、铜垫片[7]，断开低压开关[1]上的搭接线[8]；
结果：Remove screw [4], spring washer [5], steel washer [6], copper washer [7], and disconnect the jumper [8] from the low-pressure switch [1].
进度：79/129
问题：(d) 安装螺钉[4]、弹簧垫片[5]、钢垫片[6]、铜垫片[7]，将搭接线[8]连接到低压开关[1]上；
结果：Install screw [4], spring washer [5], steel washer [6], copper washer [7], and connect the jump wire [8] to the low-pressure switch [1].
进度：80/129
问题：(4) 确认固定碗形件[8]与氧气舱[11]的螺钉[2]、垫