In [1]:
# -*- coding: utf-8 -*-
import argparse
import copy
import json
import os
import pickle
import random
import re
import sys
from collections import defaultdict, deque
from typing import List
import re
from typing import Any, List
import ipydagred3
import networkx as nx
import numpy as np
import openai
from tqdm import tqdm
from csqa_utils import *

# Optional local HTTP(S) proxy for reaching the API; uncomment when needed.
#os.environ["http_proxy"] = "http://localhost:7890"
#os.environ["https_proxy"] = "http://localhost:7890"
# NOTE(review): presumably selects which stored API key to use -- confirm in csqa_utils.
setOpenAi(keyid = 4)

# Chat model used by the decomposition and Tree-of-Thought cells below.
#GPT_MODEL = "gpt-4-turbo"
#GPT_MODEL = "gpt-4o-mini"
GPT_MODEL = "gpt-3.5-turbo"

# Load the question data file: one JSON object per line (JSON Lines).
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's locale default, and blank lines are skipped instead of
# crashing json.loads.
dataset = []
with open('train_rand_split.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        dataset.append(json.loads(line))

In [6]:
# Minimal single-turn request used to smoke-test the API connection.
model = "gpt-4-turbo-preview"
messages = [{'role': 'user', 'content': 'What is the capital of France?'}]

In [9]:
# Send the smoke-test conversation through the chat-completion endpoint.
response = openai.ChatCompletion.create(model=model, messages=messages, temperature=1)

In [10]:
# Display the raw response object returned by the API call.
response

<OpenAIObject chat.completion id=chatcmpl-A0xmPvxE9EuxhXEOy7HdPIN14iJ0l at 0x117879130> JSON: {
  "id": "chatcmpl-A0xmPvxE9EuxhXEOy7HdPIN14iJ0l",
  "object": "chat.completion",
  "created": 1724793157,
  "model": "gpt-4-0125-preview",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "The capital of France is Paris.",
        "refusal": null
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 7,
    "total_tokens": 21
  },
  "system_fingerprint": null
}

In [10]:
# Inspect one raw record to see the dataset's structure.
dataset[1444]

{'answerKey': 'B',
 'id': '9b1b25ed5e3f1186dd019e17169a15eb',
 'question': {'question_concept': 'dogs',
  'choices': [{'label': 'A', 'text': 'four legs'},
   {'label': 'B', 'text': 'males'},
   {'label': 'C', 'text': 'electrical circuit'},
   {'label': 'D', 'text': 'pet'},
   {'label': 'E', 'text': 'sniff'}],
  'stem': "When they say dogs are a man's best friend, they aren't talking about just whats?"}}

In [6]:
# Index of the dataset record used for the rest of the notebook.
problem_id = 88

# Pull out the question text, the answer options and the gold label for that record.
question = dataset[problem_id]["question"]["stem"]
choices = dataset[problem_id]["question"]["choices"]
gold_answer = dataset[problem_id]["answerKey"]

In [8]:
# Show the selected question and its answer options.
print(question)
print(choices)

Where can meat last a long time?
[{'label': 'A', 'text': 'backery'}, {'label': 'B', 'text': 'ham sandwich'}, {'label': 'C', 'text': 'fridge'}, {'label': 'D', 'text': 'butcher shop'}, {'label': 'E', 'text': 'freezer'}]


任务分解 

In [11]:
def decompose_sql(question, choices):
    """Ask the chat model to break a multiple-choice question into ordered sub-problems.

    Args:
        question: The question text, interpolated into the prompt.
        choices: The answer options, interpolated into the prompt via ``str.format``.

    Returns:
        Whatever ``askChatGPT`` returns for the single-message query
        (presumably the model's reply text -- confirm in ``csqa_utils``).
    """
    decomposition_prompt = """

You will be given a common sense question, and a set of choices of answers. 
Please help me decompose this problem into a series of step-by-step sub-problems.

1 examples are as follows:
Question: When they say dogs are a man's best friend, they aren't talking about just whats?
Choices: 
[{{'label': 'A', 'text': 'four legs'}},
   {{'label': 'B', 'text': 'males'}},
   {{'label': 'C', 'text': 'electrical circuit'}},
   {{'label': 'D', 'text': 'pet'}},
   {{'label': 'E', 'text': 'sniff'}}]
   
Steps:
1. Understand the Meaning of the Question
2. Identify the Options, review each of the given options
3. Analyze and evaluate each option based on the question
4. Select the Best Option

Now the question is {},
the choices are: {},

please decompose it into a series of easy-to-solve steps like the examples.
Answer Format: (Please write each broken-down question step on a separate line, starting with a number.)
To solve the question "xxx", we need to solve the following problems step by step:
1. sub-question 1
2. sub-question 2
3. sub-question 3
...
""".format(question, choices)

    # Single-turn conversation: the whole instruction travels as one user message.
    conversation = [{"role": "user", "content": decomposition_prompt}]
    return askChatGPT(conversation, model=GPT_MODEL, temperature=1)


def convert_steps_to_format(decom_commands):
    """Parse the model's decomposition reply into an ordered list of sub-problems.

    Args:
        decom_commands: Reply text, expected to contain the sentence
            "... we need to solve the following problems step by step:"
            followed by one numbered sub-problem per line.

    Returns:
        A ``(subtasks, steps_dict)`` tuple: the sub-problem strings in order,
        and a dict mapping each 0-based position to the same string.
    """
    marker = "we need to solve the following problems step by step:"

    # Keep only the text after the marker sentence. When the model did not echo
    # the marker, parse the whole reply instead of slicing at a bogus offset
    # (str.find() returning -1 used to silently discard the first characters).
    found = re.search(re.escape(marker), decom_commands, flags=re.IGNORECASE)
    subtasks_text = decom_commands[found.end():] if found else decom_commands

    subtasks = []
    for raw_line in subtasks_text.splitlines():
        line = raw_line.strip()
        if not line:
            # Blank separator lines are not sub-problems.
            continue
        # Drop a leading "1." / "2)" style number; leave unnumbered lines intact.
        subtasks.append(re.sub(r"^\d+\s*[.)]\s*", "", line))

    steps_dict = dict(enumerate(subtasks))
    return subtasks, steps_dict

# Decompose the selected question and parse the reply into ordered sub-problems.
decompose_steps = decompose_sql(question, choices)
steps, steps_dict = convert_steps_to_format(decompose_steps)
num_steps = len(steps)
print(steps)
print(steps_dict)  # Same sub-problems, just keyed by their 0-based index.

['Understand the Meaning of the Question', 'Identify the Options, review each of the given options', 'Analyze and evaluate each option based on the question', 'Select the Best Option']
{0: 'Understand the Meaning of the Question', 1: 'Identify the Options, review each of the given options', 2: 'Analyze and evaluate each option based on the question', 3: 'Select the Best Option'}


In [12]:
# Number of sub-problems produced by the decomposition.
print(len(steps))

4


In [13]:
# Tree-of-Thought (ToT) solving: a beam search over the decomposed sub-problems.
N = 3  # number of proposals sampled per surviving path for each sub-problem
M = 2  # number of best-scoring paths kept after evaluating the proposals

CONFIDENCE_PROMPT = "Please provide a confidence rating for the accuracy of this solution, on a scale from 1 to 5. Only output the number."


def _parse_confidence(raw_score, default=1):
    """Turn the model's confidence reply into an integer clamped to 1..5.

    The reply is free text, so a bare ``int()`` crashes on answers such as
    "5." or "Score: 4". The first run of digits is used instead; when the
    reply contains no digits the lowest rating (``default``) is returned so an
    unparseable evaluation cannot promote a path.
    """
    found = re.search(r"\d+", str(raw_score))
    if found is None:
        return default
    return min(5, max(1, int(found.group())))


# The system prompt does not depend on the sub-problem, so it is built once.
sys_q = f"""
    There is a common sense question. I need you to solve it and give an answer.
    
    Here is the problem:\n{question}
    
    The choices are: {choices}

I have broken this  problem down into a series of smaller problems. I will assign you sub-problems one by one, and provide the results of the previous sub-problems as a reference for your reasoning.
Please solve the problem and respond according to logic.
"""

# Surviving reasoning paths: (cumulative score, [answer to each solved sub-problem]).
solution = []

for i in range(num_steps):
    subtask = steps[i]
    subask = f"""\nThe sub-problem to solve now is xxx: {subtask}
Based on the information above, please provide a concise and clear answer"""

    # The first sub-problem starts from one empty path; later sub-problems extend
    # every surviving path. Iterating over the paths themselves (rather than
    # range(M)) stays safe when fewer than M paths survived.
    parent_paths = solution if solution else [(0, [])]
    temp_solution = []
    for path_score, path_answers in parent_paths:
        if path_answers:
            # Recap the answers already given along this path.
            answersSoFar = """\nSo far, the answers to the preceding sub-problems are as follows: The format is Sub-problem-Id: xxx; Sub-problem: xxx; Answer: xxx."""
            for index, value in enumerate(path_answers):
                answersSoFar += f"""\nSub-problem-Id: {index}; Sub-problem: {steps[index]}; Answer: {value}."""
            query = answersSoFar + subask
        else:
            query = subask

        Q = [{'role':'system', 'content':sys_q},
             {'role':'user', 'content':query},]
        for n in range(N):  # sample N candidate answers for this sub-problem
            result = askChatGPT(Q, model=GPT_MODEL, temperature=1)
            # Ask the model to rate its own answer within the same conversation.
            eval_Q = Q + [{'role':'assistant', 'content':result},
                          {'role':'user', 'content':CONFIDENCE_PROMPT}]
            score = _parse_confidence(askChatGPT(eval_Q, model=GPT_MODEL, temperature=1))

            # Path score accumulates across sub-problems.
            temp_solution.append((path_score + score, path_answers + [result]))

    # Prune to the M best paths. The sort must be descending: an ascending sort
    # followed by [:M] would keep the lowest-scored paths instead.
    solution = sorted(temp_solution, key=lambda x: x[0], reverse=True)[:M]

print(len(solution))
printSeq(solution)
# The final answer is obtained afterwards with one additional query.
# 用额外的一次query再问一下最终的答案


2
(20, ['The question is asking about where meat can last a long time.', 'Sub-problem: Identify the Options, review each of the given options\n\n- Option A: bakery\n- Option B: ham sandwich\n- Option C: fridge\n- Option D: butcher shop\n- Option E: freezer\n\nBased on the information provided in the question, the best option for where meat can last a long time would be E: freezer.', 'Based on the question provided, where meat can last a long time is in the freezer (Option E). The low temperatures in the freezer help to preserve the meat and extend its shelf life compared to other options such as the fridge, bakery, ham sandwich, or butcher shop.', 'Sub-problem: Select the Best Option\nAnswer: The best option for where meat can last a long time is E: freezer.'])
(20, ['The question is asking about where meat can last a long time.', 'Sub-problem: Identify the Options, review each of the given options\n\n- Option A: bakery\n- Option B: ham sandwich\n- Option C: fridge\n- Option D: butcher

In [16]:
# Summarise the best reasoning path and ask the model for the final answer letter.
# (An alternative is to query every surviving path and then pick the best answer.)

# Select the highest-scoring path explicitly rather than relying on list order.
best_path = max(solution, key=lambda x: x[0])[1]

user_q = f"""There is a common sense question:\n{question}

The choices are: {choices}

I have broken this problem down into a series of smaller problems and each sub-problem is solved.
The sub-problems and their corresponding answers are as follows. (Format: Sub-problem-Id: xxx; Sub-problem: xxx; Answer: xxx.)"""

for index, value in enumerate(best_path):
    user_q += f"""\nSub-problem-Id: {index}; Sub-problem: {steps[index]}; Answer: {value}."""

user_q += """\n\nNow that all the sub-problems have been solved, so what is the final answer?
Only give the letter of the correct answer."""

# Build a fresh conversation that actually carries the summary. Previously the
# summary text was assembled but never sent, and the request was appended to
# whatever message list was left over from the search loop.
Q = [{'role':'user', 'content':user_q}]
raw_final = askChatGPT(Q, model=GPT_MODEL, temperature=1)

# Normalise replies such as "E." or "Answer: E" to the bare choice label so the
# comparison with the gold answer is not defeated by formatting.
valid_labels = [str(choice['label']) for choice in choices]
label_match = None
if valid_labels:
    label_pattern = r"\b(" + "|".join(re.escape(label) for label in valid_labels) + r")\b"
    label_match = re.search(label_pattern, raw_final)
finalResult = label_match.group(1) if label_match else raw_final.strip()


In [17]:
# Display the model's final answer.
finalResult

'E'

In [18]:
# Display the gold answer label for comparison.
gold_answer

'E'

In [19]:
# Report whether the prediction matches the gold label.
print('correct' if finalResult == gold_answer else 'error')
# The run finished without interruption, so it is marked as a success
# regardless of whether the answer was right.
success = True

correct
