In [1]:
# -*- coding: utf-8 -*-
import argparse
import copy
import json
import os
import pickle
import random
import sys
import re
import numpy as np
import openai
from typing import List, Dict, Any
import ast
from scan_utils import *
from tqdm import tqdm

#GPT_MODEL = "gpt-4-turbo-preview"  # [gpt-4-turbo-preview]
GPT_MODEL = "gpt-3.5-turbo"  # [gpt-3.5-turbo-preview]

setOpenAi(keyid =4)

0

In [2]:
def split_instruction(instruction):
    parts = instruction.split('OUT:')
    question = parts[0].replace('IN:', '').strip()
    answer = parts[1].strip()
    return question, answer

In [3]:
tasks = []
solutions = []
count = 0
N =100
with open("/Users/natehu/Desktop/Tsinghua Research/TaDe/v1/scan_tasks.txt", 'r', encoding= 'utf-8') as file:
    for line in file:            
            question, actions = split_instruction(line.strip())            
            tasks.append(question)
            actions = [action.replace("I_", "") for action in actions.split()]
            solutions.append(actions)
            if count == N:
                break
    

In [20]:
def run(question):
    prompt_cot = """ 
    Instruction:
    Your task is to process a series of command instructions and generate the appropriate sequence of commands.
    
    Here are some examples:
    Q: "turn left"
    A: "turn left" outputs "TURN LEFT".

    Q: "turn right"
    A: "turn right" outputs "TURN RIGHT".

    Q: "jump left"
    A: The output of “jump left” concatenates: the output of “turn left”, the output of “jump”. “turn left” outputs “TURN LEFT”. “jump” outputs “JUMP”. So concatenating the output of “turn left” and the output of “jump” leads to “TURN LEFT” + “JUMP”. So the output of “jump left” is “TURN LEFT” + “JUMP”.

    Q: "run right"
    A: The output of "run right" concatenates: the output of "turn right", the output of "run". "turn right" outputs "TURN RIGHT". "run" outputs "RUN". So concatenating the output of "turn right" and the output of "run" leads to "TURN RIGHT" + "RUN". So the output of "run right" is "TURN RIGHT" + "RUN".

    Q: "look twice"
    A: The output of "look twice" concatenates: the output of "look", the output of "look". "look" outputs "LOOK". So repeating the output of "look" two times leads to "LOOK" * 2. So the output of "look twice" is "LOOK" * 2.

    Q: "run and look twice"
    A: The output of "run and look twice" concate+nates: the output of "run", the output of "look twice". "run" outputs "RUN". "look twice" outputs "LOOK" * 2. So concatenating the output of "run" and the output of "look twice" leads to "RUN" + "LOOK" * 2. So the output of "run and look twice" is "RUN" + "LOOK" * 2.

    Q: "jump right thrice"
    A: The output of "jump right thrice" concatenates: the output of "jump right", the output of "jump right", the output of "jump right". "jump right" outputs "TURN RIGHT" + "JUMP". So repeating the output of "jump right" three times leads to ("TURN RIGHT" + "JUMP") * 3. So the output of "jump right thrice" is ("TURN RIGHT" + "JUMP") * 3.

    Q: "walk after run"
    A: The output of "walk after run" concatenates: the output of "run", the output of "walk". "run" outputs "RUN". "walk" outputs "WALK". So concatenating the output of "run" and the output of "walk" leads to "RUN" + "WALK". So the output of "walk after run" is "RUN" + "WALK".

    Q: "turn opposite left"
    A: The output of "turn opposite left" concatenates: the output of "turn left", the output of "turn left". "turn left" outputs "TURN LEFT". So repeating the output of "turn left" twice leads to "TURN LEFT" * 2. So the output of "turn opposite left" is "TURN LEFT" * 2.

    Q: "turn around left"
    A: The output of "turn around left" concatenates: the output of "turn left", the output of "turn left", the output of "turn left", the output of "turn left". "turn left" outputs "TURN LEFT". So repeating the output of "turn left" four times leads to "TURN LEFT" * 4. So the output of "turn around left" is "TURN LEFT" * 4. Q: "turn opposite right" A: The output of "turn opposite right" concatenates: the output of "turn right", the output of "turn right". "turn right" outputs "TURN RIGHT". So repeating the output of "turn right" twice leads to "TURN RIGHT" * 2. So the output of "turn opposite right" is "TURN RIGHT" * 2.

    Q: "turn around right"
    A: The output of "turn around right" concatenates: the output of "turn right", the output of "turn right", the output of "turn right", the output of "turn right". "turn right" outputs "TURN RIGHT". So repeating the output of "turn right" four times leads to "TURN RIGHT" * 4. So the output of "turn around right" is "TURN RIGHT" * 4.

    Q: "walk opposite left"
    A: The output of "walk opposite left" concatenates: the output of "turn opposite left", the output of "walk". "turn opposite left" outputs "TURN LEFT" * 2. "walk" outputs "WALK". So concatenating the output of "turn opposite left" and the output of "walk" leads to "TURN LEFT" * 2 + "WALK". So the output of "walk opposite left" is "TURN LEFT" * 2 + "WALK".

    Q: "walk around left"
    A: The output of "walk around left" concatenates: the output of "walk left", the output of "walk left", the output of "walk left", the output of "walk left". "walk left" outputs "TURN LEFT" + "WALK". So repeating the output of "walk around left" four times leads to ("TURN LEFT" + "WALK") * 4. So the output of "walk around left" is ("TURN LEFT" + "WALK") * 4.

    
    Pay attention to the use of parentheses.
    """
    
    query_sys = {
        "role":"system",
        "content": prompt_cot
    }
    
    query_user = {
        "role":"user",
        "content": f"""Let's think Step by Step, solve this question: {question}
        The output should be a list of commands in a string. For example: "RUN" * 2 + "TURN_LEFT" + "LOOK" * 3.
        """
    }
    
    query = [query_sys, query_user]
    
    result = askChatGPT(query, model=GPT_MODEL,temperature=1)
    
    print("******************************** sanity check ********************************")
    print(result)
    
    # 做mapping 得到一个式子
    
    queyr_assistant = {
        "role":"assistant",
        "content": result
    }
    
    query.append(queyr_assistant)
    
    q_format = {
        "role":"user",
        "content": """
        What is the output? 
        Make sure only give the answer. Don't include any other information.
        If the answer is "RUN" * 2 + "TURN_LEFT" + "LOOK" * 3, you should only write 'RUN' * 2 + 'TURN_LEFT' + 'LOOK' * 3.
        """
    }
    
    query.append(q_format)
    
    ## 再问一次确认答案
    
    finalanswer = askChatGPT(query, model=GPT_MODEL,temperature=1)
    
    print("******************************** final answer ********************************")
    print(finalanswer)
    
    answer_assistant = {
        "role":"assistant",
        "content": finalanswer
    }
    
    query.append(answer_assistant)
    
    ## 最后把这个公式给转化成一系列的命令
    
    rewrite_system = {"role": "system", "content": f"""
    Now I have a pseudo action sequence expression with parentheses and multiplication. I need you to help me convert this into a sequence of actions without an operator sign.
    6 examples are as follows:    
        
    Q: "JUMP" * 3
    Rewrite: "JUMP" * 3
    A: 1 JUMP 2 JUMP 3 JUMP

    Q: "RUN" * 4 * 2
    Rewrite: "RUN" * 8
    A: 1 RUN 2 RUN 3 RUN 4 RUN 5 RUN 6 RUN 7 RUN 8 RUN

    Q: "TURN RIGHT" + "WALK"
    Rewrite: "TURN RIGHT" + "WALK"
    A: TURN RIGHT WALK

    Q: ("TURN LEFT" + "LOOK") * 2 + "TURN LEFT" + "LOOK"
    Rewrite: ("TURN LEFT" + "LOOK") * 2 + "TURN LEFT" + "LOOK"
    A: 1 (TURN LEFT LOOK) 2 (TURN LEFT LOOK) TURN LEFT LOOK

    Q: ("TURN RIGHT" * 2 + "JUMP") * 4
    Rewrite: ("TURN RIGHT" * 2 + "JUMP") * 4
    A: 1 (1 TURN RIGHT 2 TURN RIGHT JUMP) 2 (1 TURN RIGHT 2 TURN RIGHT JUMP) 3 (1 TURN RIGHT 2 TURN RIGHT JUMP) 4 (1 TURN RIGHT 2 TURN RIGHT JUMP)

    Q: "TURN LEFT" * 2 + ("TURN RIGHT" + "WALK") * 4 * 2
    Rewrite: "TURN LEFT" * 2 + ("TURN RIGHT" + "WALK") * 8
    A: 1 TURN LEFT 2 TURN LEFT 1 (TURN RIGHT WALK) 2 (TURN RIGHT WALK) 3 (TURN RIGHT WALK) 4 (TURN RIGHT WALK) 5 (TURN RIGHT WALK) 6 (TURN RIGHT WALK) 7 (TURN RIGHT WALK) 8 (TURN RIGHT WALK)
    """}
    
    Q_change = {"role": "user", "content": f"""The pseudo action sequence to be converted is as follows: {finalanswer} Please change it to the action sequences.
Please JUST answer the result."""}
    
    Q_now = [rewrite_system, Q_change]
    
    actions = askChatGPT(Q_now, model=GPT_MODEL, temperature = 1)

    # 删除数字
    text_no_numbers_and_brackets = re.sub(r'[\d\(\)]', '', actions)
    # 替换 TURN 后面的空格为下划线
    text_formatted = re.sub(r'TURN\s+', 'TURN_', text_no_numbers_and_brackets)
    # 去掉多余的空格
    text_formatted = re.sub(r'\s+', ' ', text_formatted).strip()
    
    return text_formatted

In [21]:
i = 78

question = tasks[i]
golden_answer = solutions[i]

print(question)
print(golden_answer)

walk around left after run around left thrice
['TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'RUN', 'TURN_LEFT', 'WALK', 'TURN_LEFT', 'WALK', 'TURN_LEFT', 'WALK', 'TURN_LEFT', 'WALK']


In [22]:
solution = run(question)

******************************** sanity check ********************************
First, let's break down the given command into smaller parts:

1. "walk around left"
    - This command translates to ("TURN LEFT" + "WALK") * 4 as per the provided instructions.
    - So, the output of "walk around left" is ("TURN LEFT" + "WALK") * 4.

2. "run around left thrice"
    - This command consists of two parts:
        a. "run around left"
            - This translates to ("TURN LEFT" + "RUN") * 4.
        b. "thrice"
            - This indicates repeating the previous command ("TURN LEFT" + "RUN") * 4 three times.

Now, we combine the above two parts:

    Output: (("TURN LEFT" + "WALK") * 4) + (("TURN LEFT" + "RUN") * 4) * 3

Therefore, the final answer is:

    (("TURN LEFT" + "WALK") * 4) + (("TURN LEFT" + "RUN") * 4) * 3
******************************** final answer ********************************
(('TURN LEFT' + 'WALK') * 4) + (('TURN LEFT' + 'RUN') * 4) * 3


In [23]:
solution

'TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT RUN TURN_LEFT RUN TURN_LEFT RUN TURN_LEFT RUN TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT RUN TURN_LEFT RUN TURN_LEFT RUN TURN_LEFT RUN TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT WALK TURN_LEFT RUN TURN_LEFT RUN TURN_LEFT RUN TURN_LEFT RUN'

In [24]:
actionList = solution.split()
actionList

['TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN']

In [16]:
golden_answer

['TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'RUN',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK',
 'TURN_LEFT',
 'WALK']

In [2]:
def calculate_price(number_of_token, type, model):
    if model == "gpt-3.5-turbo":
        if type == "input":
            per_token = 0.5 / 1000000
            return number_of_token * per_token
        elif type == "output":
            per_token = 1.5 / 1000000
            return number_of_token * per_token
    elif model == "gpt-4-turbo-preview":
        if type == "input":
            per_token = 10 / 1000000
            return number_of_token * per_token
        elif type == "output":
            per_token = 30 / 1000000
            return number_of_token * per_token

In [3]:
calculate_price(110494, "output", "gpt-3.5-turbo") + calculate_price(1832845, "input", "gpt-3.5-turbo")

1.0821634999999998

In [4]:
110494 + 1832845

1943339

In [4]:
52543 + 14611

67154

In [103]:
96248 + 317415

413663

In [5]:
calculate_price(11588, "output", "gpt-4-turbo-preview") + calculate_price(21104, "input", "gpt-4-turbo-preview")

0.5586800000000001

In [6]:
11588 + 21104

32692

In [4]:
31062 + 69108

100170

In [45]:
115643+50142


165785

In [34]:
22030925/22495012

0.9793693375224695

In [36]:
0.979 * 4816280

4715138.12

In [72]:
test = {
    "role":"user",
    "content": "Let 1≤r≤n and consider all subsets of r elements of the set {1, 2, ..., n}. What is the arithmetic mean of the smallest element of these subsets, as an expression of n and r?"
}

In [81]:
def GPTlogprob(query, model):
    response = openai.ChatCompletion.create(
      model=model,
      messages=query,
      logprobs=True
    )
    return response

In [73]:
response = openai.ChatCompletion.create(
    model=GPT_MODEL,
    messages=[test],
    logprobs = True
    )

In [87]:
def calculate_average_logprob(response):
    """
    Calculate the average log probability of tokens from an OpenAI API response.

    Parameters:
    response (dict): The response dictionary from OpenAI API containing 'choices' with 'logprobs'.

    Returns:
    float: The average log probability of all tokens in the response.
    """
    # Extract the response's first choice (assuming the relevant data is there)
    choice = response['choices'][0]

    # Check if 'logprobs' and 'content' are available in the response
    if 'logprobs' in choice and 'content' in choice['logprobs']:
        # Get the list of log probabilities
        logprobs_list = choice['logprobs']['content']

        # Extract log probabilities
        logprobs = [item['logprob'] for item in logprobs_list]

        # Calculate the average log probability
        if logprobs:
            average_logprob = sum(logprobs) / len(logprobs)
            return np.round(np.exp(average_logprob)*100,2)
        else:
            return 0  # Return 0 if no log probabilities are found
    else:
        raise ValueError("Log probabilities are missing from the response")


print("Average Log Probability:", calculate_average_logprob(response))


Average Log Probability: 76.15


In [78]:
np.round(np.exp(calculate_average_logprob(response))*100,2)

88.0

In [80]:
response.choices[0].message.content

"To find the arithmetic mean of the smallest element of all subsets of r elements of the set {1, 2, ..., n}, we need to consider how many subsets contain a particular element as the smallest element.\n\nLet's consider the element 1. In order for 1 to be the smallest element of a subset of size r, the remaining r-1 elements must be chosen from the set {2, 3, ..., n}. There are (n-1 choose r-1) such subsets.\n\nSimilarly, if we consider the element 2, in order for 2 to be the smallest element of a subset of size r, the remaining r-1 elements must be chosen from the set {3, 4, ..., n}. There are (n-2 choose r-1) such subsets.\n\nContinuing this pattern, if we consider the element k, in order for k to be the smallest element of a subset of size r, the remaining r-1 elements must be chosen from the set {k+1, k+2, ..., n}. There are (n-k choose r-1) such subsets.\n\nTherefore, the arithmetic mean of the smallest element of all subsets of r elements of the set {1, 2, ..., n} is:\n\n(1*(n-1 ch

In [None]:
"To find the arithmetic mean of the smallest element of all subsets of r elements of the set {1, 2, ..., n}, we need to consider how many subsets contain a particular element as the smallest element.\n\nLet's consider the element 1. In order for 1 to be the smallest element of a subset of size r, the remaining r-1 elements must be chosen from the set {2, 3, ..., n}. There are (n-1 choose r-1) such subsets.\n\nSimilarly, if we consider the element 2, in order for 2 to be the smallest element of a subset of size r, the remaining r-1 elements must be chosen from the set {3, 4, ..., n}. There are (n-2 choose r-1) such subsets.\n\nContinuing this pattern, if we consider the element k, in order for k to be the smallest element of a subset of size r, the remaining r-1 elements must be chosen from the set {k+1, k+2, ..., n}. There are (n-k choose r-1) such subsets.\n\nTherefore, the arithmetic mean of the smallest element of all subsets of r elements of the set {1, 2, ..., n} is:\n\n(1*(n-1 choose r-1) + 2*(n-2 choose r-1) + ... + (n-r+1)*r choose r-1) / (n choose r)\n\nThis expression represents the sum of the smallest elements of all subsets of r elements, divided by the total number of subsets."

In [97]:
test = {
    "role":"user",
    "content": "Let 1≤r≤n and consider all subsets of r elements of the set {1, 2, ..., n}. What is the arithmetic mean of the smallest element of these subsets, as an expression of n and r?"
}
query = [test]

In [84]:
response = GPTlogprob(query, GPT_MODEL)

In [88]:
calculate_average_logprob(response)

76.15

In [89]:
response.choices[0].message.content

"To find the arithmetic mean of the smallest element of all subsets of r elements of the set {1, 2, ..., n}, we need to consider how many subsets will contain a particular number as the smallest element.\n\nLet's consider the number k (1 ≤ k ≤ n). The number k will be the smallest element of subsets that contain the number k along with (r-1) other elements chosen from the set {1, 2, ..., k-1}. The number of ways to choose (r-1) elements from {1, 2, ..., k-1} is denoted by C(k-1, r-1).\n\nTherefore, the number k will be the smallest element in C(k-1, r-1) subsets. Since k can range from 1 to n, we need to sum up the number of subsets for each k and divide by the total number of subsets, which is C(n, r).\n\nThe arithmetic mean of the smallest element can be calculated as follows:\n\nArithmetic Mean = Σ[k * C(k-1, r-1)] / C(n, r)\n                  = Σ[(k * C(k-1, r-1))] / C(n, r) for k = 1 to n\n\nTherefore, the arithmetic mean of the smallest element of subsets of r elements of the set

In [98]:
query

[{'role': 'user',
  'content': 'Let 1≤r≤n and consider all subsets of r elements of the set {1, 2, ..., n}. What is the arithmetic mean of the smallest element of these subsets, as an expression of n and r?'}]

In [99]:
query.append({'role': 'assistant', 'content': response.choices[0].message.content})
query.append({'role': 'user', 'content': "therefore, what is the final answer"})

In [100]:
response_final = GPTlogprob(query, GPT_MODEL)

In [101]:
calculate_average_logprob(response_final)

76.56