In [1]:
import json
import os
import random

import requests
from tqdm import tqdm

from utils import create_exps, prepare_data, get_story

# Seed Python's global RNG so the random.sample draws made later in this
# notebook select the same stories on every fresh run.
random.seed(0)

%load_ext autoreload
%autoreload 2

In [2]:
def query_gpt4_chat(
    api_key, messages, model="gpt-4o", temperature=0.2, max_tokens=200, timeout=60
):
    """
    Queries the OpenAI Chat Completions API and returns the generated text.

    :param api_key: Your API Key for the OpenAI GPT
    :param messages: A list of message dicts, where each dict contains 'role': 'user' or 'system', 'content': 'message text'
    :param model: Which GPT model to use
    :param temperature: Sampling temperature
    :param max_tokens: Maximum number of tokens to generate
    :param timeout: Seconds to wait for the HTTP request before requests raises
        a timeout error; without it a stalled connection blocks forever
    :return: The text content of the first returned choice, or an empty string
        when the response carries no choices or no content
    :raises Exception: If the API answers with a non-200 status code
    """
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "OpenAI-Organization": "org-pqBwRmTwXkMQOPSIJPMi1ltR",
    }

    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    response = requests.post(
        url, headers=headers, data=json.dumps(payload), timeout=timeout
    )

    if response.status_code != 200:
        raise Exception(f"Error querying GPT-4 Chat API: {response.text}")

    # `.get("choices", [{}])[0]` only protects against a missing key; an
    # explicit empty list would still raise IndexError, so fall back here.
    choices = response.json().get("choices") or [{}]
    message = choices[0].get("message") or {}
    # "content" may be present but null; normalise to an empty string so
    # callers never interpolate the literal text "None" into a sample.
    return message.get("content") or ""

In [3]:
# System message sent with every paraphrase request. Fixed the "generrate"
# typo; note that regenerating data with this prompt may not reproduce
# paraphrases produced with the earlier wording.
system_prompt = """You are a helpful assistant that helps people paraphrase their short structured stories into more natural stories.

Instructions:
 - Generate a more natural story based on the given structured story containing multiple characters, objects, containers and actions.
 - Make sure the generated story has exactly the same meaning as the structured story.
 - Make sure the generated story has the same timeline as the structured story.
 - Make sure the generated story is coherent and grammatically correct."""

In [4]:
# Template for the user message; `{story}` is filled with the structured story.
# Fixed the "Strctured" typo in the label sent to the model.
user_prompt = "Structured Story: {story}"

In [5]:
path = "data/SymbolicToM Datasets/Fixed and Unambiguous ToMi/"

# Read the test stories and the accompanying trace file as lists of lines.
with open(f"{path}/test.txt", "r") as stories_file:
    test_data = stories_file.readlines()

with open(f"{path}/test.trace", "r") as trace_file:
    test_trace = trace_file.readlines()

# Parse the raw lines into stories, then build the evaluation records
# (no priming episodes, with the question attached to each record).
stories = get_story(test_data)
processed_stories = prepare_data(
    stories,
    test_trace,
    n_priming_eps=0,
    add_question=True,
)

In [6]:
# Group the processed records by their "category" field, preserving the
# order in which categories and records are first encountered.
categorized_data = {}

for record in processed_stories:
    categorized_data.setdefault(record["category"], []).append(record)

In [10]:
# Drop the categories that are not paraphrased. pop() with a default keeps
# this cell re-runnable: a second execution of `del` raised KeyError because
# the keys were already gone.
for unused_category in ("memory", "reality"):
    categorized_data.pop(unused_category, None)
categorized_data.keys()

dict_keys(['first_order_true_belief', 'second_order_true_belief', 'second_order_false_belief', 'first_order_false_belief'])

In [12]:
n = 50

# Read the key from the environment so no credential is stored in the
# notebook. The key that used to be hardcoded in this cell has been exposed
# and should be revoked and rotated.
api_key = os.environ["OPENAI_API_KEY"]

# The system message is identical for every request, so build it once.
system_msg = {"role": "system", "content": system_prompt}

samples = []
for category in categorized_data:
    category_stories = categorized_data[category]
    # Cap the draw at the category size: random.sample raises ValueError when
    # asked for more items than the population contains.
    sample_size = min(n, len(category_stories))
    indices = random.sample(range(len(category_stories)), sample_size)
    for idx in tqdm(indices, desc=category):
        story = category_stories[idx]
        user_msg = {"role": "user", "content": user_prompt.format(story=story["input"])}
        messages = [system_msg, user_msg]

        response = query_gpt4_chat(api_key, messages)

        # Replace the structured story with its paraphrase, keeping the
        # original question and target.
        samples.append({
            "input": f"{response}\nQuestion: {story['question']}\nAnswer:",
            "category": category,
            "target": story["target"],
        })

# Write samples to json file, creating the output directory on first use
output_dir = "data/paraphrased_ToMi"
os.makedirs(output_dir, exist_ok=True)
with open(f"{output_dir}/dataset.json", "w") as f:
    json.dump(samples, f, indent=4)

first_order_true_belief:   0%|          | 0/50 [00:00<?, ?it/s]

first_order_true_belief: 100%|██████████| 50/50 [01:35<00:00,  1.91s/it]
second_order_true_belief: 100%|██████████| 50/50 [01:35<00:00,  1.91s/it]
second_order_false_belief: 100%|██████████| 50/50 [01:34<00:00,  1.89s/it]
first_order_false_belief: 100%|██████████| 50/50 [01:28<00:00,  1.77s/it]


In [51]:
# Read the key from the environment so no credential is stored in the
# notebook. The key that used to be hardcoded in this cell has been exposed
# and should be revoked and rotated.
api_key = os.environ["OPENAI_API_KEY"]

# The system message is identical for every request, so build it once.
system_msg = {"role": "system", "content": system_prompt}

paraphrase_dir = "priming_examples/paraphrases"
os.makedirs(paraphrase_dir, exist_ok=True)

for category in tqdm(categorized_data):
    samples = []
    with open(f"priming_examples/original/{category}.txt", "r") as f:
        # The files store literal "\n" sequences; turn them into real
        # newlines, then split on blank lines to get one chunk per example.
        priming_examples = f.read().replace("\\n", "\n").split("\n\n")

    for example in priming_examples:
        # Skip empty and whitespace-only chunks (e.g. from trailing blank
        # lines); they contain no "Question" marker and would make
        # .index() raise ValueError.
        if not example.strip():
            continue

        question_start = example.index("Question")
        # Story: text after the last ":" that precedes the question.
        story = example[:question_start].split(":")[-1].strip()
        # Question: the single line starting at "Question".
        question = example[question_start:].split("\n")[0].strip()
        # Answer: text after the last ":" from the "Answer" marker onwards.
        answer = example[example.index("Answer"):].split(":")[-1].strip()

        user_msg = {"role": "user", "content": user_prompt.format(story=story)}
        messages = [system_msg, user_msg]

        response = query_gpt4_chat(api_key, messages)

        samples.append({
            "input": f"{response}\n{question}\nAnswer:",
            "category": category,
            "target": " " + answer,
        })

    with open(f"{paraphrase_dir}/{category}.json", "w") as f:
        json.dump(samples, f, indent=4)


100%|██████████| 4/4 [00:40<00:00, 10.13s/it]


In [38]:
# Scratch cell: load one priming file and split it into examples, turning
# literal "\n" sequences into real newlines first.
# NOTE(review): the paraphrase loop reads from priming_examples/original/ --
# confirm this un-nested path still exists.
with open("priming_examples/first_order_false_belief.txt", "r") as examples_file:
    raw_examples = examples_file.read()
priming_examples = raw_examples.replace("\\n", "\n").split("\n\n")

In [45]:
# Show the tail of the first priming example, starting at its "Answer:" marker.
first_example = priming_examples[0]
answer_start = first_example.index("Answer:")
print(first_example[answer_start:])

Answer: bathtub


In [42]:
# Display the question line of the first priming example: the text from
# "Question" up to the next newline.
first_example = priming_examples[0]
question_start = first_example.index("Question")
first_example[question_start:].partition("\n")[0]

'Question: Where will Ethan look for the stockings?'

In [10]:
# Display whatever `story` currently holds in the kernel.
# NOTE(review): `story` is a leftover loop variable assigned in two earlier
# cells (a dict in the dataset loop, a str in the priming loop), so what this
# shows depends on which of those cells ran last.
story

{'input': 'Avery entered the staircase.\nAlexander entered the staircase.\nCharlotte entered the staircase.\nThe cabbage is in the crate.\nThe crate is in the staircase.\nCharlotte exited the staircase.\nAlexander moved the cabbage to the suitcase.\nThe suitcase is in the staircase.\n',
 'target': ' crate',
 'category': 'first_order_false_belief',
 'question': 'Where will Charlotte look for the cabbage?'}

In [11]:
# Print the most recent paraphrase returned by query_gpt4_chat.
# NOTE(review): `response` is a leftover loop variable from an earlier cell;
# the value printed depends on which generation loop ran last.
print(response)

Ethan walked into the front yard, while Carter made his way into the pantry. Shortly after, Chloe also stepped into the front yard. In the yard, there was a crate containing some stockings. After a while, Ethan left the front yard, and Carter came out of the pantry. Chloe then took the stockings from the crate and placed them in the bathtub, which was also in the front yard. Eventually, Ethan returned to the front yard.
