# Tree of Thoughts

Implementation: https://github.com/princeton-nlp/tree-of-thought-llm

In [1]:
import contextlib
import io
import sys
import time
import os
import json

import openai
import pandas as pd
from asteval import Interpreter

openai.api_key = os.environ.get('OPENAI_API_KEY')

In [2]:
import argparse
from tot.methods.bfs import solve
from tot.tasks.game24 import Game24Task

In [3]:
# Instantiate the Game of 24 task from the tot package.
# The bare name on the last line makes the notebook display the object's repr.
task = Game24Task()
task

<tot.tasks.game24.Game24Task at 0x1185a7c50>

In [4]:
class Game24Data:
    """
    Game of 24 puzzle table for experimenting with tree of thought.

    Attributes:
        data: DataFrame produced by ``pd.read_csv(path)``.
        interpreter: asteval ``Interpreter`` instance, kept next to the data so
            callers can evaluate arithmetic expressions with it.
    """

    def __init__(self, path="https://hub.oxen.ai/api/repos/datasets/Game-of-24/file/main/24.csv"):
        """
        Read the puzzle CSV and create the expression interpreter.

        Args:
            path: Anything ``pd.read_csv`` accepts; the default is a remote URL,
                so constructing with no arguments needs network access.
        """
        self.data = pd.read_csv(path)
        self.interpreter = Interpreter()

    def __len__(self):
        """Return the number of rows in the puzzle table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the row(s) at positional index ``idx`` (``DataFrame.iloc`` semantics)."""
        return self.data.iloc[idx]

    def sample(self, n=None, random_state=None):
        """
        Draw random rows from the puzzle table.

        Args:
            n: Number of rows to draw; ``None`` lets pandas use its default of
                a single row.
            random_state: Optional seed (or generator) forwarded to
                ``DataFrame.sample``. Pass a fixed value to make the drawn
                subset reproducible across kernel restarts; ``None`` keeps the
                previous unseeded behaviour.

        Returns:
            DataFrame containing the sampled rows with their original index.
        """
        return self.data.sample(n=n, random_state=random_state)

In [10]:
# Beam width for the search; it is passed to the solver config as n_select_sample.
# per paper for experiment 2
# beams = 1

# default
beams = 3

In [11]:
# Search settings handed to solve(); n_select_sample takes the `beams` value set earlier.
args = argparse.Namespace(
    **{
        "backend": "gpt-4",
        "temperature": 0.7,
        "task": "game24",
        "naive_run": False,
        "prompt_sample": None,
        "method_generate": "propose",
        "method_evaluate": "value",
        "method_select": "greedy",
        "n_generate_sample": 1,
        "n_evaluate_sample": 3,
        "n_select_sample": beams,
    }
)

In [12]:
# example from repository
# Run the solver on puzzle index 900 with the settings in `args`,
# then print the first element of the returned `ys`.
task = Game24Task()
ys, infos = solve(args, task, 900)
print(ys[0])

functools.partial(<function gpt at 0x107e33d80>, model='gpt-4', temperature=0.7)
-- new_ys --: ('4 + 5 = 9 (left: 6 9 10)\n', '10 - 4 = 6 (left: 5 6 6)\n', '10 - 5 = 5 (left: 4 5 6)\n', '4 * 5 = 20 (left: 6 10 20)\n', '5 + 6 = 11 (left: 4 10 11)\n', '10 / 5 = 2 (left: 2 4 6)\n', '6 * 5 = 30 (left: 4 10 30)\n', '10 * 4 = 40 (left: 5 6 40)\n')
-- sol values --: (3.0, 3.0, 3.0, 3.0, 2.001, 1.002, 0.003, 0.003)
-- choices --: ['4 + 5 = 9 (left: 6 9 10)\n', '10 - 4 = 6 (left: 5 6 6)\n', '10 - 5 = 5 (left: 4 5 6)\n']

-- new_ys --: ('10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n', '10 - 4 = 6 (left: 5 6 6)\n6 / 5 = 1.2 (left: 1.2 6)\n', '10 - 5 = 5 (left: 4 5 6)\n6 / 4 = 1.5 (left: 1.5 5)\n', '4 + 5 = 9 (left: 6 9 10)\n6 + 9 = 15 (left: 10 15)\n', '4 + 5 = 9 (left: 6 9 10)\n9 - 6 = 3 (left: 3 10)\n', '4 + 5 = 9 (left: 6 9 10)\n10 - 6 = 4 (left: 4 9)\n', '4 + 5 = 9 (left: 6 9 10)\n6 * 9 = 54 (left: 10 54)\n', '4 + 5 = 9 (left: 6 9 10)\n9 / 6 = 1.5 (left: 1.5 10)\n', '4 + 5 = 9 (left: 6 

## Experiment 2 — 100 samples

In [None]:
# Load the puzzle table and draw 100 random rows for the experiment.
# NOTE(review): the draw is unseeded, so a fresh kernel picks a different subset
# than the one behind the saved results — confirm whether a fixed seed is wanted.
game24_data = Game24Data()
samples = game24_data.sample(n=100)

In [None]:
# source to silence stdout: https://stackoverflow.com/questions/2828953/silence-the-stdout-of-a-function-in-python-without-trashing-sys-stdout-and-resto

@contextlib.contextmanager
def silence_stdout():
    """
    Context manager that discards everything written to ``sys.stdout``.

    While the ``with`` body runs, ``sys.stdout`` is replaced by an in-memory
    buffer that is thrown away afterwards. The original stream is restored on
    exit, including when the body raises, so a failure inside the block can no
    longer leave the notebook with its output permanently swallowed.
    """
    save_stdout = sys.stdout
    sys.stdout = io.StringIO()
    try:
        yield
    finally:
        # Runs on normal exit and when an exception is thrown in at the yield.
        sys.stdout = save_stdout

def run_experiment():
    """
    Run the solver once for every index label in the global ``samples`` frame.

    Uses the notebook-level ``args`` and ``task`` for every call.

    Returns:
        ``(ys_list, infos_list)``: two lists aligned with ``samples.index``,
        holding the first and second element of each ``solve`` result.
    """
    # One solve() call per sampled puzzle, in index order.
    outcomes = [solve(args, task, puzzle_idx) for puzzle_idx in samples.index]

    collected_ys = [ys for ys, _ in outcomes]
    collected_infos = [infos for _, infos in outcomes]
    return collected_ys, collected_infos
    
        
# with silence_stdout():
#     start = time.time()
#     ys_list, infos_list = run_experiment() #prevent stdout... too much!
#     end = time.time()

In [None]:
# print(end - start)

In [None]:
# save results
# infos_results = "../../logs/game24/gpt-4_propose1_value1_greedy_samples100.json"
# json_string = json.dumps(infos_list, indent=4)

# with open(infos_results, "w") as file:
#     file.write(json_string)

## Results on experiment 2
Run time: about 61 minutes (3658.4 seconds)

In [None]:
# Location of the saved run log, relative to this notebook.
# presumably the infos_list written by the commented-out save cell — verify.
infos_results = "../../logs/game24/gpt-4_propose1_value1_greedy_samples100.json"

# Parse the JSON log into `data`, which evaluate_tot() reads as a global.
with open(infos_results) as file:
    data = json.load(file)

In [None]:
def _evaluates_to_24(interpreter, expression, tol=1e-6):
    """Return True when ``interpreter.eval(expression)`` is numerically 24 within ``tol``."""
    if not expression.strip():
        return False
    try:
        value = interpreter.eval(expression)
        # Division makes results floats (8 / (3 - 8 / 3) gives 23.99999...), so
        # compare with a tolerance. Non-numeric results such as None raise
        # TypeError here and count as a miss.
        return bool(abs(value - 24) <= tol)
    except Exception:
        return False


def evaluate_tot(records=None, interpreter=None):
    """
    Extract the final answer expression of every run and check that it equals 24.

    For each record the first entry of ``record['steps'][3]['select_new_ys']``
    is taken, and the text between the last ``"Answer: "`` and the first
    ``"= 24"`` is kept as the expression. A record that lacks that entry gets
    an empty expression and is scored ``False`` instead of aborting the whole
    evaluation.

    NOTE(review): only the numeric value is checked; nothing here verifies that
    the expression uses the puzzle's own numbers — confirm this is intended.

    Args:
        records: Sequence of run logs. Defaults to the notebook-level ``data``.
        interpreter: Object with an ``eval(str)`` method. Defaults to
            ``game24_data.interpreter``.

    Returns:
        ``(expressions, evaluations)``: a dict mapping record position to the
        extracted expression string, and a list of booleans in the same order.
    """
    if records is None:
        records = data
    if interpreter is None:
        interpreter = game24_data.interpreter

    expressions = {}
    for idx, record in enumerate(records):
        try:
            # Index 3 is the step whose first selected candidate is read as the answer.
            final_candidate = record['steps'][3]['select_new_ys'][0]
        except (KeyError, IndexError):
            expressions[idx] = ""
            continue
        expressions[idx] = final_candidate.split("Answer: ")[-1].split("= 24")[0]

    # Discard anything the interpreter prints to stdout while evaluating.
    with contextlib.redirect_stdout(io.StringIO()):
        evaluations = [
            _evaluates_to_24(interpreter, expression)
            for expression in expressions.values()
        ]

    return expressions, evaluations

In [None]:
exps, acc = evaluate_tot()

In [None]:
acc.count(True)

| Method     | Success   | Runs |
| --------   | --------  | ---- |
| IO prompt  | 10%       | 1    |
| CoT prompt | 26%       | 1    |
| ToT (b=1)  | 50%       | 1    |