In [1]:
# !pip install -r ../requirements.txt

In [10]:
import os
import re

import json
import numpy as np
from typing import Tuple

import matplotlib.pyplot as plt
from langchain.llms import OpenAI

from langchain_experimental.tot.base import ToTChain
from langchain_experimental.tot.checker import ToTChecker
from langchain_experimental.tot.thought import ThoughtValidity

##Load API key ------------------
f = open('../apikeys/api.txt', 'r')
key = f.readline()
os.environ['OPENAI_API_KEY'] = key.replace('\n','')

In [11]:
class MyChecker(ToTChecker):
    def evaluate(self,
        problem_description: str,
        thoughts: Tuple[str, ...] = ()) -> ThoughtValidity:
        
        last_thought = thoughts[-1]
        clean_solution = last_thought.replace(" ", "").replace('"', "")
        regex_solution = clean_solution.replace("*", ".").replace("|", "\\|")
        if sudoku_solution in clean_solution:
            return ThoughtValidity.VALID_FINAL
        elif re.search(regex_solution, sudoku_solution):
            return ThoughtValidity.VALID_INTERMEDIATE
        else:
            return ThoughtValidity.INVALID


In [12]:
difficulty_map = {'base':2,
                    'easy': 2,
                    'medium': 2,
                    'hard': 3,
                    'expert':4}

In [15]:
# data_config = {}
datadir = '../src/data/'
models = ["text-davinci-003"]
data_config = {}
for model in models:
    llm = OpenAI(temperature=0.1, max_tokens=512, model=model)
    files = [file for file in os.listdir(datadir) if '5x5' in file or '9x9' in file]
    for file in files:

        tile_size = file.split('.')[0].split('_')[-1]
        grid = tile_size.split('x')[0]
        subgrid = str(int(np.sqrt(int(grid))))+'x'+str(int(np.sqrt(int(grid))))
        if grid == '5':
            subgrid = ''
        children_thoughts = difficulty_map[file.split('_')[0]]*int(grid)
        
        with open(datadir+file) as f:
            data = f.read()

        data = json.loads(data)

        sudoku_puzzle = data['sudoku_puzzle']
        sudoku_solution = data['sudoku_solution']

        problem_description = f"""
            {sudoku_puzzle}

            - This is a {tile_size} Sudoku puzzle.
            - The * represents a cell to be filled.
            - The | character separates rows.
            - At each step, replace one or more * with digits 1-{grid}.
            - There must be no duplicate digits in any row, column or {subgrid} subgrid.
            - Keep the known digits from previous valid thoughts in place.
            - Each thought can be a partial or the final solution.
            """.strip()


        temp_dict = {'grid_size':tile_size,
                    'children_thoughts':children_thoughts,
                    'problem_description': problem_description,
                    'initial_sudoku': sudoku_puzzle,
                    'sudoku_solution': sudoku_solution,
                    'children': children_thoughts,
                    }

        checker = MyChecker()

        k = 40
        tot_chain = ToTChain(llm=llm,
                            checker=MyChecker(),
                            k=k,
                            c=12,
                            verbose=False,
                            verbose_llm=False)
                            
        solution = tot_chain.run(problem_description=problem_description)
        if solution == sudoku_solution:
            temp_dict['solved'] = True
        else:
            temp_dict['solved'] = False
        
        temp_dict['final_solution'] = solution
        temp_dict['k'] = k
        print(temp_dict)
        data_config[file] = temp_dict




{'grid_size': '5x5', 'children_thoughts': 10, 'problem_description': '*,5,*,2,*|1,*,5,*,2|2,*,3,*,5|*,2,*,5,*|5,*,2,*,4\n\n            - This is a 5x5 Sudoku puzzle.\n            - The * represents a cell to be filled.\n            - The | character separates rows.\n            - At each step, replace one or more * with digits 1-5.\n            - There must be no duplicate digits in any row, column or  subgrid.\n            - Keep the known digits from previous valid thoughts in place.\n            - Each thought can be a partial or the final solution.', 'initial_sudoku': '*,5,*,2,*|1,*,5,*,2|2,*,3,*,5|*,2,*,5,*|5,*,2,*,4', 'sudoku_solution': '4,5,1,2,3|1,3,5,4,2|2,4,3,1,5|3,2,4,5,1|5,1,2,3,4', 'children': 10, 'solved': False, 'final_solution': 'No solution found', 'k': 40}
{'grid_size': '9x9', 'children_thoughts': 18, 'problem_description': '*,3,5,4,6,7,1,9,8|4,7,*,5,9,1,3,2,6|1,6,9,*,2,8,7,5,4|6,*,7,9,4,3,3,8,2|8,5,3,2,*,6,4,7,9|9,2,4,8,7,5,*,1,3|7,9,1,6,8,4,2,*,5|5,4,2,1,3,9,8,6,*|3

In [None]:
datadir = '../src/artifacts/'
savefile = 'text-davinci-003_difficulty_eda_variable_grid.json'
file_path = datadir+savefile
# Writing the dictionary to a JSON file in append mode
with open(file_path, "a") as json_file:
    json.dump(data, json_file)

In [16]:
data_config

{'medium_problem_5x5.txt': {'grid_size': '5x5',
  'children_thoughts': 10,
  'problem_description': '*,5,*,2,*|1,*,5,*,2|2,*,3,*,5|*,2,*,5,*|5,*,2,*,4\n\n            - This is a 5x5 Sudoku puzzle.\n            - The * represents a cell to be filled.\n            - The | character separates rows.\n            - At each step, replace one or more * with digits 1-5.\n            - There must be no duplicate digits in any row, column or  subgrid.\n            - Keep the known digits from previous valid thoughts in place.\n            - Each thought can be a partial or the final solution.',
  'initial_sudoku': '*,5,*,2,*|1,*,5,*,2|2,*,3,*,5|*,2,*,5,*|5,*,2,*,4',
  'sudoku_solution': '4,5,1,2,3|1,3,5,4,2|2,4,3,1,5|3,2,4,5,1|5,1,2,3,4',
  'children': 10,
  'solved': False,
  'final_solution': 'No solution found',
  'k': 40},
 'medium_problem_9x9.txt': {'grid_size': '9x9',
  'children_thoughts': 18,
  'problem_description': '*,3,5,4,6,7,1,9,8|4,7,*,5,9,1,3,2,6|1,6,9,*,2,8,7,5,4|6,*,7,9,4,3,3,8,

In [None]:
plt.tight_layout()

In [None]:
plt.figure(figsize=(10,6))  
ax = plt.axes()
# depict illustration
plt.scatter([i for i in range(11)], ks)
plt.xlabel('jobid')  
plt.ylabel('Max number of Interactions(k)')
plt.title("k required to solve base problem based on initial conditions")
# setting ticks for x-axis
  
# setting label for x tick
ax.set_xticks([i for i in range(11)])
ax.set_xticklabels([i.split('.')[0].split('_')[0:2] for i in data_config.keys()], rotation = 30)
plt.savefig(datadir+'max_k.jpg')

In [None]:
ax = plt.axes()
  
# depict illustration
plt.scatter([i for i in range(3)], [int(np.round(sum(difficulty_dict[key]['ks'])/difficulty_dict[key]['counter'],0)) for key in difficulty_dict.keys()])
plt.xlabel('Difficulty')  
plt.ylabel('Avg k')
plt.title("avg k required to solve problem based on initial conditions")

# setting label for x tick
ax.set_xticks([i for i in range(3)])
ax.set_xticklabels([i for i in difficulty_dict.keys()], rotation = 30)
plt.savefig(datadir+'avg_k.jpg')