In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `game` module) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [32]:
from time import perf_counter
from game import Game, Solver,BruteForceSolver, SmartSolver, LLMSolver
from typing import List


In [33]:
def benchmark(solver: Solver, games: List[List[float]]) -> List[float]:
    """Time ``solver.solve`` on each starting state and report the results.

    A fresh ``Game`` is built for every state in ``games``; only the
    ``solver.solve(game)`` call sits between the two ``perf_counter()``
    readings, so ``Game`` construction and printing are not timed. One line
    per game is printed, followed by the average duration.

    Args:
        solver: Object whose ``solve(game)`` method is being timed.
        games: Starting states; each one is passed unchanged to ``Game(...)``.

    Returns:
        The per-game solve durations in seconds, in the same order as
        ``games``. The list is empty when ``games`` is empty. In a notebook,
        assign the result or end the call with ``;`` if the echoed list is
        not wanted as cell output.
    """
    durs: List[float] = []
    for state in games:
        game = Game(state)
        start = perf_counter()
        solution = solver.solve(game)
        durs.append(perf_counter() - start)
        print(f"{state} -> {solution} ({durs[-1]:.10f}s)")

    # Guard the average: an empty `games` list would otherwise divide by zero.
    if durs:
        print(f"Average duration: {sum(durs) / len(durs):.10f}s")
    else:
        print("Average duration: n/a (no games)")

    # The signature promises List[float]; hand the timings back to the caller.
    return durs


In [34]:
# Starting states used by every benchmark run below. Each inner list is one
# game's numbers and is handed as-is to Game(...) inside benchmark().
example_games: List[List[float]] = [
    [1, 2, 3, 4],
    [7, 3, 3, 7],
    [1, 1, 1, 1],    # the solver runs recorded in this notebook print None here
    [3, 8, 3, 8],
    [11, 1, 11, 5],
    [13, 1, 13, 7],
    [1, 3, 8, 4],
]

In [35]:
# Baseline run: a BruteForceSolver built with its default arguments, timed on
# every state in example_games.
brute_force_solver = BruteForceSolver()
benchmark(solver=brute_force_solver, games=example_games)

[1, 2, 3, 4] -> 4 * (3 + 1 + 2) (0.0002088000s)
[7, 3, 3, 7] -> 7 * (3 + 3 / 7) (0.0042543010s)
[1, 1, 1, 1] -> None (0.0255737070s)
[3, 8, 3, 8] -> 8 / (3 - 8 / 3) (0.0150467030s)
[11, 1, 11, 5] -> (11 * 11 - 1) / 5 (0.0246678060s)
[13, 1, 13, 7] -> (13 * 13 - 1) / 7 (0.0267538070s)
[1, 3, 8, 4] -> 8 + 4 * (1 + 3) (0.0002361000s)
Average duration: 0.0138201749s


In [36]:
# SmartSolver configured with num_games=1000, timed on the same states.
# NOTE(review): the "Solved N games" progress lines seem to be printed while
# the solver is constructed, i.e. before benchmark() starts its timer. If so,
# that setup cost is not included in the durations reported here -- confirm
# before comparing these numbers with the brute-force run.
smart_solver = SmartSolver(num_games=1000)
benchmark(solver=smart_solver, games=example_games)

Solved 1 games
Solved 101 games
Solved 201 games
Solved 301 games
Solved 401 games
Solved 501 games
Solved 601 games
Solved 701 games
Solved 801 games
Solved 901 games
Solved 1001 games
[1, 2, 3, 4] -> 4 * (3 + 1 + 2) (0.0000347000s)
[7, 3, 3, 7] -> 7 * (3 + 3 / 7) (0.0000271000s)
[1, 1, 1, 1] -> None (0.0001448000s)
[3, 8, 3, 8] -> 8 / (3 - 8 / 3) (0.0000267000s)
[11, 1, 11, 5] -> (11 * 11 - 1) / 5 (0.0000253000s)
[13, 1, 13, 7] -> (13 * 13 - 1) / 7 (0.0000234000s)
[1, 3, 8, 4] -> 8 + 4 * (1 + 3) (0.0000238000s)
Average duration: 0.0000436857s


In [None]:
# LLMSolver configured with model="gpt-4o", timed on the same states.
# NOTE(review): the recorded run took seconds to minutes per game; presumably
# each solve makes remote model calls, so re-running this cell is slow and may
# need credentials -- confirm before including it in a Run All.
llm_solver = LLMSolver(model="gpt-4o")
benchmark(solver=llm_solver, games=example_games)

[1, 2, 3, 4] -> 1 * 2 * 3 * 4 (23.3024193430s)
[7, 3, 3, 7] -> 7 * (3 + 3 / 7) (114.4400267490s)
Agent finished but game not solved.
Final state: [1.0, 1.0, 1.0, 1.0]
[1, 1, 1, 1] -> None (4.3904751820s)
Error during solving: Recursion limit of 25 reached without hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/GRAPH_RECURSION_LIMIT
[3, 8, 3, 8] -> None (105.7038135200s)
[11, 1, 11, 5] -> (11 * 11 - 1) / 5 (17.3491119680s)
Agent finished but game not solved.
Final state: [1.0, 7.0, 13.0, 13.0]
[13, 1, 13, 7] -> None (66.7067677390s)
[1, 3, 8, 4] -> (4 + 8) * (3 - 1) (14.4785624470s)
Average duration: 49.4815967069s
