In [1]:
from dotenv import load_dotenv
import random
from openpipe.client import OpenPipe

# Load variables from a .env file before the client below is constructed.
# NOTE(review): presumably this is where the OpenPipe API key comes from — confirm.
load_dotenv()

# Shared OpenPipe client; the `rollout` function defined later in this notebook
# uses it to report completions and to update log metadata.
op_client = OpenPipe()
print("OpenPipe client initialized")

# Fix the seed of Python's global `random` generator for repeatable runs.
# NOTE(review): this only helps if the game helpers draw from `random` — confirm.
random.seed(42)

OpenPipe client initialized


In [2]:
import art
import openai
import time
from litellm import acompletion

from .utils import generate_game, get_opponent_move, render_board, check_winner, apply_agent_move, get_trajectory_messages, AgentMove


@art.retry(exceptions=(openai.LengthFinishReasonError,))
async def rollout(
    model: str, iteration: int, is_validation: bool
) -> art.Trajectory:
    """Play one tic-tac-toe game with `model` as the agent and record it.

    The agent is prompted with the rendered board before each of its moves,
    its reply is applied to the board, and the opponent answers until
    `check_winner` reports a result or the agent's move is rejected. Every
    completion is reported to OpenPipe on a best-effort basis, and the final
    reward is attached to the last completion's log entry.

    Args:
        model: Model name passed to `litellm.acompletion`.
        iteration: Iteration number; only recorded in the OpenPipe metadata.
        is_validation: Validation flag; only recorded in the OpenPipe metadata.

    Returns:
        The trajectory of messages and choices, with `reward` set to
        1 for an agent win, 0.5 for a draw, 0 for an opponent win, and
        -1 when `apply_agent_move` rejects the agent's move with `ValueError`.

    Raises:
        openai.LengthFinishReasonError: Re-raised unchanged from the
            completion call (the `art.retry` decorator is configured for it).
        Exception: Any other completion error is logged, the partial
            trajectory is stored in the module-level `failing_trajectory`,
            and the error is re-raised.
    """
    # Debugging hook: the trajectory that was in flight when a completion call
    # failed is published here so it can be inspected from another cell.
    global failing_trajectory

    game = generate_game()

    trajectory = art.Trajectory(
        messages_and_choices=[
            {
                "role": "system",
                "content": f"You are a tic-tac-toe player. You are playing against an opponent. Always choose the move most likely to lead to an eventual win. Return the move in the format 'A1', 'B2', 'C3', etc. You are the {game['agent_symbol']} symbol.",
            }
        ],
        reward=0,
        metrics={"test": 5},
    )

    # When the agent plays "o", the opponent makes the opening move.
    if game["agent_symbol"] == "o":
        starting_opponent_move = get_opponent_move(game)
        game["board"][starting_opponent_move[0]][starting_opponent_move[1]] = game[
            "opponent_symbol"
        ]

    # Bound up front so the metadata update after the loop never touches an
    # unbound local if the loop body does not run.
    last_completion = None

    while check_winner(game["board"]) is None:
        trajectory.messages_and_choices.append(
            {"role": "user", "content": render_board(game)}
        )

        requested_at = int(time.time() * 1000)
        messages = get_trajectory_messages(trajectory)

        try:
            chat_completion = await acompletion(
                model=model,
                messages=messages,
                response_format=AgentMove,
            )
            last_completion = chat_completion
        except openai.LengthFinishReasonError:
            # Handled by the retry decorator; skip the debug logging below.
            raise
        except Exception as e:
            print("caught exception generating chat completion")
            print(e)
            failing_trajectory = trajectory
            raise

        # Reporting is best-effort: a logging failure must not abort the game.
        try:
            op_client.report(
                requested_at=requested_at,
                received_at=int(time.time() * 1000),
                req_payload={
                    "model": model,
                    "messages": messages,
                    "metadata": {
                        "notebook-id": "tic-tac-toe",
                        "iteration": str(iteration),
                        "validation": str(is_validation),
                        # The list holds system + user on the first turn and
                        # grows by two per turn, so this yields 1, 3, 5, ...
                        # NOTE(review): confirm a 1, 2, 3 sequence was not intended.
                        "move_number": str(len(trajectory.messages_and_choices) - 1),
                    },
                },
                resp_payload=chat_completion,
                status_code=200,
            )
        except Exception as e:
            print(f"Error reporting to OpenPipe: {e}")

        choice = chat_completion.choices[0]
        content = choice.message.content
        assert isinstance(content, str)
        trajectory.messages_and_choices.append(choice)

        try:
            apply_agent_move(game, content)
        except ValueError:
            # The move was rejected: penalize and stop. The board has no
            # winner at this point, so the reward mapping below leaves -1 intact.
            trajectory.reward = -1
            break

        if check_winner(game["board"]) is not None:
            break

        opponent_move = get_opponent_move(game)
        game["board"][opponent_move[0]][opponent_move[1]] = game["opponent_symbol"]

    winner = check_winner(game["board"])

    if winner == game["agent_symbol"]:
        trajectory.reward = 1
    elif winner == game["opponent_symbol"]:
        trajectory.reward = 0
    elif winner == "draw":
        trajectory.reward = 0.5

    # Attach the final reward to the last completion's log entry (best-effort).
    if last_completion is not None:
        try:
            op_client.update_log_metadata(
                filters=[
                    {
                        "field": "completionId",
                        "equals": last_completion.id,
                    }
                ],
                metadata={
                    "reward": str(trajectory.reward),
                    "reward_assigned": "true",
                },
            )
        except Exception as e:
            print(f"Error updating log metadata: {e}")

    return trajectory

In [3]:
from art.utils.benchmark_rollout import benchmark_rollout

# Benchmark `rollout` with gpt-4o-mini over 100 games; the call prints and
# returns the average reward. Top-level `await` relies on the notebook's
# running event loop.
await benchmark_rollout(
    "gpt-4o-mini",
    100,
    rollout,
)


Benchmarking rollout:   0%|          | 0/100 [00:00<?, ?it/s]

Average reward for gpt-4o-mini: 0.285


0.285

In [4]:
# Same benchmark (100 games, average reward) for gpt-4o-2024-11-20.
await benchmark_rollout(
    "gpt-4o-2024-11-20",
    100,
    rollout,
)

Benchmarking rollout:   0%|          | 0/100 [00:00<?, ?it/s]

Average reward for gpt-4o-2024-11-20: 0.36


0.36

In [5]:
# Same benchmark (100 games, average reward) for o3-mini-2025-01-31.
await benchmark_rollout(
    "o3-mini-2025-01-31",
    100,
    rollout,
)

Benchmarking rollout:   0%|          | 0/100 [00:00<?, ?it/s]

Average reward for o3-mini-2025-01-31: 0.95


0.95

In [6]:
# Same benchmark for gpt-4.5-preview-2025-02-27, but with only 10 games
# instead of 100, so this average is a noisier estimate than the others.
# NOTE(review): presumably reduced for cost or latency — confirm.
await benchmark_rollout(
    "gpt-4.5-preview-2025-02-27",
    10,
    rollout,
)

Benchmarking rollout:   0%|          | 0/10 [00:00<?, ?it/s]

Average reward for gpt-4.5-preview-2025-02-27: 0.85


0.85