In [11]:
import textarena as ta
from dotenv import load_dotenv
import os
from agents import LLMAgent, EIGAgent

# Load variables from the project's .env file (python-dotenv) before
# looking up the credential.
load_dotenv()

# Fail fast: a missing *or empty* key is treated as "not configured".
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
if not openrouter_api_key:
    raise ValueError("OPENROUTER_API_KEY is not set in the .env file.")

# Re-export the key so anything reading os.environ later sees it.
os.environ["OPENROUTER_API_KEY"] = openrouter_api_key

In [12]:
# Play one single-player game of 20 Questions with an EIGAgent as the guesser.
#
# Flow: build the environment and gamemaster, reset, read the ground-truth
# word/theme (for logging and to seed the agent's hypothesis set), then loop:
# observe -> update the agent's weights from the latest answer -> pick the
# next action -> step the environment.

# Hard upper bound on loop iterations. The environment is expected to end the
# game on its own; this only guards against a run that never reports `done`.
# NOTE(review): the value is a generous guess -- tune it to the env's turn budget.
MAX_TURNS = 40

# Initialize base agent for 20 Questions
base_agent = ta.agents.OpenRouterAgent(model_name="openai/gpt-4o")

# Initialize the 20 Questions environment
env = ta.make(env_id="TwentyQuestions-v0-raw")
# Change the gamemaster model (before reset)
env.gamemaster = ta.agents.OpenRouterAgent(model_name="openai/gpt-4o")
print(f"Gamemaster model: {env.gamemaster.model_name}")

# Reset the environment for single player
env.reset(num_players=1)

# Extract ground truth after reset
ground_truth_word = env.game_word
ground_truth_theme = env.game_theme

print(f"ðŸŽ¯ GROUND TRUTH: '{ground_truth_word}' (theme: {ground_truth_theme})")

# Initialize LLM agent with the ground truth theme
llm_agent = EIGAgent(base_agent, ground_truth_theme)

done = False
turn_count = 0

all_observations = []
while not done and turn_count < MAX_TURNS:
    # Each observation entry is indexable: [0] is the sender id, [1] the text.
    _, observation = env.get_observation()
    new_observations = [{"player": obs[0], "message": obs[1]} for obs in observation]
    all_observations.extend(new_observations)

    # Fold the most recent (question, answer) exchange into the agent's
    # weights *before* it chooses its next action, so the choice is made
    # from up-to-date beliefs. The update only happens when the last two
    # messages really are "our question" (player 0) followed by a reply from
    # player -1 (presumably the game/gamemaster id -- verify against the env);
    # otherwise it is skipped rather than reusing values from an earlier turn.
    if isinstance(llm_agent, EIGAgent) and len(new_observations) > 1:
        previous_message, latest_message = new_observations[-2], new_observations[-1]
        if previous_message["player"] == 0 and latest_message["player"] == -1:
            question = previous_message["message"]
            # Normalise the reply: lower-case, drop periods, trim whitespace.
            answer = latest_message["message"].lower().replace(".", "").strip()
            llm_agent.update_weights(question, answer)

    print(llm_agent.samples)

    print(f"\n{llm_agent.format_history(new_observations)}")

    # Get action from LLM agent - it handles decision making internally
    action = llm_agent(all_observations)

    # Step the environment
    done, _ = env.step(action=action)

    turn_count += 1

if not done:
    # The loop exited because of the safety bound, not because the game ended.
    print(f"Stopped after {turn_count} turns without the environment reporting completion.")

# Get final results
rewards, game_info = env.close()
_, observation = env.get_observation()
new_observations = [{"player": obs[0], "message": obs[1]} for obs in observation]
all_observations.extend(new_observations)

print(f"\n{llm_agent.format_history(new_observations)}")

print(f"ðŸŽ¯ Ground truth was: '{ground_truth_word}' (theme: {ground_truth_theme})")
print(f"Final rewards: {rewards}")
print(f"Game info: {game_info}")

Gamemaster model: openai/gpt-4o
ðŸŽ¯ GROUND TRUTH: 'politician' (theme: people)
Initialized EIGAgent with 48 samples for theme 'people'
{'teacher': 1, 'pilot': 1, 'firefighter': 1, 'doctor': 1, 'nurse': 1, 'scientist': 1, 'artist': 1, 'engineer': 1, 'author': 1, 'chef': 1, 'musician': 1, 'actor': 1, 'dancer': 1, 'director': 1, 'athlete': 1, 'coach': 1, 'politician': 1, 'judge': 1, 'lawyer': 1, 'editor': 1, 'farmer': 1, 'plumber': 1, 'electrician': 1, 'mechanic': 1, 'architect': 1, 'entrepreneur': 1, 'librarian': 1, 'astronaut': 1, 'photographer': 1, 'veterinarian': 1, 'pharmacist': 1, 'dentist': 1, 'psychologist': 1, 'bartender': 1, 'barista': 1, 'cashier': 1, 'maid': 1, 'carpenter': 1, 'blacksmith': 1, 'butcher': 1, 'driver': 1, 'tailor': 1, 'trainer': 1, 'detective': 1, 'soldier': 1, 'clown': 1, 'magician': 1, 'sculptor': 1}

[GAME] You are Player 0. You are playing 20 Questions (Basic).
The gamemaster has chosen an object that can be one or two words. This object is related to peopl

KeyboardInterrupt: 