In [13]:
import textarena as ta
from dotenv import load_dotenv
import os

load_dotenv()

True

In [14]:
# Prompt templates used by the 20 Questions agents.
# They are plain strings: placeholders such as {theme}, {history}, {question}
# and {objects} are filled in later via str.format() by the code that sends
# the prompt.

# Requests the initial pool of candidate objects for a theme.
# (\x20 is a literal space: it keeps the trailing space of that line explicit.)
SAMPLES_PROMPT = """
List 50 different objects, items, or concepts that would be appropriate for the theme/category "{theme}" in a 20 Questions game.\x20

Return only a JSON list of strings, like: ["object1", "object2", "object3", ...]

Objects should be:
- Concrete nouns that can be guessed in 20 questions
- Appropriate for the theme "{theme}"
- Diverse within the theme
- 1-2 words each"""

# Same request as SAMPLES_PROMPT, plus the constraint that the candidates must
# agree with the questions and answers seen so far.
REGEN_PROMPT = SAMPLES_PROMPT + """

They must also be consistent with the following previous questions and answers:
{history}"""

# Asks whether to keep questioning or to commit to a final guess.
DECISION_PROMPT = """
Given this 20 Questions game history:

{history}

Should you:
A) Ask another question to gather more information
B) Make a guess for the final answer

Consider:
- How much information you have
- How confident you are about the answer
- How many questions remain

Respond with ONLY one word: either "QUESTION" or "GUESS" (no quotes, no explanation)."""

# Asks for the next yes/no question.
QUESTION_PROMPT = """
You are playing 20 Questions. Based on this game history:

{history}

What is your next yes/no question? Make it strategic and informative.
Ask only the question, without brackets or extra formatting."""

# Asks for the final guess, formatted in square brackets.
MOVE_PROMPT = """
Based on this 20 Questions game history:

{history}

What is your final guess? Provide your answer in square brackets, like [your guess].
Make your best guess based on all the information gathered."""

# Asks how each candidate object would answer a given question.
CONSISTENCY_PROMPT = """
You will be given a question and a list of possible objects, coming from a 20 questions game. Your task is to determine which objects are consistent with a "yes" answer to the question, and which are consistent with a "no" answer. Respond with a Python dictionary where the keys are the objects and the values are either "yes" or "no".

Just output the dictionary, no markdown fences or extra text.

Question: "{question}"
Possible objects: {objects}
"""

In [15]:
import json

class LLMAgent(ta.agents.OpenRouterAgent):
    """20 Questions player that delegates every choice to a wrapped LLM agent.

    On construction it asks the wrapped agent for candidate objects matching
    the theme and stores them in ``self.samples`` (object -> weight). During
    play it keeps a running text transcript in ``self.game_history`` and
    prompts the wrapped agent to decide between asking and guessing.
    """

    def __init__(self, openrouter_agent: ta.agents.OpenRouterAgent, ground_truth_theme: str):
        """Wrap ``openrouter_agent`` and seed the sample pool for the theme.

        Args:
            openrouter_agent: Agent that is called with every prompt.
            ground_truth_theme: Theme/category used to request candidate objects.
        """
        super().__init__(model_name=openrouter_agent.model_name)
        self.openrouter_agent = openrouter_agent
        self.ground_truth_theme = ground_truth_theme
        self.samples = {}
        self.game_history = ""
        self._initialize_samples()

    def _initialize_samples(self):
        """Fill ``self.samples`` with theme-appropriate objects, each at weight 1.

        Parsing problems are reported with ``print`` and leave the pool with
        whatever was added before the problem occurred.
        """
        reply = self.openrouter_agent(
            SAMPLES_PROMPT.format(theme=self.ground_truth_theme)
        )
        try:
            if not ('[' in reply and ']' in reply):
                print("Could not parse object list, using empty samples")
                return

            # The JSON list is taken to span the first '[' through the last ']'
            payload = reply[reply.find('['):reply.rfind(']') + 1]
            for candidate in json.loads(payload):
                self.samples[candidate.lower().strip()] = 1

            print(f"Initialized EIGAgent with {len(self.samples)} samples for theme '{self.ground_truth_theme}'")
        except Exception as err:
            print(f"Error parsing samples: {err}")
            print(f"Response was: {reply}")

    def __call__(self, observation: str) -> str:
        """Entry point for the game loop: record the observation, return an action.

        Args:
            observation: Text received from the environment for this turn.

        Returns:
            The question or final guess produced for this turn.
        """
        self.game_history += f"\n{observation}"

        # Until a "[Player]" tag shows up in the transcript, always open with a question
        decision = (
            self.decision(self.game_history)
            if "[Player]" in self.game_history
            else "QUESTION"
        )
        print(f"EIGAgent decision: {decision}")

        act = self.move if "GUESS" in decision.upper() else self.question
        action = act(self.game_history)

        # Keep our own move in the transcript as well
        self.game_history += f"\nPlayer: {action}"
        return action

    def decision(self, history: str) -> str:
        """Ask the wrapped agent whether to keep questioning or to guess.

        Returns:
            "GUESS" only when the reply mentions GUESS and not QUESTION;
            "QUESTION" in every other case (including unclear replies).
        """
        reply = self.openrouter_agent(DECISION_PROMPT.format(history=history))
        verdict = reply.strip().upper()
        if "GUESS" in verdict and "QUESTION" not in verdict:
            return "GUESS"
        return "QUESTION"

    def question(self, history: str) -> str:
        """Return the wrapped agent's next question for the given history."""
        return self.openrouter_agent(QUESTION_PROMPT.format(history=history))

    def move(self, history: str) -> str:
        """Return the wrapped agent's final guess for the given history."""
        return self.openrouter_agent(MOVE_PROMPT.format(history=history))

In [None]:
import ast
import numpy as np

EPSILON = 0.1  # Noise level assumed for the yes/no answers

def binary_entropy(p: float) -> float:
    """
    Entropy (in bits) of a binary variable that is true with probability p.

    Returns 0.0 at the endpoints p == 0 and p == 1, and NaN when p lies
    outside [0, 1].
    """
    if p < 0 or p > 1:
        return float("nan")
    if p in (0, 1):
        # log2(0) is undefined; the entropy of a certain outcome is zero
        return 0.0
    q = 1 - p
    return -p * np.log2(p) - q * np.log2(q)

class EIGAgent(LLMAgent):
    """20 Questions agent that ranks candidate questions by expected information gain.

    ``self.samples`` maps each candidate object to a weight. Each turn the agent
    asks the LLM for ``k`` candidate questions, asks the LLM how every sample
    would answer each of them, and submits the question whose yes/no split is
    the most informative under a noisy-answer model (``EPSILON``).

    Weight updates are deferred. ``update_weights(question)`` is normally called
    right after the question was submitted, i.e. before the gamemaster's reply
    has been observed. The question is therefore remembered and the update is
    applied when the next observation reaches ``__call__``.
    """

    # Question still waiting for its answer; set by update_weights(),
    # consumed by __call__() when the next observation arrives.
    _pending_question = None

    @staticmethod
    def _parse_answer(text):
        """Reduce a reply to "yes" or "no"; return None when it is neither.

        Only the first whitespace-separated word counts, with punctuation
        removed, so "Yes.", "no," and "YES\\n..." are all recognised.
        """
        words = str(text).strip().lower().split()
        if not words:
            return None
        first = "".join(ch for ch in words[0] if ch.isalpha())
        return first if first in ("yes", "no") else None

    def __call__(self, observation: str) -> str:
        """Apply a deferred weight update (if any), then act like ``LLMAgent``.

        Args:
            observation: Text received from the environment for this turn.

        Returns:
            The question or final guess produced for this turn.
        """
        pending = self._pending_question
        if pending is not None:
            self._pending_question = None
            # Assumes the gamemaster's reply is the text after the last
            # "[GAME]" tag of the observation -- TODO confirm against the env.
            reply = str(observation).split("[GAME]")[-1]
            if self._parse_answer(reply) is not None:
                self.update_weights(pending, reply)
        return super().__call__(observation)

    def _get_consistency_dict(self, question: str):
        """Ask the LLM how each current sample would answer ``question``.

        Returns:
            A dict mapping lower-cased, stripped object names to the LLM's
            answer, or an empty dict when the reply cannot be parsed as a dict.
        """
        prompt = CONSISTENCY_PROMPT.format(question=question, objects=list(self.samples.keys()))
        response = self.openrouter_agent(prompt)
        try:
            text = str(response)
            # Tolerate markdown fences or extra prose around the dictionary
            start, end = text.find('{'), text.rfind('}')
            if start != -1 and end > start:
                text = text[start:end + 1]
            parsed = ast.literal_eval(text.strip())
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
            # These are the exceptions ast.literal_eval raises on malformed input
            print(f"Error parsing consistency dictionary: {e}")
            print(f"Response was: {response}")
            return {}

        if not isinstance(parsed, dict):
            print(f"Error parsing consistency dictionary: expected a dict, got {type(parsed).__name__}")
            print(f"Response was: {response}")
            return {}

        # Sample keys are stored lower-cased and stripped; normalise the same way
        return {str(key).lower().strip(): value for key, value in parsed.items()}

    def _calculate_eig(self, consistency_dict):
        """Expected information gain of a question given per-object answers.

        Computes ``H(EPSILON + (1 - 2*EPSILON) * p_yes) - H(EPSILON)`` where
        ``p_yes`` is the weighted share of samples answering "yes" and ``H`` is
        ``binary_entropy``. Samples missing from ``consistency_dict`` are ignored.

        Returns:
            The gain; 0 when all counted weight falls on one side (or nothing
            was counted); NaN when an answer is neither "yes" nor "no".
        """
        weighted_results = {"yes": 0, "no": 0}
        for obj, weight in self.samples.items():
            if obj not in consistency_dict:
                continue
            raw_answer = consistency_dict[obj]
            answer = self._parse_answer(raw_answer)
            if answer is None:
                print(f"Unexpected answer '{raw_answer}' for object '{obj}'")
                return float("nan")
            weighted_results[answer] += weight

        # A one-sided split carries no information; this also avoids dividing by zero
        if any(v == 0 for v in weighted_results.values()):
            return 0

        # Calculate EIG using weighted probabilities
        total_weight = sum(weighted_results.values())
        p_true = weighted_results["yes"] / total_weight

        return binary_entropy(
            EPSILON + ((1 - 2 * EPSILON) * p_true)
        ) - binary_entropy(EPSILON)

    def _perform_regeneration(self):
        """Drop low-weight samples and ask the LLM for history-consistent ones.

        Samples with weight below ``EPSILON`` are removed first. If the LLM
        reply contains a JSON list, unseen objects are added and every weight
        is reset to 1.0; otherwise the surviving samples keep their weights.
        """
        print("Performing regeneration of samples...")
        # Remove samples with weight below the threshold
        objects_to_replace = [obj for obj, weight in self.samples.items() if weight < EPSILON]
        for obj in objects_to_replace:
            del self.samples[obj]

        # Query for new samples
        prompt = REGEN_PROMPT.format(theme=self.ground_truth_theme, history=self.game_history)
        response = self.openrouter_agent(prompt)
        try:
            if '[' in response and ']' in response:
                start = response.find('[')
                end = response.rfind(']') + 1
                new_objects = json.loads(response[start:end])

                # Add new objects with initial weight 1
                for obj in new_objects:
                    name = str(obj).lower().strip()
                    if name not in self.samples:
                        self.samples[name] = 1

                # Start again from a uniform weighting over the refreshed pool
                for sample in list(self.samples):
                    self.samples[sample] = 1.0
                print(f"Regenerated {len(objects_to_replace)} samples, now have {len(self.samples)} samples.")
            else:
                print("Could not parse object list during regeneration, no new samples added.")

        except Exception as e:
            print(f"Error parsing samples during regeneration: {e}")
            print(f"Response was: {response}")

    def update_weights(self, question: str, answer=None):
        """Reweight samples by whether they agree with the answer to ``question``.

        Args:
            question: The question that was asked.
            answer: The gamemaster's reply, if already known. When omitted, the
                reply has not been observed yet (this method is typically
                called right after the question is submitted), so the question
                is stored and the update runs on the next ``__call__``.

        Samples agreeing with the answer are scaled by ``1 - EPSILON``, the
        others by ``EPSILON``. Samples the LLM did not classify are untouched.
        Replies that are neither yes nor no leave all weights unchanged.
        """
        if answer is None:
            self._pending_question = question
            return

        if self._pending_question == question:
            # An explicit answer supersedes the deferred update for this question
            self._pending_question = None

        true_answer = self._parse_answer(answer)
        if true_answer is None:
            print(f"Skipping weight update, answer is not yes/no: {answer!r}")
            return
        print("true answer", true_answer)

        consistency_dict = self._get_consistency_dict(question)
        for obj, weight in list(self.samples.items()):
            if obj not in consistency_dict:
                continue
            if self._parse_answer(consistency_dict[obj]) == true_answer:
                self.samples[obj] = weight * (1 - EPSILON)
            else:
                self.samples[obj] = weight * EPSILON

    def question(self, history, k=5):
        """Generate ``k`` candidate questions and return the one with the highest EIG.

        Triggers a regeneration of the sample pool first when any sample weight
        has dropped below ``EPSILON``. Ties go to the earliest candidate;
        candidates whose EIG is NaN are ranked last.
        """
        if any(weight < EPSILON for weight in self.samples.values()):
            self._perform_regeneration()

        prompt = QUESTION_PROMPT.format(history=history)
        question_list = []
        for _ in range(k):
            question = self.openrouter_agent(prompt)
            consistency_dict = self._get_consistency_dict(question)
            eig = self._calculate_eig(consistency_dict)
            question_list.append((question, eig))

        print(f"Question EIGs: {question_list}")

        if not question_list:
            # k < 1: nothing to rank, fall back to a plain LLM question
            return super().question(history)

        # NaN compares false against everything, which would make the ranking
        # arbitrary; map it to -inf so such candidates lose.
        best_question, _ = max(
            question_list,
            key=lambda item: float("-inf") if np.isnan(item[1]) else item[1],
        )
        return best_question

In [17]:
import textarena as ta
from dotenv import load_dotenv
import os

load_dotenv()

# Run one single-player game of 20 Questions with the EIG agent.

MAX_TURNS = 25  # Safety limit so a stuck game cannot loop forever

# Fail early when the key is missing (load_dotenv() above has already copied
# the .env entries into the process environment).
if not os.getenv("OPENROUTER_API_KEY"):
    raise ValueError("OPENROUTER_API_KEY is not set in the .env file.")

# Initialize base agent for 20 Questions
base_agent = ta.agents.OpenRouterAgent(model_name="openai/gpt-4o")

# Initialize the 20 Questions environment
env = ta.make(env_id="TwentyQuestions-v0")

# Change the gamemaster model (before reset)
env.gamemaster = ta.agents.OpenRouterAgent(model_name="openai/gpt-4o")
print(f"Gamemaster model: {env.gamemaster.model_name}")

# Reset the environment for single player
env.reset(num_players=1)

# Extract ground truth after reset
ground_truth_word = env.game_word
ground_truth_theme = env.game_theme

print(f"🎯 GROUND TRUTH: '{ground_truth_word}' (theme: {ground_truth_theme})")

# Initialize LLM agent with the ground truth theme
llm_agent = EIGAgent(base_agent, ground_truth_theme)
print(f"Sample objects: {list(llm_agent.samples.keys())[:10]}...")  # Show first 10
print("=" * 50)

done = False
turn_count = 0

while not done and turn_count < MAX_TURNS:
    player_id, observation = env.get_observation()

    print("Current samples:", llm_agent.samples)

    print(f"Player {player_id} turn {turn_count}")
    print(f"Observation:\n{observation}")
    print("-" * 50)

    # Get action from LLM agent - it handles decision making internally
    action = llm_agent(observation)  # This calls __call__ method
    print(f"Action: {action}")
    print("-" * 50)

    # Step the environment
    done, step_info = env.step(action=action)
    print(f"Done: {done}, Step info: {step_info}")
    print("=" * 50)

    # Only questions carry yes/no information; isinstance also covers subclasses of EIGAgent
    if "?" in action and isinstance(llm_agent, EIGAgent):
        llm_agent.update_weights(action)

    turn_count += 1

# Get final results
rewards, game_info = env.close()
print(f"🎯 Ground truth was: '{ground_truth_word}' (theme: {ground_truth_theme})")
print(f"Final rewards: {rewards}")
print(f"Game info: {game_info}")

Gamemaster model: openai/gpt-4o
🎯 GROUND TRUTH: 'businessperson' (theme: people)
Initialized EIGAgent with 46 samples for theme 'people'
Sample objects: ['teacher', 'doctor', 'chef', 'engineer', 'nurse', 'artist', 'scientist', 'musician', 'actor', 'farmer']...
Current samples: {'teacher': 1, 'doctor': 1, 'chef': 1, 'engineer': 1, 'nurse': 1, 'artist': 1, 'scientist': 1, 'musician': 1, 'actor': 1, 'farmer': 1, 'writer': 1, 'pilot': 1, 'firefighter': 1, 'police officer': 1, 'athlete': 1, 'lawyer': 1, 'dancer': 1, 'manager': 1, 'politician': 1, 'banker': 1, 'surgeon': 1, 'dentist': 1, 'mechanic': 1, 'librarian': 1, 'architect': 1, 'carpenter': 1, 'plumber': 1, 'electrician': 1, 'designer': 1, 'photographer': 1, 'journalist': 1, 'veterinarian': 1, 'astronaut': 1, 'historian': 1, 'economist': 1, 'psychologist': 1, 'judge': 1, 'accountant': 1, 'biologist': 1, 'bartender': 1, 'soldier': 1, 'planner': 1, 'merchant': 1, 'geographer': 1, 'psychiatrist': 1, 'salesperson': 1}
Player 0 turn 0
Obser

KeyboardInterrupt: 