In [1]:
!pip install textarena smithery anthropic httpx mcp --upgrade

Collecting textarena
  Downloading TextArena-0.5.8-py3-none-any.whl.metadata (16 kB)
Downloading TextArena-0.5.8-py3-none-any.whl (8.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: textarena
  Attempting uninstall: textarena
    Found existing installation: TextArena 0.5.6
    Uninstalling TextArena-0.5.6:
      Successfully uninstalled TextArena-0.5.6
Successfully installed textarena-0.5.8

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m


In [2]:
import os
from getpass import getpass

# Never paste real keys into the notebook: keep any key that is already
# exported in the environment and prompt (without echo) only for missing ones.
for _key_name in ("ANTHROPIC_API_KEY", "E2B_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"{_key_name}: ")

In [3]:
from textarena.core import Agent
import textarena as ta
import asyncio
from typing import Optional

# Default system prompt handed to the model when an agent is built without one.
STANDARD_GAME_PROMPT = (
    "You are a competitive game player. "
    "Make sure you read the game instructions carefully, "
    "and always follow the required format."
)

class AsyncAnthropicAgent(Agent):
    """Agent class using the Anthropic Claude API to generate responses asynchronously.

    Calling an instance with an observation string sends that observation as a
    single user message and returns the model's text reply. Failed requests are
    retried a fixed number of times before the last error is re-raised.
    """
    def __init__(self, model_name: str, system_prompt: Optional[str] = STANDARD_GAME_PROMPT, max_tokens: int = 1000, temperature: float = 0.9, verbose: bool = False):
        """
        Initialize the Anthropic agent.

        Args:
            model_name (str): The name of the Claude model (e.g., "claude-3-5-sonnet-20241022").
            system_prompt (Optional[str]): The system prompt to use (default: STANDARD_GAME_PROMPT).
                Pass None to send requests without a system prompt.
            max_tokens (int): The maximum number of tokens to generate.
            temperature (float): The temperature for randomness in response generation.
            verbose (bool): If True, additional debug info will be printed.

        Raises:
            ImportError: If the `anthropic` package is not installed.
        """
        super().__init__()
        self.model_name = model_name
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.verbose = verbose

        # Imported lazily so the class can be defined without the package installed.
        try:
            import anthropic
        except ImportError as exc:
            raise ImportError(
                "Anthropic package is required for AsyncAnthropicAgent. "
                "Install it with: pip install anthropic"
            ) from exc

        # No explicit credentials are passed; the client is left to its own
        # defaults (this notebook sets ANTHROPIC_API_KEY in the environment).
        self.client = anthropic.AsyncAnthropic()

    async def _make_request(self, observation: str) -> str:
        """
        Make a single API request to Anthropic and return the generated message.

        Args:
            observation (str): The text sent as the only user message.

        Returns:
            str: The concatenated text of all text blocks in the reply, stripped.

        Raises:
            ValueError: If the reply contains no text block at all.
        """
        request = {
            "model": self.model_name,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "messages": [
                {"role": "user", "content": [{"type": "text", "text": observation}]}
            ],
        }
        # `system_prompt` is Optional: leave the field out instead of sending None.
        if self.system_prompt is not None:
            request["system"] = self.system_prompt

        response = await self.client.messages.create(**request)

        # Do not assume the first content block is a text block.
        text_parts = [
            block.text
            for block in response.content
            if getattr(block, "type", None) == "text"
        ]
        if not text_parts:
            raise ValueError("Anthropic response contained no text content.")
        return "".join(text_parts).strip()

    async def _retry_request(self, observation: str, retries: int = 3, delay: int = 5) -> str:
        """
        Attempt to make an API request with retries.

        Args:
            observation (str): The input to process.
            retries (int): The number of attempts to try (must be at least 1).
            delay (int): Seconds to wait between attempts.

        Returns:
            str: The response of the first attempt that succeeds.

        Raises:
            ValueError: If `retries` is smaller than 1.
            Exception: The last exception caught if all retries fail.
        """
        # Without this guard a non-positive `retries` would skip the loop and
        # end in `raise None`, which surfaces as an unrelated TypeError.
        if retries < 1:
            raise ValueError(f"retries must be at least 1. Received: {retries}")

        last_exception = None
        for attempt in range(1, retries + 1):
            try:
                response = await self._make_request(observation)
                if self.verbose:
                    print(f"\nObservation: {observation}\nResponse: {response}")
                return response
            except Exception as e:
                last_exception = e
                print(f"Attempt {attempt} failed with error: {e}")
                # No point sleeping after the final attempt.
                if attempt < retries:
                    await asyncio.sleep(delay)
        raise last_exception

    async def __call__(self, observation: str) -> str:
        """
        Process the observation using the Anthropic API and return the generated response.

        Args:
            observation (str): The input string to process.

        Returns:
            str: The generated response.

        Raises:
            ValueError: If `observation` is not a string.
        """
        if not isinstance(observation, str):
            raise ValueError(f"Observation must be a string. Received type: {type(observation)}")
        return await self._retry_request(observation)

In [5]:
import textarena as ta
import smithery
import mcp
import os
import json


class MCPAgent(AsyncAnthropicAgent):
    """Claude agent that may call MCP tools while deciding on its reply.

    Each request opens a websocket to the Smithery-hosted E2B MCP server, offers
    the server's tools to the model, executes every tool call the model makes and
    feeds the results back until the model answers without calling a tool.
    """

    def __init__(self, *args, **kwargs):
        """
        Initialize the agent; arguments are forwarded to AsyncAnthropicAgent.

        Raises:
            KeyError: If the E2B_API_KEY environment variable is not set.
        """
        super().__init__(*args, **kwargs)

        self.url = smithery.create_smithery_url(
            "wss://server.smithery.ai/e2b/ws", {"e2bApiKey": os.environ["E2B_API_KEY"]}
        )

    async def _make_request(self, observation: str) -> str:
        """
        Run one tool-enabled conversation with Anthropic and return its text.

        Args:
            observation (str): The text sent as the first user message.

        Returns:
            str: The text blocks of every model turn in this conversation,
            concatenated in order and stripped.

        Raises:
            Exception: Any error from the MCP session or the Anthropic API is
                printed and re-raised. Tool failures whose message contains
                "MCP error" are instead reported back to the model.
        """
        async with smithery.websocket_client(self.url) as streams:
            async with mcp.client.session.ClientSession(*streams) as session:

                try:
                    tools_result = await session.list_tools()

                    # Forward only the fields of the Anthropic tool format; the
                    # MCP dump may carry additional keys that do not belong in
                    # an Anthropic tool definition.
                    tools = []
                    for tool in tools_result.model_dump()["tools"]:
                        if "inputSchema" not in tool:
                            continue
                        tool_spec = {
                            "input_schema": tool["inputSchema"],
                            "name": tool["name"],
                        }
                        if tool.get("description") is not None:
                            tool_spec["description"] = tool["description"]
                        tools.append(tool_spec)

                    print("Available tools:", tools)

                    final_response_text = ""
                    messages = [
                        {
                            "role": "user",
                            "content": [{"type": "text", "text": observation}],
                        }
                    ]

                    # `messages` is mutated in place below, so the same request
                    # dict can be reused for every round.
                    request = {
                        "model": self.model_name,
                        "max_tokens": self.max_tokens,
                        "temperature": self.temperature,
                        "messages": messages,
                        "tools": tools,
                    }
                    # The system prompt is optional: omit it rather than send None.
                    if self.system_prompt is not None:
                        request["system"] = self.system_prompt

                    # Loop to handle multiple tool calls in a conversation
                    while True:
                        response = await self.client.messages.create(**request)

                        print("Response:", response)

                        tool_results = []
                        for content_block in response.content:
                            if content_block.type == "tool_use":
                                tool_name = content_block.name
                                tool_input = content_block.input
                                tool_id = content_block.id

                                print(f"Tool called: {tool_name}")
                                print(f"Tool input: {json.dumps(tool_input, indent=2)}")

                                # Execute the tool using MCP session
                                try:
                                    tool_result = await session.call_tool(
                                        tool_name, tool_input
                                    )
                                    tool_result_dict = tool_result.model_dump()
                                except Exception as e:
                                    # Only MCP-level failures are shown to the
                                    # model; anything else must propagate instead
                                    # of leaving the result unset or stale.
                                    if "MCP error" not in str(e):
                                        raise
                                    tool_result_dict = {"error": str(e)}

                                # The tool_result content is sent as a string;
                                # default=str keeps non-JSON values from aborting.
                                result_str = json.dumps(tool_result_dict, default=str)
                                print(f"Tool result: {result_str}")

                                tool_results.append(
                                    {
                                        "type": "tool_result",
                                        "tool_use_id": tool_id,
                                        "content": result_str,
                                    }
                                )
                            elif content_block.type == "text":
                                # Accumulate text responses
                                final_response_text += content_block.text

                        # No tool call in this turn: the conversation is finished.
                        if not tool_results:
                            break

                        # Echo the complete assistant turn (text and tool calls),
                        # then answer all of its tool calls in one user turn.
                        messages.append(
                            {"role": "assistant", "content": response.content}
                        )
                        messages.append({"role": "user", "content": tool_results})

                except Exception as e:

                    print(f"Error: {e}")
                    raise

            return final_response_text.strip()

In [6]:
import textarena as ta

# Initialize agents
agents = {
    0: MCPAgent(model_name="claude-3-7-sonnet-20250219"),
    1: AsyncAnthropicAgent(model_name="claude-3-5-haiku-20241022"),
}

# Initialize environment from subset and wrap it
env = ta.make(env_id="SimpleNegotiation-v0")
env = ta.wrappers.LLMObservationWrapper(env=env)
env = ta.wrappers.SimpleRenderWrapper(
    env=env,
    player_names={0: "sonnet", 1: "haiku"},
)

env.reset(num_players=len(agents))
done = False
while not done:
    player_id, observation = env.get_observation()
    action = await agents[player_id](observation)
    done, info = env.step(action=action)
    print("step complete")
rewards = env.close()
print(rewards)

Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_011pxhZEfJJX7BQG1KmS67Bs', content=[TextBlock(citations=None, text="I'll play the Negotiation Game as Player 0. Let me first analyze my current resources and their values to determine good trading strategies.", type='text'), ToolUseBlock(id='toolu_01XQCKNCQ1wkDSi6d2KyjX7J', input={'code': '# Analyze my current resources and their values\nresources = {\n    "Wheat": {"qty": 5, "value": 5, "value_per_unit": 5/5},\n    "Wood": {"qty": 17, "value": 12, "value_per_unit": 12/17},\n    "Sheep": {"qty": 18, "value": 12, "value_per_unit": 12/18},\n    "Brick": {"qty": 14, "value": 28, "value_per_unit": 28/14},\n    "Ore": {"qty": 14, "value": 39, "val

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_01NQH3JenPJowPX7di1f4ei8', content=[TextBlock(citations=None, text='I need to analyze this counter-offer carefully to see if it makes sense for me. Let me evaluate the value exchange of this trade offer.', type='text'), ToolUseBlock(id='toolu_011rRuq29doUGK93bemoU2Jc', input={'code': '# My current resources\nresources = {\n    "Wheat": {"Qty": 5, "Value": 5},\n    "Wood": {"Qty": 17, "Value": 12},\n    "Sheep": {"Qty": 18, "Value": 12},\n    "Brick": {"Qty": 14, "Value": 28},\n    "Ore": {"Qty": 14, "Value": 39}\n}\n\n# Calculate value per unit for each resource\nfor resource, data in resources.items():\n    value_per_unit = dat

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_01FM4xDyP6QJApESaNFrkCUZ', content=[TextBlock(citations=None, text="I need to analyze this counter-offer carefully to determine if it's beneficial for me. Let me calculate the exact values involved.", type='text'), ToolUseBlock(id='toolu_01UHjshWAt61G4V3hQFpVrS2', input={'code': '# My current resources\nresources = {\n    "Wheat": {"Qty": 5, "Value": 5},\n    "Wood": {"Qty": 17, "Value": 12},\n    "Sheep": {"Qty": 18, "Value": 12},\n    "Brick": {"Qty": 14, "Value": 28},\n    "Ore": {"Qty": 14, "Value": 39}\n}\n\n# Calculate value per unit for each resource\nfor resource, data in resources.items():\n    value_per_unit = data["Va

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_01W6N973aHUti9CGd34b391C', content=[TextBlock(citations=None, text="I'll help you analyze your updated resources after the trade and determine a good strategy for your next move. Let me calculate the new resource totals and values.", type='text'), ToolUseBlock(id='toolu_01Q1qHWZ3bPh3LRg9LxK2Gnu', input={'code': '# Original resources before trade\nresources_before = {\n    "Wheat": {"Qty": 5, "Value": 5},\n    "Wood": {"Qty": 17, "Value": 12},\n    "Sheep": {"Qty": 18, "Value": 12},\n    "Brick": {"Qty": 14, "Value": 28},\n    "Ore": {"Qty": 14, "Value": 39}\n}\n\n# Calculate value per unit for each resource\nfor resource, detail

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_014QGSppFP6fatXratV4gbQJ', content=[TextBlock(citations=None, text="I'll analyze this counter-offer from Player 1 to determine if it makes sense for me economically.", type='text'), ToolUseBlock(id='toolu_01AGYF7kJtpVt9PqZBvKmnt7', input={'code': '# Current resources after first trade\nwheat = 5    # Qty: 5, Value: 5 (1.00 per unit)\nwood = 14    # Qty: 14, Value: 9.88 (0.71 per unit)\nsheep = 14   # Qty: 14, Value: 9.33 (0.67 per unit)\nbrick = 14   # Qty: 14, Value: 28.00 (2.00 per unit)\nore = 17     # Qty: 17, Value: 47.36 (2.79 per unit)\n\n# Per unit values\nwheat_value = 1.00\nwood_value = 0.71\nsheep_value = 0.67\nbrick_

step complete


step complete
