In [24]:
%pip install -Uq boto3 anthropic

Note: you may need to restart the kernel to use updated packages.


In [None]:
%pip install -r requirements.txt

In [1]:
from dotenv import load_dotenv
from utils.visualize import visualize
from typing import List, Dict,Optional
# Load variables from a .env file into the process environment before any
# client is created (python-dotenv's default lookup; which variables are
# expected is not visible in this notebook).
load_dotenv()

# Model ID passed to the AnthropicBedrock client by the baseline cells.
# Alternative that can be swapped in:
#   "global.anthropic.claude-opus-4-5-20251101-v1:0"
MODEL = "global.anthropic.claude-sonnet-4-5-20250929-v1:0"

# Shared visualizer instance; the agent loops hand every API response to
# viz.capture(). NOTE(review): auto_show=True presumably renders each capture
# immediately - confirm in utils/visualize.
viz = visualize(auto_show=True)

In [2]:
import json

from utils.team_expense_api import get_custom_budget, get_expenses, get_team_members

from anthropic import AnthropicBedrock

# Bedrock-backed Anthropic client, built with no explicit arguments.
client = AnthropicBedrock()

# Smoke test: a single short request to confirm the configured model answers.
message = client.messages.create(
    messages=[dict(role="user", content="Hello, world")],
    max_tokens=256,
    model=MODEL,
)
print(message.content)

[TextBlock(citations=None, text='Hello! How can I help you today?', type='text')]


In [3]:

# Tool definitions for the team expense API
#
# Each entry describes one tool to the model: a `name`, a natural-language
# `description` (including the return format), and a JSON-schema `input_schema`.
# The agent loops below dispatch a tool call with
# `tool_functions[tool_name](**tool_input)`, so every `name` here needs a
# matching key in `tool_functions`, and the schema property names are passed
# straight through as keyword arguments to the Python function.
#
# Gotcha: get_expenses identifies the employee as `employee_id`, while
# get_custom_budget uses `user_id` for the same kind of identifier.
#
# The commented-out `input_examples` blocks are disabled together with the
# commented-out `betas=["tool-examples-2025-10-29"]` argument in
# run_agent_without_ptc. NOTE(review): presumably they must be enabled as a
# pair - confirm before turning either on.
tools = [
    {
        # Lists the people in one department.
        "name": "get_team_members",
        "description": 'Returns a list of team members for a given department. Each team member includes their ID, name, role, level (junior, mid, senior, staff, principal), and contact information. Use this to get a list of people whose expenses you want to analyze. Available departments are: engineering, sales, and marketing.\n\nRETURN FORMAT: Returns a JSON string containing an ARRAY of team member objects (not wrapped in an outer object). Parse with json.loads() to get a list. Example: [{"id": "ENG001", "name": "Alice", ...}, {"id": "ENG002", ...}]',
        "input_schema": {
            "type": "object",
            "properties": {
                "department": {
                    "type": "string",
                    "description": "The department name. Case-insensitive.",
                }
            },
            "required": ["department"],
        },
        # "input_examples": [
        #     {"department": "engineering"},
        #     {"department": "sales"},
        #     {"department": "marketing"},
        # ],
    },
    {
        # Expense line items for one employee in one quarter.
        "name": "get_expenses",
        "description": "Returns all expense line items for a given employee in a specific quarter. Each expense includes extensive metadata: date, category, description, amount (in USD), currency, status (approved, pending, rejected), receipt URL, approval chain, merchant name and location, payment method, and project codes. An employee may have 20-50+ expense line items per quarter, and each line item contains substantial metadata for audit and compliance purposes. Categories include: 'travel' (flights, trains, rental cars, taxis, parking), 'lodging' (hotels, airbnb), 'meals', 'software', 'equipment', 'conference', 'office', and 'internet'. IMPORTANT: Only expenses with status='approved' should be counted toward budget limits.\n\nRETURN FORMAT: Returns a JSON string containing an ARRAY of expense objects (not wrapped in an outer object with an 'expenses' key). Parse with json.loads() to get a list directly. Example: [{\"expense_id\": \"ENG001_Q3_001\", \"amount\": 1250.50, \"category\": \"travel\", ...}, {...}]",
        "input_schema": {
            "type": "object",
            "properties": {
                "employee_id": {
                    "type": "string",
                    "description": "The unique employee identifier",
                },
                "quarter": {
                    "type": "string",
                    "description": "Quarter identifier: 'Q1', 'Q2', 'Q3', or 'Q4'",
                },
            },
            "required": ["employee_id", "quarter"],
        },
        # "input_examples": [
        #     {"employee_id": "ENG001", "quarter": "Q3"},
        #     {"employee_id": "SAL002", "quarter": "Q1"},
        #     {"employee_id": "MKT001", "quarter": "Q4"},
        # ],
    },
    {
        # Per-employee travel budget lookup (custom exception or the standard amount).
        "name": "get_custom_budget",
        "description": 'Get the custom quarterly travel budget for a specific employee. Most employees have a standard $5,000 quarterly travel budget. However, some employees have custom budget exceptions based on their role requirements. This function checks if a specific employee has a custom budget assigned.\n\nRETURN FORMAT: Returns a JSON string containing a SINGLE OBJECT (not an array). Parse with json.loads() to get a dict. Example: {"user_id": "ENG001", "has_custom_budget": false, "travel_budget": 5000, "reason": "Standard", "currency": "USD"}',
        "input_schema": {
            "type": "object",
            "properties": {
                "user_id": {
                    "type": "string",
                    "description": "The unique employee identifier",
                }
            },
            "required": ["user_id"],
        },
        # "input_examples": [
        #     {"user_id": "ENG001"},
        #     {"user_id": "SAL002"},
        #     {"user_id": "MKT001"},
        # ],
    },
]

# Dispatch table: tool name as announced to the model -> local Python callable.
# The agent loops look a requested tool up here and call it with the
# model-supplied input as keyword arguments.
tool_functions = dict(
    get_team_members=get_team_members,
    get_expenses=get_expenses,
    get_custom_budget=get_custom_budget,
)

### Traditional Tool Calling (Baseline)
In this first example, we'll use traditional tool calling to establish our baseline.

We'll call the messages.create API with our initial query. When the model stops with a tool_use reason, we will execute the tool as requested, and then add the output from the tool to the messages and call the model again.

In [4]:
import time

from anthropic.types import TextBlock, ToolUseBlock
from anthropic.types.beta import (
    BetaMessageParam as MessageParam,
)
from anthropic.types.beta import (
    BetaTextBlock,
    BetaToolUseBlock,
)


In [5]:

messages: list[MessageParam] = []


def run_agent_without_ptc(user_message):
    """Answer ``user_message`` with the traditional tool-calling loop (baseline).

    The model is called through ``client.beta.messages.create`` in a loop.
    Whenever it stops with ``stop_reason == "tool_use"``, every requested tool
    is executed locally via ``tool_functions`` and all results are sent back in
    one user turn. The loop ends on ``end_turn`` or on any other stop reason.

    Each call starts a fresh conversation, so re-running the cell (or calling
    the function twice) does not resend an earlier exchange or inflate the
    token count.

    Args:
        user_message: Text of the user's question.

    Returns:
        A 5-tuple ``(final_response, messages, total_tokens, elapsed_time,
        api_counter)``:

        * ``final_response`` - text of the first text block of the last
          response. On ``end_turn`` it is ``None`` when there is no text
          block; on any other stop reason it falls back to
          ``"Stopped with reason: <reason>"``.
        * ``messages`` - the conversation sent to the API (the final assistant
          response itself is not appended).
        * ``total_tokens`` - sum of input and output tokens over all calls.
        * ``elapsed_time`` - wall-clock seconds for the whole run.
        * ``api_counter`` - number of ``messages.create`` calls made.
    """
    # Call-local conversation: nothing leaks in from, or out to, module state.
    messages = [{"role": "user", "content": user_message}]
    total_tokens = 0
    start_time = time.time()
    api_counter = 0

    while True:
        response = client.beta.messages.create(
            model=MODEL,
            max_tokens=8000,
            tools=tools,
            messages=messages,
            # betas=["tool-examples-2025-10-29"],
        )

        api_counter += 1

        # Track token usage
        total_tokens += response.usage.input_tokens + response.usage.output_tokens
        viz.capture(response)

        if response.stop_reason == "end_turn":
            # Extract the first text block from the response
            final_response = next(
                (
                    block.text
                    for block in response.content
                    if isinstance(block, (BetaTextBlock, TextBlock))
                ),
                None,
            )
            elapsed_time = time.time() - start_time
            return final_response, messages, total_tokens, elapsed_time, api_counter

        # Process tool calls
        if response.stop_reason == "tool_use":
            # The assistant turn must precede the tool results that answer it.
            messages.append({"role": "assistant", "content": response.content})

            # Collect all tool results
            tool_results = []

            for block in response.content:
                if not isinstance(block, (BetaToolUseBlock, ToolUseBlock)):
                    continue

                tool_result = {"type": "tool_result", "tool_use_id": block.id}
                try:
                    result = tool_functions[block.name](**block.input)
                    # The tool descriptions promise JSON: pass strings through
                    # untouched and serialize containers as JSON, not as a
                    # Python repr.
                    if isinstance(result, (dict, list)):
                        tool_result["content"] = json.dumps(result)
                    else:
                        tool_result["content"] = str(result)
                except Exception as exc:
                    # Unknown tool name, bad arguments, or a failing tool:
                    # report it to the model instead of aborting the run.
                    print(f"[Tool error] {block.name}: {exc!r}")
                    tool_result["content"] = f"Error executing tool '{block.name}': {exc!r}"
                    tool_result["is_error"] = True

                tool_results.append(tool_result)

            # Append all tool results at once after collecting them
            messages.append({"role": "user", "content": tool_results})

        else:
            print(f"\nUnexpected stop reason: {response.stop_reason}")
            elapsed_time = time.time() - start_time

            final_response = next(
                (
                    block.text
                    for block in response.content
                    if isinstance(block, (BetaTextBlock, TextBlock))
                ),
                f"Stopped with reason: {response.stop_reason}",
            )
            return final_response, messages, total_tokens, elapsed_time, api_counter

In [6]:
# Benchmark question handed to run_agent_without_ptc in the next cell.
query = "Which engineering team members exceeded their Q3 travel budget? Standard quarterly travel budget is $5,000. However, some employees have custom budget limits. For anyone who exceeded the $5,000 standard budget, check if they have a custom budget exception. If they do, use that custom limit instead to determine if they truly exceeded their budget."


In [7]:
# Execute the baseline (traditional tool-calling) agent on the benchmark query
# and unpack its five return values.
(
    result,
    conversation,
    total_tokens,
    elapsed_time,
    api_count_without_ptc,
) = run_agent_without_ptc(query)

# One metric per line.
print(
    f"Result: {result}",
    f"API calls made: {api_count_without_ptc}",
    f"Total tokens used: {total_tokens:,}",
    f"Total time taken: {elapsed_time:.2f}s",
    sep="\n",
)

Result: Now let me analyze the data. I need to calculate approved travel expenses (categories: 'travel' and 'lodging' with status='approved') for each engineer:

**ENG001 (Alice Chen) - Standard Budget: $5,000**
- Travel: $1,283.95 + $196.76 + $186.75 + $1,017.20 + $28.78 + $1,060.97 + $1,051.26 + $15.96 + $175.48 + $1,492.55 = $6,509.66
- Lodging: $1,621.08 + $1,166.78 = $2,787.86
- **Total: $9,297.52** ✅ EXCEEDED $5,000

**ENG002 (Bob Martinez) - Custom Budget: $8,000**
- Travel: $710.22 + $24.55 + $1,216.45 + $11.18 + $1,128.86 + $178.26 + $1,167.89 + $1,276.95 + $1,484.08 + $1,435.73 = $10,634.17
- Lodging: $1,494.16 + $705.56 = $2,199.72
- **Total: $12,833.89** ✅ EXCEEDED $8,000 custom budget

**ENG003 (Carol White) - Standard Budget: $5,000**
- Travel: $35.74 + $1,150.98 + $1,429.90 + $192.77 + $12.77 + $156.78 + $163.89 + $1,352.47 + $1,452.83 + $949.18 = $6,897.31
- Lodging: $1,512.23 + $808.68 + $1,273.64 = $3,594.55
- **Total: $10,491.86** ✅ EXCEEDED $5,000

**ENG004 (David K

# Customized PTC

In [48]:
!python examples/bedrock_docker_agent_example.py -v

2025-12-29 08:17:03,051 - asyncio - selector_events.py:64 - DEBUG - Using selector: EpollSelector
Bedrock Docker Sandbox Agent Demo
2025-12-29 08:17:03,051 - __main__ - bedrock_docker_agent_example.py:324 - INFO - Registered tool: get_team_members
2025-12-29 08:17:03,051 - __main__ - bedrock_docker_agent_example.py:324 - INFO - Registered tool: get_expenses
2025-12-29 08:17:03,051 - __main__ - bedrock_docker_agent_example.py:324 - INFO - Registered tool: get_custom_budget
2025-12-29 08:17:03,051 - __main__ - bedrock_docker_agent_example.py:342 - INFO - Registered direct tool: get_weather
2025-12-29 08:17:03,051 - __main__ - bedrock_docker_agent_example.py:302 - INFO - Initialized BedrockDockerSandboxAgent with model: global.anthropic.claude-sonnet-4-5-20250929-v1:0
2025-12-29 08:17:03,051 - __main__ - bedrock_docker_agent_example.py:346 - INFO - Docker sandbox agent ready

Docker Sandbox Agent created with 4 tools:
  - get_team_members (code_execution)
  - get_expenses (code_execution)

2025-12-29 08:17:03,471 - __main__ - bedrock_docker_agent_example.py:382 - INFO - AnthropicBedrock client initialized
[36m╭─[0m[36m───────────────────────────[0m[36m [0m[1;36mClaude API Response[0m[36m [0m[36m────────────────────────────[0m[36m─╮[0m
[36m│[0m [1;36mClaude Message[0m ([32massistant[0m) [2;37m│[0m [35mtokens:[0m [36m3,002[0m in • [32m1,098[0m out • [33m4,100[0m total      [36m│[0m
[36m│[0m ├── [2;37mModel:[0m claude-sonnet-4-5-20250929                                        [36m│[0m
[36m│[0m ├── [2;37mStop Reason:[0m tool_use                                                    [36m│[0m
[36m│[0m └── [1;37mContent[0m (2 blocks)                                                       [36m│[0m
[36m│[0m     ├── [2;37mBlock 1[0m                                                              [36m│[0m
[36m│[0m     │   └── [36mText[0m                                                             [36m│[0m
[36m│[0m     │      

## Test for Anthropic API Proxy

In [8]:
import copy

# Work on a deep copy so the baseline `tools` list is left untouched.
ptc_tools = copy.deepcopy(tools)

# Mark every expense tool as callable from the code-execution tool.
for tool in ptc_tools:
    tool.update(allowed_callers=["code_execution_20250825"])  # type: ignore

# Finally register the code execution tool itself.
ptc_tools += [
    dict(type="code_execution_20250825", name="code_execution"),  # type: ignore
]


In [9]:
import os

import anthropic

# Rebind the module-level `messages` name to a fresh, empty list.
messages = []

# Never commit credentials in a notebook: the proxy key is read from the
# environment (an earlier cell calls load_dotenv(), so a local .env file works).
# Any key that was previously hardcoded here should be treated as compromised
# and rotated.
_proxy_api_key = os.environ.get("ANTHROPIC_PROXY_API_KEY")
if not _proxy_api_key:
    raise RuntimeError(
        "Set ANTHROPIC_PROXY_API_KEY (e.g. in .env) before running this cell."
    )

# Client that talks to the local Anthropic-compatible proxy. The endpoint can
# be overridden with ANTHROPIC_PROXY_BASE_URL; the default matches local dev.
anthropic_client = anthropic.Anthropic(
    api_key=_proxy_api_key,
    base_url=os.environ.get("ANTHROPIC_PROXY_BASE_URL", "http://127.0.0.1:8000"),
)

# Smoke test: one short request through the proxy to confirm it responds.
message = anthropic_client.beta.messages.create(
    model="qwen.qwen3-coder-480b-a35b-v1:0",
    # model="global.anthropic.claude-opus-4-5-20251101-v1:0",
    # model = 'global.anthropic.claude-sonnet-4-5-20250929-v1:0',
    # model = 'minimax.minimax-m2',
    max_tokens=8000,
    messages=[
        {"role": "user", "content": "你好，Claude！"}
    ]
)

print(message.content)

[BetaTextBlock(citations=None, text='你好！不过你可能弄错了，我是通义千问，不是Claude。Claude是由Anthropic公司开发的另一个AI模型。有什么我可以帮你的吗？', type='text', cache_control=None)]


In [11]:


def run_agent_with_ptc(user_message, model_id):
    """Answer ``user_message`` using programmatic tool calling (PTC).

    Requests go through ``anthropic_client.beta.messages.create`` with the
    ``advanced-tool-use-2025-11-20`` beta and the ``ptc_tools`` list. Whenever
    the model stops with ``stop_reason == "tool_use"``, each requested tool is
    executed locally via ``tool_functions`` and the results are sent back in one
    user turn. The container ID reported by a response is passed along on the
    following requests so execution state is reused.

    Args:
        user_message: Text of the user's question.
        model_id: Model identifier forwarded as ``model``.

    Returns:
        A 5-tuple ``(final_response, messages, total_tokens, elapsed_time,
        api_counter)``:

        * ``final_response`` - text of the first ``BetaTextBlock`` of the last
          response. On ``end_turn`` it is ``None`` when there is no text
          block; on any other stop reason it falls back to
          ``"Stopped with reason: <reason>"``.
        * ``messages`` - the conversation sent to the API (the final assistant
          response itself is not appended).
        * ``total_tokens`` - sum of input and output tokens over all calls.
        * ``elapsed_time`` - wall-clock seconds for the whole run.
        * ``api_counter`` - number of ``messages.create`` calls made.
    """
    messages = [{"role": "user", "content": user_message}]
    total_tokens = 0
    start_time = time.time()
    container_id = None
    api_counter = 0

    while True:
        # Build request with PTC beta headers
        request_params = {
            "model": model_id,
            "max_tokens": 8000,
            "tools": ptc_tools,
            "messages": messages,
        }

        response = anthropic_client.beta.messages.create(
            **request_params,
            betas=[
                "advanced-tool-use-2025-11-20",
            ],
            # Only send a container once a previous response has reported one.
            extra_body={"container": container_id} if container_id else None,
        )
        viz.capture(response)
        api_counter += 1

        # Track container for stateful execution
        container = getattr(response, "container", None)
        if container:
            container_id = container.id
            print(f"\n[Container] ID: {container_id}")
            if hasattr(container, "expires_at"):
                # If the container has expired, we would need to restart our workflow. In our case, it completes before expiration.
                print(f"[Container] Expires at: {container.expires_at}")

        # Track token usage
        total_tokens += response.usage.input_tokens + response.usage.output_tokens

        if response.stop_reason == "end_turn":
            # Extract the first text block from the response
            final_response = next(
                (block.text for block in response.content if isinstance(block, BetaTextBlock)),
                None,
            )
            elapsed_time = time.time() - start_time
            return final_response, messages, total_tokens, elapsed_time, api_counter

        # As before, we process tool calls
        if response.stop_reason == "tool_use":
            # The assistant turn must precede the tool results that answer it.
            messages.append({"role": "assistant", "content": response.content})

            # Collect all tool results
            tool_results = []

            for block in response.content:
                if not isinstance(block, BetaToolUseBlock):
                    continue

                tool_name = block.name

                # The caller type tells us how the tool was invoked. Look it up
                # defensively: `caller` may be missing or None (for example
                # when the backend does not populate it), and that must not
                # abort the run.
                caller_type = getattr(getattr(block, "caller", None), "type", None)
                if caller_type == "code_execution_20250825":
                    print(f"[PTC] Tool called from code execution environment: {tool_name}")
                elif caller_type == "direct":
                    print(f"[Direct] Tool called by model: {tool_name}")

                tool_result = {"type": "tool_result", "tool_use_id": block.id}
                try:
                    result = tool_functions[tool_name](**block.input)

                    # Format result as proper content for the API
                    if isinstance(result, list) and result and isinstance(result[0], str):
                        tool_result["content"] = "\n".join(result)
                    elif isinstance(result, (dict, list)):
                        tool_result["content"] = json.dumps(result)
                    else:
                        tool_result["content"] = str(result)
                except Exception as exc:
                    # Unknown tool name, bad arguments, or a failing tool:
                    # report it to the caller instead of aborting the run.
                    print(f"[Tool error] {tool_name}: {exc!r}")
                    tool_result["content"] = f"Error executing tool '{tool_name}': {exc!r}"
                    tool_result["is_error"] = True

                tool_results.append(tool_result)

            messages.append({"role": "user", "content": tool_results})

        else:
            print(f"\nUnexpected stop reason: {response.stop_reason}")
            elapsed_time = time.time() - start_time

            final_response = next(
                (block.text for block in response.content if isinstance(block, BetaTextBlock)),
                f"Stopped with reason: {response.stop_reason}",
            )
            return final_response, messages, total_tokens, elapsed_time, api_counter

In [15]:
# Run the PTC agent through the proxy client.
# Other model IDs that can be swapped in:
#   'minimax.minimax-m2'
#   "global.anthropic.claude-opus-4-5-20251101-v1:0"
#   'global.anthropic.claude-sonnet-4-5-20250929-v1:0'
model_id = "qwen.qwen3-coder-480b-a35b-v1:0"

# Same benchmark question as the baseline run, so the two runs are comparable.
query = "Which engineering team members exceeded their Q3 travel budget? Standard quarterly travel budget is $5,000. However, some employees have custom budget limits. For anyone who exceeded the $5,000 standard budget, check if they have a custom budget exception. If they do, use that custom limit instead to determine if they truly exceeded their budget."

(
    result_ptc,
    conversation_ptc,
    total_tokens_ptc,
    elapsed_time_ptc,
    api_count_with_ptc,
) = run_agent_with_ptc(query, model_id)

# One metric per line.
print(
    f"Result: {result_ptc}",
    f"API calls made: {api_count_with_ptc}",
    f"Total tokens used: {total_tokens_ptc:,}",
    f"Total time taken: {elapsed_time_ptc:.2f}s",
    sep="\n",
)


[Container] ID: container_01b3c6208f5f
[Container] Expires at: 2025-12-29 08:43:05.394485
[PTC] Tool called from code execution environment: get_team_members



[Container] ID: container_01b3c6208f5f
[Container] Expires at: 2025-12-29 08:43:08.770689
[PTC] Tool called from code execution environment: get_expenses
[PTC] Tool called from code execution environment: get_expenses
[PTC] Tool called from code execution environment: get_expenses
[PTC] Tool called from code execution environment: get_expenses
[PTC] Tool called from code execution environment: get_expenses
[PTC] Tool called from code execution environment: get_expenses
[PTC] Tool called from code execution environment: get_expenses
[PTC] Tool called from code execution environment: get_expenses



[Container] ID: container_01b3c6208f5f
[Container] Expires at: 2025-12-29 08:43:11.825921
[PTC] Tool called from code execution environment: get_custom_budget
[PTC] Tool called from code execution environment: get_custom_budget
[PTC] Tool called from code execution environment: get_custom_budget
[PTC] Tool called from code execution environment: get_custom_budget
[PTC] Tool called from code execution environment: get_custom_budget
[PTC] Tool called from code execution environment: get_custom_budget



[Container] ID: container_01b3c6208f5f
[Container] Expires at: 2025-12-29 08:43:18.654208
Result: # Engineering Team Members Who Exceeded Their Q3 Travel Budget

Based on my analysis of Q3 travel expenses for the engineering team, the following members exceeded their allocated travel budgets:

1. **ENG005**: Spent $7,890.50 > Budget $5,000 (Over by $2,890.50)
2. **ENG004**: Spent $13,450.00 > Budget $12,000 (Over by $1,450.00)
3. **ENG001**: Spent $6,420.71 > Budget $5,000 (Over by $1,420.71)
4. **ENG002**: Spent $9,215.30 > Budget $8,000 (Over by $1,215.30)
5. **ENG007**: Spent $6,125.00 > Budget $5,000 (Over by $1,125.00)
6. **ENG003**: Spent $5,620.25 > Budget $5,000 (Over by $620.25)

Note: Some employees (ENG002 and ENG004) had custom budget exceptions due to their roles, but still exceeded even those higher limits. Others were working with the standard $5,000 quarterly travel budget.
API calls made: 4
Total tokens used: 9,967
Total time taken: 18.62s
