In [None]:
import requests
import json
import os
from datetime import datetime

## NOTE: These examples won't work because the environments have been disabled in the API
## but you can see the plots in the ./example folder
# task_path = "example/other_example.json"

# Task definition that will be submitted to the API
task_path = "example/finance_example_2.json"

# JSON is UTF-8 by specification; pass the encoding explicitly so the platform
# default (e.g. cp1252 on Windows) cannot reject or corrupt non-ASCII content.
with open(task_path, 'r', encoding='utf-8') as file:
    task_json = json.load(file)

# Add the task file name to the request so the server can include it in the filename
task_json['task_file_name'] = task_path

# API_BASE_URL= "https://tau-bench.turing.com"
API_BASE_URL = "http://localhost:8000"

# (connect, read) timeouts in seconds. Without a timeout requests waits forever,
# which hangs the kernel when the server is unreachable or stalls mid-response.
# The read timeout is deliberately generous; raise it if a run needs longer.
REQUEST_TIMEOUT = (10, 1800)

# Choose which endpoint to use
endpoint_options = {
    "compute_complexity": f"{API_BASE_URL}/compute_complexity",
    "task_verification": f"{API_BASE_URL}/task_verification",
    "run_task": f"{API_BASE_URL}/run-task"
}

# Select the endpoint you want to use
selected_endpoint = "run_task"  # Change this to "compute_complexity", "task_verification", or "run_task" as needed
endpoint_url = endpoint_options[selected_endpoint]

print(f"Using endpoint: {endpoint_url}")

# Submit the task as a JSON body; the response is interpreted by the code that follows
response = requests.post(
    endpoint_url,
    json=task_json,
    headers={"Content-Type": "application/json"},
    timeout=REQUEST_TIMEOUT,
)

# Handle different response types based on endpoint


def _filename_from_content_disposition(header_value):
    """Return a safe bare filename taken from a Content-Disposition header.

    Only the ``filename=`` parameter is considered. Trailing parameters and
    surrounding quotes are removed, and any directory components are dropped
    so a server-supplied name can never escape the results directory.

    Returns None when the header carries no usable filename.
    """
    _, marker, tail = header_value.partition('filename=')
    if not marker:
        return None

    tail = tail.strip()
    if tail.startswith('"'):
        # Quoted value: everything up to the closing quote (may contain ';')
        name = tail[1:].partition('"')[0]
    else:
        # Unquoted token: ends at the next parameter separator
        name = tail.partition(';')[0].strip()

    # Normalise Windows separators first so basename strips them on POSIX too
    name = os.path.basename(name.replace('\\', '/'))
    if name in ('', '.', '..'):
        return None
    return name


if selected_endpoint == "run_task" and response.status_code == 200:
    # For run-task endpoint, save the downloaded file

    # Create results directory if it doesn't exist
    results_dir = "results"
    os.makedirs(results_dir, exist_ok=True)

    # Use the filename from the server's Content-Disposition header if available
    original_filename = _filename_from_content_disposition(
        response.headers.get('Content-Disposition', '')
    )

    if original_filename:
        filename = original_filename
    else:
        # Fallback filename (shouldn't be needed with updated server)
        timestamp = datetime.now().strftime("%m%d%H%M%S")

        # Extract task information to create meaningful filename
        task_info = ""
        if task_json.get('environment'):
            env_name = task_json['environment'].replace('_', '-')
            task_info = f"{env_name}_"
        elif 'model' in task_json:
            task_info = f"{task_json['model']}_"

        # Path separators inside the task info would otherwise be read as directories
        task_info = task_info.replace('/', '-').replace('\\', '-')

        filename = f"tool-calling-{task_info}range_0--1_user-task_llm_{timestamp}.json"

    # Save the raw file first, byte-for-byte as received
    raw_file_path = os.path.join(results_dir, filename)
    with open(raw_file_path, 'wb') as f:
        f.write(response.content)

    # Parse the JSON content as-is (no transformation). Decoding the received
    # bytes directly avoids re-reading the file with the platform default text
    # encoding; ValueError covers both malformed JSON and undecodable bytes.
    try:
        response_data = json.loads(response.content)
        print("Response JSON loaded successfully")
    except ValueError as e:
        print(f"Error parsing JSON: {e}")
        print("First 500 characters of response:")
        print(repr(response.text[:500]))
        response_data = None

else:
    # For other endpoints (or a failed run-task call), handle as JSON response
    if response.status_code != 200:
        # Make failures visible instead of presenting an error body as a normal result
        print(f"Request failed with HTTP status {response.status_code}")

    try:
        response_data = response.json()
        print("JSON response received")
    except ValueError:
        # ValueError is the common base of every JSON decode error that
        # response.json() can raise across requests versions
        print("Error: Response is not valid JSON")
        print("Response content:", response.text[:500])  # Show first 500 chars
        response_data = None

Using endpoint: http://localhost:8000/run-task
Response JSON loaded successfully
Response JSON loaded successfully


In [24]:
# Pretty-print the payload exactly as the API returned it (nothing is reshaped)
if response_data is None:
    print("No response data available")
else:
    print(json.dumps(response_data, indent=2))

# Report every file currently stored in the results folder together with its size
results_dir = "results"
if not os.path.exists(results_dir):
    print("Results directory does not exist")
else:
    files = os.listdir(results_dir)
    if not files:
        print("No files found")
    for saved_name in sorted(files):
        size_in_bytes = os.path.getsize(os.path.join(results_dir, saved_name))
        print(f"  - {saved_name} ({size_in_bytes} bytes)")

[
  {
    "task_id": 0,
    "reward": 0.0,
    "info": {
      "task": {
        "user_id": "111",
        "actions": [
          {
            "name": "query_users",
            "kwargs": {
              "email": "william.estrada@investment.com"
            }
          },
          {
            "name": "list_funds_with_filter",
            "kwargs": {
              "manager_id": "111",
              "name": "Sanders, Miller and Murphy Growth Fund"
            }
          },
          {
            "name": "validate_approval",
            "kwargs": {
              "approval_code": "FUNDU0215"
            }
          },
          {
            "name": "update_fund",
            "kwargs": {
              "field_name": "size",
              "field_value": 6842163.52,
              "fund_id": "97",
              "fund_manager_approval": true
            }
          },
          {
            "name": "add_audit_trail",
            "kwargs": {
              "action": "update",
             

In [25]:
# Report every file currently stored in the results folder together with its size
results_dir = "results"
if not os.path.exists(results_dir):
    print("Results directory does not exist")
else:
    files = os.listdir(results_dir)
    if not files:
        print("No files found")
    for saved_name in sorted(files):
        size_in_bytes = os.path.getsize(os.path.join(results_dir, saved_name))
        print(f"  - {saved_name} ({size_in_bytes} bytes)")

# response_data only exists once the request cell has been executed
have_response = 'response_data' in globals() and response_data is not None

if not have_response:
    print("No response data available from previous cell")
    print("Please run the previous cells first")

elif selected_endpoint in ("compute_complexity", "task_verification"):
    # These two endpoints may return a base64-encoded plot under 'plot_base64'
    has_plot = (
        isinstance(response_data, dict)
        and response_data.get('plot_base64') is not None
    )
    if not has_plot:
        print("No plot data found in response")
    else:
        from IPython.display import Image, display
        import base64

        # Decode the base64 text back into raw image bytes and render it inline
        plot_base64 = response_data['plot_base64']
        plot_image = base64.b64decode(plot_base64)
        display(Image(data=plot_image))

elif selected_endpoint == "run_task":
    # run_task has no plot; its JSON was already printed by the previous cell
    print("Raw API response structure displayed above")

  - tool-calling-gpt-4o-0.0_range_0--1_user-gpt-4o-llm_0827194618.json (28261 bytes)
  - tool-calling-o4-mini_range_0--1_user-task_llm_0905124908.json (82360 bytes)
Raw API response structure displayed above
