<a href="https://colab.research.google.com/github/wjleece/AI-Agents/blob/main/AI_Agents_w_Evals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the third-party packages this notebook imports (run once per fresh runtime).
#   anthropic, openai, google-generativeai -> SDKs for the three model providers
#   fuzzywuzzy                             -> fuzzy string matching for product-name lookup
# NOTE(review): versions are unpinned, so a later run may resolve different releases.
%pip install anthropic
%pip install openai
%pip install -q -U google-generativeai
%pip install fuzzywuzzy

Collecting anthropic
  Downloading anthropic-0.51.0-py3-none-any.whl.metadata (25 kB)
Downloading anthropic-0.51.0-py3-none-any.whl (263 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.0/264.0 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.51.0
Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [22]:
import copy
import json
import re
import time
from datetime import datetime
from typing import Dict, List, Any, Optional, Union, Tuple

import anthropic
import google.generativeai as gemini
from fuzzywuzzy import process, fuzz
from google.colab import userdata
from openai import OpenAI


# Provider configuration, grouped per provider. API keys are read from the
# Colab `userdata` store so that no credential is written into the notebook.

# Anthropic (worker model)
ANTHROPIC_MODEL_NAME = "claude-3-5-sonnet-latest"
ANTHROPIC_API_KEY = userdata.get('ANTHROPIC_API_KEY')
anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

# OpenAI (worker model)
OPENAI_MODEL_NAME = "gpt-4.1"
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# Google Gemini (evaluator model)
EVAL_MODEL_NAME = "gemini-2.5-pro-preview-05-06"
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
gemini.configure(api_key=GOOGLE_API_KEY)

In [7]:
# System prompt shared by both worker assistants (Anthropic and OpenAI).
# It tells the worker how to use the conversation context to resolve references
# such as "my order" or "that product".
worker_system_prompt = """
You are a helpful customer service assistant for an e-commerce system.

When responding to the user, use the conversation context to maintain continuity.
- If a user refers to "my order" or similar, use the context to determine which order they're talking about.
- If they mention "that product" or use other references, check the context to determine what they're referring to.
- Always prioritize recent context over older context when resolving references.

The conversation context will be provided to you with each message. This includes:
- Previous questions and answers
- Recently viewed customers, products, and orders
- Recent actions taken (like creating orders, updating products, etc.)

Keep responses friendly, concise, and helpful. If you're not sure what a user is referring to, ask for clarification.
"""

# System prompt for the evaluator model: defines the four scoring criteria
# (1-10 each), the human-clarification escalation path, and how the evaluator
# should answer when asked to identify itself.
evaluator_system_prompt = """
You are an impartial evaluator assessing the quality of responses from two AI assistants (Anthropic Claude and OpenAI GPT) to customer service queries.

For each interaction, evaluate both responses based on:
1. Accuracy: How correct and factual is the response based on the available information?
2. Efficiency: Did the assistant get to the correct answer with minimal clarifying questions?
3. Context Awareness: Did the assistant correctly use the conversation context to understand references?
4. Helpfulness: How well did the assistant address the user's needs?

Score each response on a scale of 1-10 for each criterion, and provide an overall score.

If you identify ambiguity in the user's query that neither assistant could reasonably resolve without additional information:
1. Flag this as requiring human clarification
2. Clearly state what information is needed
3. Ask an admin user for the necessary clarification

After receiving human clarification, continue your evaluation incorporating this new information.
Store this feedback as a "learning" so similar situations can be handled better in the future.

For testing purposes, you may be asked to identify which model you are. You should realize that type of question likely comes from
a human user and not from an AI assistant. Therefore you should properly identify yourself by stating which model you are, and,
if specifically asked, your key tasks.
"""

In [8]:
# Gemini model used as the evaluator. The evaluator system prompt is attached
# at construction time through `system_instruction`.
eval_model_instance = gemini.GenerativeModel(
    model_name=EVAL_MODEL_NAME,
    system_instruction=evaluator_system_prompt
)

In [9]:
# Seed data for the in-memory store. These are only the starting values; the
# Storage class takes them as its initial contents and manages the data afterwards.

# Customers keyed by customer ID.
initial_customers = dict(
    C1=dict(name="John Doe", email="john@example.com", phone="123-456-7890"),
    C2=dict(name="Jane Smith", email="jane@example.com", phone="987-654-3210"),
)

# Products keyed by product ID.
initial_products = dict(
    P1=dict(
        name="Widget A",
        description="A simple widget. Very compact.",
        price=19.99,
        inventory_count=999,
    ),
    P2=dict(
        name="Gadget B",
        description="A powerful gadget. It spins.",
        price=49.99,
        inventory_count=200,
    ),
    P3=dict(
        name="Perplexinator",
        description="A perplexing perfunctator",
        price=79.99,
        inventory_count=1483,
    ),
)

# Orders keyed by order ID; each record repeats its own ID under "id".
initial_orders = dict(
    O1=dict(id="O1", product_id="P1", product_name="Widget A", quantity=2, price=19.99, status="Shipped"),
    O2=dict(id="O2", product_id="P2", product_name="Gadget B", quantity=1, price=49.99, status="Processing"),
)


In [10]:
# Module-level placeholders, created empty here:
#   human_feedback_learnings -> knowledge base dict shared with Storage
#   tools_schemas_list       -> tool schema list; the full list is assigned further down
human_feedback_learnings = dict()
tools_schemas_list = list()

In [30]:
# Standalone Anthropic completion function (for basic tests)
def get_completion_anthropic_standalone(prompt: str):
    """Send a single-turn prompt to the Anthropic worker model and return its text.

    Args:
        prompt: The user message to send.

    Returns:
        The concatenated text of every text block in the reply. This is an
        empty string when the reply contains no text block (for example when
        the model answered only with a tool request).
    """
    request_kwargs = {
        "model": ANTHROPIC_MODEL_NAME,
        "max_tokens": 2000,
        "temperature": 0.0,
        "system": worker_system_prompt,
        "messages": [{"role": "user", "content": prompt}],
    }
    # Only advertise tools once the schema list has actually been populated;
    # before that it is an empty placeholder and is left out of the request.
    if tools_schemas_list:
        request_kwargs["tools"] = tools_schemas_list

    message = anthropic_client.messages.create(**request_kwargs)

    # With tools enabled the first content block is not guaranteed to be text
    # (it can be a tool_use block, which has no `.text`), so collect the text
    # blocks instead of indexing content[0].
    return "".join(
        block.text for block in message.content if getattr(block, "type", None) == "text"
    )

In [14]:
# Smoke test: ask the Anthropic worker which model it is and print the reply.
prompt_test_anthropic = "Hey there, which AI model do you use for answering questions?"
print(f"Anthropic Standalone Test: {get_completion_anthropic_standalone(prompt_test_anthropic)}")

Anthropic Standalone Test: I aim to be direct and transparent: I'm Claude, an AI assistant created by Anthropic. I'm designed to help with customer service for this e-commerce system, but I aim to be honest about what I am and am not. Is there something specific I can help you with regarding orders, products, or other e-commerce matters?


In [31]:
def get_completion_openai_standalone(prompt: str):
    """Send a single-turn prompt to the OpenAI worker model and return its text.

    The shared tool schemas are written in Anthropic's layout
    (name / description / input_schema). The chat completions API expects each
    tool wrapped as {"type": "function", "function": {...}} with the schema
    under "parameters", so the list is converted before it is sent.

    Args:
        prompt: The user message to send.

    Returns:
        The `content` of the first choice's message. This can be None when the
        model replies with tool calls only.
    """
    request_kwargs = {
        "model": OPENAI_MODEL_NAME,
        "max_tokens": 2000,
        "temperature": 0.0,
        "messages": [
            {"role": "system", "content": worker_system_prompt},
            {"role": "user", "content": prompt},
        ],
    }

    openai_tools = [
        # Entries already in OpenAI's layout are passed through untouched.
        tool if tool.get("type") == "function" and "function" in tool else {
            "type": "function",
            "function": {
                "name": tool["name"],
                "description": tool.get("description", ""),
                "parameters": tool.get("input_schema", {"type": "object", "properties": {}}),
            },
        }
        for tool in tools_schemas_list
    ]
    # Leave `tools` out entirely while the schema list is still the empty
    # placeholder instead of sending an empty array.
    if openai_tools:
        request_kwargs["tools"] = openai_tools

    response = openai_client.chat.completions.create(**request_kwargs)
    return response.choices[0].message.content

In [16]:
# Smoke test: ask the OpenAI worker which model it is and print the reply.
prompt_test_openai = "Hey there, which AI model do you use for answering questions?"
print(f"OpenAI Standalone Test: {get_completion_openai_standalone(prompt_test_openai)}")

OpenAI Standalone Test: Hi! I use OpenAI’s GPT-4 model to answer your questions and assist you with your e-commerce needs. If you have any questions about your orders or products, feel free to ask!


In [17]:
def get_completion_eval_standalone(prompt: str):
    """Send a single prompt to the evaluator model and return the reply text.

    Uses the module-level `eval_model_instance`, which was constructed with the
    evaluator system prompt, so only the prompt itself is passed here.
    """
    return eval_model_instance.generate_content(prompt).text

In [18]:
# Smoke test: ask the evaluator model to identify itself and describe its tasks.
prompt_test_eval = "Hey there, can you tell me which AI you are and what your key tasks are?"
print(f"Gemini Eval Standalone Test:\n{get_completion_eval_standalone(prompt_test_eval)}")

Gemini Eval Standalone Test:
I am a large language model. My key task in this context is to act as an impartial evaluator of AI assistants' responses to customer service queries. I assess responses based on accuracy, efficiency, context awareness, and helpfulness, providing scores and feedback to help improve AI performance.


In [19]:
# Storage class definition
class Storage:
    """In-memory store holding the e-commerce data used by the tool functions.

    The customer, product and order tables start as independent copies of the
    `initial_*` seed dicts. The human-feedback learnings dict is shared with
    the module-level global on purpose.
    """
    def __init__(self):
        # Deep copies: a shallow .copy() would share every per-record dict with
        # the seed data, so editing a record in place (as update_product does)
        # would silently rewrite the initial_* values as well.
        self.customers = copy.deepcopy(initial_customers)
        self.products = copy.deepcopy(initial_products)
        self.orders = copy.deepcopy(initial_orders)
        # Not copied: this must remain the same object as the global dict.
        self.human_feedback_learnings = human_feedback_learnings

# Create the single global Storage instance that the tool functions read and
# modify, then print a confirmation so the cell output shows it ran.
storage = Storage()
print("Storage initialized.")


Storage initialized.


In [20]:
# Definitive list of tool schemas, one entry per tool function.
# Each entry uses the name / description / input_schema layout, where
# input_schema is a JSON Schema object describing the tool's arguments.
tools_schemas_list = [
    {
        "name": "create_customer",
        "description": "Adds a new customer to the database. Includes customer name, email, and (optional) phone number.",
        "input_schema": {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "The name of the customer."},
                "email": {"type": "string", "description": "The email address of the customer."},
                "phone": {"type": "string", "description": "The phone number of the customer (optional)."}
            },
            "required": ["name", "email"]
        }
    },
    {
        "name": "get_customer_info",
        "description": "Retrieves customer information based on their customer ID. Returns the customer's name, email, and (optional) phone number.",
        "input_schema": {
            "type": "object",
            "properties": {
                "customer_id": {"type": "string", "description": "The unique identifier for the customer."}
            },
            "required": ["customer_id"]
        }
    },
    {
        "name": "create_product",
        "description": "Adds a new product to the product database. Includes name, description, price, and initial inventory count.",
        "input_schema": {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "The name of the product."},
                "description": {"type": "string", "description": "A description of the product."},
                "price": {"type": "number", "description": "The price of the product."},
                "inventory_count": {"type": "integer", "description": "The amount of the product that is currently in inventory."}
            },
            "required": ["name", "description", "price", "inventory_count"]
        }
    },
    {
        "name": "update_product",
        "description": "Updates an existing product with new information. Only fields that are provided will be updated; other fields remain unchanged.",
        "input_schema": {
            "type": "object",
            "properties": {
                "product_id": {"type": "string", "description": "The unique identifier for the product to update."},
                "name": {"type": "string", "description": "The new name for the product (optional)."},
                "description": {"type": "string", "description": "The new description for the product (optional)."},
                "price": {"type": "number", "description": "The new price for the product (optional)."},
                "inventory_count": {"type": "integer", "description": "The new inventory count for the product (optional)."}
            },
            "required": ["product_id"]
        }
    },
    {
        "name": "get_product_info",
        "description": "Retrieves product information based on product ID or product name (with fuzzy matching for misspellings). Returns product details including name, description, price, and inventory count.",
        "input_schema": {
            "type": "object",
            "properties": {
                "product_id_or_name": {"type": "string", "description": "The product ID or name (can be approximate)."}
            },
            "required": ["product_id_or_name"]
        }
    },
    {
        "name": "list_all_products",
        "description": "Lists all available products in the inventory.",
        "input_schema": {"type": "object", "properties": {}, "required": []}
    },
    {
        "name": "create_order",
        "description": "Creates an order using the product's current price. If requested quantity exceeds available inventory, no order is created and available quantity is returned. Orders can only be created for products that are in stock. Supports specifying products by either ID or name with fuzzy matching for misspellings.",
        "input_schema": {
            "type": "object",
            "properties": {
                "product_id_or_name": {"type": "string", "description": "The ID or name of the product to order (supports fuzzy matching)."},
                "quantity": {"type": "integer", "description": "The quantity of the product in the order."},
                "status": {
                    "type": "string",
                    "description": "The initial status of the order (e.g., 'Processing', 'Shipped').",
                    # Same closed set as update_order_status.new_status: the tool code
                    # compares status strings exactly, so spelling and casing must match.
                    "enum": ["Processing", "Shipped", "Delivered", "Returned", "Canceled"]
                }
            },
            "required": ["product_id_or_name", "quantity", "status"]
        }
    },
    {
        "name": "get_order_details",
        "description": "Retrieves the details of a specific order based on the order ID. Returns the order ID, product name, quantity, price, and order status.",
        "input_schema": {
            "type": "object",
            "properties": {
                "order_id": {"type": "string", "description": "The unique identifier for the order."}
            },
            "required": ["order_id"]
        }
    },
    {
        "name": "update_order_status",
        "description": "Updates the status of an order and adjusts inventory accordingly. Changing to \"Shipped\" decreases inventory. Changing to \"Returned\" or \"Canceled\" from \"Shipped\" increases inventory. Status can be \"Processing\", \"Shipped\", \"Delivered\", \"Returned\", or \"Canceled\".",
        "input_schema": {
            "type": "object",
            "properties": {
                "order_id": {"type": "string", "description": "The unique identifier for the order."},
                "new_status": {
                    "type": "string",
                    "description": "The new status to set for the order.",
                    "enum": ["Processing", "Shipped", "Delivered", "Returned", "Canceled"]
                }
            },
            "required": ["order_id", "new_status"]
        }
    }
]
print(f"Defined {len(tools_schemas_list)} tool schemas.")


Defined 9 tool schemas.


In [34]:
# These tool functions read and modify the global `storage` instance created in the Storage cell above.

# Customer functions
def create_customer(name: str, email: str, phone: Optional[str] = None) -> Dict[str, Any]:
    """Register a new customer in the global store.

    The new ID is "C" followed by the current number of customers plus one.

    Returns:
        A dict with "status" ("success"), the new "customer_id" and the stored
        "customer" record (the same object that is kept in the store).
    """
    customer_id = "C" + str(len(storage.customers) + 1)
    record = {"name": name, "email": email, "phone": phone}
    storage.customers[customer_id] = record
    print(f"[Tool Executed] create_customer: ID {customer_id}, Name: {name}")
    return {"status": "success", "customer_id": customer_id, "customer": record}

def get_customer_info(customer_id: str) -> Dict[str, Any]:
    """Look up a customer record by ID in the global store.

    Returns:
        On a hit: "status" "success", the "customer_id" and the stored "customer".
        Otherwise: "status" "error" with a "Customer not found" message.
    """
    record = storage.customers.get(customer_id)
    if not record:
        print(f"[Tool Executed] get_customer_info: ID {customer_id} not found.")
        return {"status": "error", "message": "Customer not found"}

    print(f"[Tool Executed] get_customer_info: ID {customer_id} found.")
    return {"status": "success", "customer_id": customer_id, "customer": record}

# Product functions
def create_product(name: str, description: str, price: float, inventory_count: int) -> Dict[str, Any]:
    """Add a new product to the global store.

    The new ID is "P" followed by the current number of products plus one.
    `price` is stored as a float and `inventory_count` as an int.

    Returns:
        A dict with "status" ("success"), the new "product_id" and the stored
        "product" record.
    """
    product_id = "P" + str(len(storage.products) + 1)
    record = dict(
        name=name,
        description=description,
        price=float(price),
        inventory_count=int(inventory_count),
    )
    storage.products[product_id] = record
    print(f"[Tool Executed] create_product: ID {product_id}, Name: {name}")
    return {"status": "success", "product_id": product_id, "product": record}

def update_product(product_id: str, name: Optional[str] = None, description: Optional[str] = None,
                   price: Optional[float] = None, inventory_count: Optional[int] = None) -> Dict[str, Any]:
    """Apply the supplied field changes to an existing product.

    Only arguments that are not None are written. `price` is stored as a float
    and `inventory_count` as an int. The stored record is modified in place.

    Returns:
        "error" when the product ID is unknown, "warning" when no field was
        supplied, otherwise "success" with the list of updated field names and
        the updated product record.
    """
    if product_id not in storage.products:
        print(f"[Tool Executed] update_product: ID {product_id} not found.")
        return {"status": "error", "message": f"Product {product_id} not found"}

    product = storage.products[product_id]

    # (field name, new value, converter) in the order the fields are reported.
    candidates = (
        ("name", name, None),
        ("description", description, None),
        ("price", price, float),
        ("inventory_count", inventory_count, int),
    )
    updated_fields = []
    for field, value, convert in candidates:
        if value is None:
            continue
        product[field] = convert(value) if convert else value
        updated_fields.append(field)

    if updated_fields:
        changed = ', '.join(updated_fields)
        print(f"[Tool Executed] update_product: ID {product_id}, Updated fields: {changed}")
        return {
            "status": "success",
            "message": f"Product {product_id} updated. Fields: {changed}",
            "product_id": product_id,
            "updated_fields": updated_fields,
            "product": product,
        }

    print(f"[Tool Executed] update_product: ID {product_id}, no fields updated.")
    return {"status": "warning", "message": "No fields were updated.", "product": product}

def find_product_by_name(product_name: str, min_similarity: int = 70) -> Tuple[Optional[str], Optional[Dict[str, Any]]]:
    """Resolve a possibly misspelled product name to a stored product.

    The name is compared against all stored product names with
    `process.extractOne` using the `fuzz.token_sort_ratio` scorer. The best
    candidate is accepted only when its score reaches `min_similarity`.

    Returns:
        (product_id, product_record) for an accepted match, else (None, None).
        When several products share the matched name, the first one in the
        store's iteration order is returned.
    """
    if not product_name:
        return None, None

    # Parallel lists so the winning name can be mapped back to its ID.
    ids_in_order = list(storage.products)
    names_in_order = [storage.products[pid]["name"] for pid in ids_in_order]
    if not names_in_order:
        return None, None

    best = process.extractOne(product_name, names_in_order, scorer=fuzz.token_sort_ratio)

    if best and best[1] >= min_similarity:
        winner = best[0]
        for pid, candidate in zip(ids_in_order, names_in_order):
            if candidate == winner:
                print(f"[Tool Helper] find_product_by_name: Matched '{product_name}' to '{winner}' (ID: {pid}) with score {best[1]}")
                return pid, storage.products[pid]

    print(f"[Tool Helper] find_product_by_name: No good match for '{product_name}' (min_similarity: {min_similarity}, Best match: {best})")
    return None, None


def get_product_id(product_identifier: str) -> Optional[str]:
    """Turn a product ID or product name into a product ID.

    An identifier that is already a key of the product store is returned as is;
    anything else is resolved through `find_product_by_name`.

    Returns:
        The product ID, or None when the name lookup finds no match.
    """
    if product_identifier not in storage.products:
        return find_product_by_name(product_identifier)[0]
    return product_identifier

def get_product_info(product_id_or_name: str) -> Dict[str, Any]:
    """Fetch a product by exact ID, falling back to a fuzzy name lookup.

    Returns:
        "success" with "product_id" and "product" (plus a "message" when the
        product was found by name), or "error" with a message when nothing matches.
    """
    catalog = storage.products

    # An exact ID hit wins; the name lookup is not attempted in that case.
    if product_id_or_name in catalog:
        product = catalog[product_id_or_name]
        print(f"[Tool Executed] get_product_info: Found by ID '{product_id_or_name}'.")
        return {"status": "success", "product_id": product_id_or_name, "product": product}

    matched_id, matched_product = find_product_by_name(product_id_or_name)
    if not (matched_id and matched_product):
        print(f"[Tool Executed] get_product_info: No product found for '{product_id_or_name}'.")
        return {"status": "error", "message": f"No product found matching '{product_id_or_name}'"}

    print(f"[Tool Executed] get_product_info: Found by name (fuzzy) '{product_id_or_name}' as ID '{matched_id}'.")
    return {
        "status": "success",
        "message": f"Found product matching '{product_id_or_name}'",
        "product_id": matched_id,
        "product": matched_product,
    }

def list_all_products() -> Dict[str, Any]:
    """Return every product currently in the global store.

    Returns:
        A dict with "status" ("success"), the product "count" and "products",
        a new top-level dict keyed by product ID. The copy is shallow: the
        individual product records are the stored objects.
    """
    catalog = storage.products
    total = len(catalog)
    print(f"[Tool Executed] list_all_products: Found {total} products.")
    return {"status": "success", "count": total, "products": dict(catalog)}

# Order functions
def create_order(product_id_or_name: str, quantity: int, status: str) -> Dict[str, Any]:
    """Create an order for a product at the product's stored price.

    Args:
        product_id_or_name: Product ID, or a product name resolved through
            `get_product_id`.
        quantity: Number of units ordered; must be positive.
        status: Initial order status. Inventory is deducted at creation time
            when the order starts as "Shipped" or "Delivered".

    Returns:
        - "error" when the product is unknown, out of stock, or the quantity
          is not positive;
        - "partial_availability" (no order created) when the quantity exceeds
          the available inventory;
        - "success" with "order_id", a copy of the stored order under
          "order_details", and "remaining_inventory".
    """
    actual_product_id = get_product_id(product_id_or_name)

    if not actual_product_id:
        print(f"[Tool Executed] create_order: Product '{product_id_or_name}' not found.")
        return {"status": "error", "message": f"Product '{product_id_or_name}' not found."}

    product = storage.products[actual_product_id]
    price = product["price"]

    if product["inventory_count"] == 0:
        print(f"[Tool Executed] create_order: Product ID {actual_product_id} is out of stock.")
        return {"status": "error", "message": f"{product['name']} is out of stock."}
    if quantity <= 0:
        print(f"[Tool Executed] create_order: Quantity must be positive. Requested: {quantity}")
        return {"status": "error", "message": "Quantity must be a positive number."}
    if quantity > product["inventory_count"]:
        print(f"[Tool Executed] create_order: Insufficient inventory for {product['name']} (ID: {actual_product_id}). Available: {product['inventory_count']}, Requested: {quantity}")
        return {
            "status": "partial_availability",
            "message": f"Insufficient inventory. Only {product['inventory_count']} units of {product['name']} are available.",
            "available_quantity": product["inventory_count"],
            "requested_quantity": quantity,
            "product_name": product['name']
        }

    # Goods that have left the warehouse must come out of stock. "Delivered"
    # is included because update_order_status restocks on a later return or
    # cancellation from "Delivered"; without the deduction here that restock
    # would add units that were never removed.
    if status in ("Shipped", "Delivered"):
        product["inventory_count"] -= quantity
        print(f"[Tool Executed] create_order: Inventory for {product['name']} (ID: {actual_product_id}) reduced by {quantity} due to '{status}' status on creation.")

    new_id = f"O{len(storage.orders) + 1}"
    storage.orders[new_id] = {
        "id": new_id,
        "product_id": actual_product_id,
        "product_name": product["name"],
        "quantity": quantity,
        "price": price,
        "status": status
    }
    print(f"[Tool Executed] create_order: Order {new_id} created for {quantity} of {product['name']} (ID: {actual_product_id}). Status: {status}. Remaining inv: {product['inventory_count']}")
    return {
        "status": "success",
        "order_id": new_id,
        # A copy, so callers editing the result cannot alter the stored order
        # (get_order_details and update_order_status return copies as well).
        "order_details": dict(storage.orders[new_id]),
        "remaining_inventory": product["inventory_count"]
    }

def get_order_details(order_id: str) -> Dict[str, Any]:
    """Look up a single order by ID in the global store.

    Returns:
        On a hit: "status" "success", the "order_id" and a copy of the stored
        order under "order_details". Otherwise "status" "error" with an
        "Order not found" message.
    """
    record = storage.orders.get(order_id)
    if not record:
        print(f"[Tool Executed] get_order_details: Order {order_id} not found.")
        return {"status": "error", "message": "Order not found"}

    print(f"[Tool Executed] get_order_details: Order {order_id} found.")
    # Hand back a copy so the stored order cannot be edited through the result.
    return {"status": "success", "order_id": order_id, "order_details": dict(record)}

def update_order_status(order_id: str, new_status: str) -> Dict[str, Any]:
    """Change an order's status and keep product inventory consistent with it.

    Inventory rules, applied only when the order's product still exists:
      * moving into "Shipped" or "Delivered" from a status that is neither of
        those deducts the order quantity; the update is refused when stock is
        insufficient;
      * moving to "Returned" or "Canceled" from "Shipped" or "Delivered" adds
        the order quantity back.

    Args:
        order_id: ID of the order to update.
        new_status: Status to set.

    Returns:
        - "error" when the order is unknown or stock is insufficient to ship;
        - "unchanged" when the order already has `new_status`;
        - "success" with old/new status, whether inventory was adjusted, the
          current inventory ("unknown" if the product no longer exists) and a
          copy of the updated order.
    """
    if order_id not in storage.orders:
        print(f"[Tool Executed] update_order_status: Order {order_id} not found.")
        return {"status": "error", "message": "Order not found"}

    order = storage.orders[order_id]
    old_status = order["status"]
    product_id = order["product_id"]
    quantity = order["quantity"]

    if old_status == new_status:
        print(f"[Tool Executed] update_order_status: Order {order_id} status unchanged ({old_status}).")
        return {"status": "unchanged", "message": f"Order {order_id} status is already {old_status}", "order_details": dict(order)}

    # Statuses in which the order's units are counted as out of stock.
    deducted_statuses = ("Shipped", "Delivered")

    inventory_adjusted = False
    if product_id in storage.products:
        product = storage.products[product_id]
        current_inventory = product["inventory_count"]

        # Deduct on entry into either deducted status. Handling only "Shipped"
        # would let an order jump straight to "Delivered" without a deduction
        # and then be restocked on return, inflating inventory.
        if new_status in deducted_statuses and old_status not in deducted_statuses:
            if current_inventory < quantity:
                print(f"[Tool Executed] update_order_status: Insufficient inventory to ship order {order_id}. Have {current_inventory}, need {quantity}.")
                return {"status": "error", "message": f"Insufficient inventory to ship. Available: {current_inventory}, Required: {quantity}"}
            product["inventory_count"] -= quantity
            inventory_adjusted = True
            print(f"[Tool Executed] update_order_status: Order {order_id} {new_status}. Inv for {product_id} reduced by {quantity} to {product['inventory_count']}.")
        elif new_status in ["Returned", "Canceled"] and old_status in deducted_statuses:
            product["inventory_count"] += quantity
            inventory_adjusted = True
            print(f"[Tool Executed] update_order_status: Order {order_id} {new_status}. Inv for {product_id} increased by {quantity} to {product['inventory_count']}.")
    else:
        print(f"[Tool Executed] update_order_status: Product {product_id} for order {order_id} not found for inventory adjustment.")

    order["status"] = new_status
    print(f"[Tool Executed] update_order_status: Order {order_id} status updated from {old_status} to {new_status}.")
    return {
        "status": "success",
        "message": f"Order {order_id} status updated from {old_status} to {new_status}.",
        "order_id": order_id,
        "product_id": product_id,
        "old_status": old_status,
        "new_status": new_status,
        "inventory_adjusted": inventory_adjusted,
        "current_inventory": storage.products[product_id]["inventory_count"] if product_id in storage.products else "unknown",
        "order_details": dict(order) # Copy of the updated order
    }

print("Tool functions defined.")

Tool functions defined.


In [35]:
class ConversationContext:
    """Conversation state shared across turns.

    Holds the message history in API format plus a small working memory of
    the entities (customers, products, orders) and the last action seen so
    far, from which a text summary can be produced.
    """

    def __init__(self):
        # Chat history as {"role": ..., "content": ...} dicts.
        self.messages: List[Dict[str, Any]] = []
        # Entity buckets keyed by ID, plus the single "last_action" slot.
        self.context_data: Dict[str, Any] = self._empty_context_data()
        self.session_start_time = datetime.now()

    @staticmethod
    def _empty_context_data() -> Dict[str, Any]:
        """Return a fresh, empty context structure (used by __init__ and clear)."""
        return {"customers": {}, "products": {}, "orders": {}, "last_action": None}

    def add_user_message(self, message: str) -> None:
        """Append a user turn to the history."""
        self.messages.append({"role": "user", "content": message})

    def add_assistant_message(self, message_content: Union[str, List[Dict[str, Any]]]) -> None:
        """Append an assistant turn; content may be text or a list of content blocks."""
        self.messages.append({"role": "assistant", "content": message_content})

    def update_entity_in_context(self, entity_type: str, entity_id: str, data: Any) -> None:
        """Store `data` under `entity_id` in the bucket named `entity_type`.

        Unknown entity types are ignored. "last_action" is not an entity
        bucket (it is None or an action record), so it is ignored here too.
        """
        bucket = self.context_data.get(entity_type)
        if entity_type != "last_action" and isinstance(bucket, dict):
            bucket[entity_id] = data # Store the actual data
            print(f"[Context Updated] Entity: {entity_type}, ID: {entity_id}, Data (type): {type(data)}")

    def set_last_action(self, action_type: str, action_details: Any) -> None:
        """Record the most recent action together with an ISO timestamp."""
        self.context_data["last_action"] = {
            "type": action_type,
            "details": action_details,
            "timestamp": datetime.now().isoformat()
        }
        # default=str keeps the log line printable for non-JSON-serializable details.
        print(f"[Context Updated] Last Action: {action_type}, Details: {json.dumps(action_details, default=str)}")

    def get_full_conversation_for_api(self) -> List[Dict[str, Any]]:
        """Return a shallow copy of the message history."""
        return self.messages.copy()

    def get_context_summary(self) -> str:
        """Build a text summary of the tracked entities and the last action.

        Returns:
            One line per non-empty section, joined with newlines, or a fixed
            placeholder sentence when nothing has been recorded yet.
        """
        summary_parts = []
        if self.context_data["customers"]:
            customers_str = ", ".join([f"ID: {cid} (Name: {c.get('name', 'N/A') if isinstance(c, dict) else 'N/A'})" for cid, c in self.context_data["customers"].items()])
            summary_parts.append(f"Recent customers: {customers_str}")
        if self.context_data["products"]:
            products_str = ", ".join([f"ID: {pid} (Name: {p.get('name', 'N/A') if isinstance(p, dict) else 'N/A'})" for pid, p in self.context_data["products"].items()])
            summary_parts.append(f"Recent products: {products_str}")
        if self.context_data["orders"]:
            orders_str = ", ".join([f"ID: {oid} (Product: {o.get('product_name', 'N/A') if isinstance(o, dict) else 'N/A'}, Status: {o.get('status', 'N/A') if isinstance(o, dict) else 'N/A'})" for oid, o in self.context_data["orders"].items()])
            summary_parts.append(f"Recent orders: {orders_str}")

        last_action = self.context_data["last_action"]
        if last_action:
            action_type = last_action['type']
            action_details_summary = "..." # Default summary
            details = last_action.get('details')
            if isinstance(details, dict):
                action_input = details.get('input', {})
                # "result" may be present but not a dict (e.g. None); treat it
                # as empty instead of failing on .get().
                result = details.get('result')
                if not isinstance(result, dict):
                    result = {}
                action_result_status = result.get('status')
                action_details_summary = f"Input: {action_input}, Result Status: {action_result_status}"
                if action_result_status == "success":
                    if "order_id" in result:
                        action_details_summary += f", OrderID: {result['order_id']}"
                    elif "product_id" in result:
                        action_details_summary += f", ProductID: {result['product_id']}"

            summary_parts.append(f"Last action: {action_type} at {last_action['timestamp']} ({action_details_summary})")

        if not summary_parts: return "No specific context items set yet."
        return "\n".join(summary_parts)

    def clear(self) -> None:
        """Reset history, context data and the session start time."""
        self.messages = []
        self.context_data = self._empty_context_data()
        self.session_start_time = datetime.now()
        print("[Context Cleared]")

print("ConversationContext class defined.")


ConversationContext class defined.


In [40]:
class DualAgentEvaluator:
    def __init__(self):
        """Set up conversation state, per-vendor tool schemas and the tool dispatch table."""
        self.conversation_context = ConversationContext()
        self.evaluation_results = []

        # Anthropic consumes the schema list exactly as defined.
        self.anthropic_tools_schemas = tools_schemas_list

        # OpenAI wants each schema wrapped as {"type": "function", "function": {...}},
        # with the Anthropic-style "input_schema" exposed under "parameters".
        openai_wrapped = []
        for schema in (tools_schemas_list or []):
            openai_wrapped.append({
                "type": "function",
                "function": {
                    "name": schema["name"],
                    "description": schema["description"],
                    "parameters": schema["input_schema"]
                }
            })
        self.openai_tools_formatted = openai_wrapped

        # Tool name -> callable; process_tool_call dispatches through this mapping.
        self.available_tool_functions = dict(
            create_customer=create_customer,
            get_customer_info=get_customer_info,
            create_product=create_product,
            update_product=update_product,
            get_product_info=get_product_info,
            list_all_products=list_all_products,
            create_order=create_order,
            get_order_details=get_order_details,
            update_order_status=update_order_status,
        )

        # Shares the module-level learnings dict (not a copy).
        self.human_feedback_learnings = human_feedback_learnings
        print("DualAgentEvaluator initialized. OpenAI tools formatted.")

    def _update_context_from_tool_results(self, tool_name: str, tool_input: Dict, tool_result: Dict):
        """Helper to update conversation context based on tool results."""
        # Ensure tool_result is a dictionary
        if not isinstance(tool_result, dict):
            print(f"[Context Update Error] Tool result for {tool_name} is not a dict: {tool_result}")
            self.conversation_context.set_last_action(tool_name, {"input": tool_input, "result": {"status": "error", "message": "Tool result was not a dictionary."}})
            return

        if tool_result.get("status") == "success":
            if "customer_id" in tool_result and "customer" in tool_result and isinstance(tool_result["customer"], dict):
                self.conversation_context.update_entity_in_context("customers", tool_result["customer_id"], tool_result["customer"])
            elif "product_id" in tool_result and "product" in tool_result and isinstance(tool_result["product"], dict):
                self.conversation_context.update_entity_in_context("products", tool_result["product_id"], tool_result["product"])
            elif "order_id" in tool_result and "order_details" in tool_result and isinstance(tool_result["order_details"], dict):
                self.conversation_context.update_entity_in_context("orders", tool_result["order_id"], tool_result["order_details"])
            elif tool_name == "list_all_products" and "products" in tool_result and isinstance(tool_result["products"], dict):
                 # Potentially update context for all listed products if needed, or just the action
                 for pid, pdata in tool_result["products"].items():
                     self.conversation_context.update_entity_in_context("products", pid, pdata)

        self.conversation_context.set_last_action(tool_name, {"input": tool_input, "result": tool_result})


    def process_tool_call(self, tool_name: str, tool_input: Dict[str, Any]) -> Dict[str, Any]:
        print(f"--- [Tool Dispatcher] Attempting tool: {tool_name} with input: {json.dumps(tool_input, default=str)} ---")
        if tool_name in self.available_tool_functions:
            function_to_call = self.available_tool_functions[tool_name]
            try:
                result = function_to_call(**tool_input)
                print(f"--- [Tool Dispatcher] Result for {tool_name}: {json.dumps(result, indent=2, default=str)} ---")
                return result
            except TypeError as te:
                print(f"--- [Tool Dispatcher] TypeError for {tool_name}: {te}. Input: {tool_input} ---")
                return {"status": "error", "message": f"TypeError calling {tool_name}: {str(te)}. Check arguments."}
            except Exception as e:
                print(f"--- [Tool Dispatcher] Exception for {tool_name}: {e} ---")
                return {"status": "error", "message": f"Error executing {tool_name}: {str(e)}"}
        else:
            print(f"--- [Tool Dispatcher] Tool {tool_name} not found. ---")
            return {"status": "error", "message": f"Tool {tool_name} not found."}

    def get_anthropic_response(self, current_worker_system_prompt: str, conversation_history: List[Dict[str, Any]]) -> str:
        """Run one user turn against the Anthropic model, executing any tools it requests.

        Works on a shallow copy of `conversation_history`, so the caller's list
        is not extended. Each round sends the messages, appends the assistant's
        raw content blocks, runs every `tool_use` block through
        `process_tool_call`, updates the conversation context, and feeds the
        results back as a user message. At most 5 rounds are attempted.

        Args:
            current_worker_system_prompt: System prompt passed as `system=`.
            conversation_history: Prior messages as role/content dicts.

        Returns:
            The space-joined text blocks of the first response that contains no
            tool calls (or a placeholder sentence if that text is empty),
            "Max tool iterations reached for Anthropic." after 5 tool rounds,
            or an "Error getting Anthropic response: ..." string if anything
            raised. Exceptions are never propagated.
        """
        messages_for_api = conversation_history.copy()
        try:
            for i in range(5): # Max 5 tool iterations
                # Flatten newlines so the prompt preview stays on one log line.
                system_prompt_snippet = current_worker_system_prompt[:60].replace('\n', ' ')
                print(f"\nAnthropic API Call #{i+1}. System: '{system_prompt_snippet}...', Messages count: {len(messages_for_api)}")
                if messages_for_api: print(f"Last message role: {messages_for_api[-1]['role']}")

                response = anthropic_client.messages.create(
                    model=ANTHROPIC_MODEL_NAME, max_tokens=4000,
                    system=current_worker_system_prompt,
                    tools=self.anthropic_tools_schemas, # Use Anthropic specific schemas
                    messages=messages_for_api
                )

                assistant_response_blocks = response.content
                # The assistant turn is stored as raw blocks so tool_use ids line up with the results below.
                messages_for_api.append({"role": "assistant", "content": assistant_response_blocks})

                tool_calls_to_process = [block for block in assistant_response_blocks if block.type == "tool_use"]
                text_blocks = [block.text for block in assistant_response_blocks if block.type == "text"]

                if not tool_calls_to_process:
                    final_text = " ".join(text_blocks).strip()
                    print(f"Anthropic Final Text (no tool use this turn): {final_text}")
                    return final_text if final_text else "No text content in final Anthropic response."

                tool_results_for_next_call = []
                for tool_use_block in tool_calls_to_process:
                    tool_name, tool_input, tool_use_id = tool_use_block.name, tool_use_block.input, tool_use_block.id
                    print(f"Anthropic Tool Call: {tool_name}, Input: {tool_input}")
                    tool_result_data = self.process_tool_call(tool_name, tool_input)
                    self._update_context_from_tool_results(tool_name, tool_input, tool_result_data)

                    tool_results_for_next_call.append({
                        "type": "tool_result", "tool_use_id": tool_use_id,
                        # default=str matches how the dispatcher logs results: a non-JSON value
                        # (e.g. a datetime) is stringified instead of aborting the whole turn.
                        "content": json.dumps(tool_result_data, default=str)
                    })

                # Tool results go back to the model as a single user message.
                messages_for_api.append({"role": "user", "content": tool_results_for_next_call})

            return "Max tool iterations reached for Anthropic."
        except Exception as e:
            print(f"Error in get_anthropic_response: {str(e)}")
            import traceback
            traceback.print_exc()
            return f"Error getting Anthropic response: {str(e)}"

    def get_openai_response(self, current_worker_system_prompt: str, conversation_history: List[Dict[str, Any]]) -> str:
        """Run one user turn against the OpenAI model, executing any tools it requests.

        Builds a fresh message list (system prompt first, then the history as
        given), then loops for at most 5 rounds: call the model, append its
        message, run every requested tool through `process_tool_call`, update
        the conversation context, and append one `tool` message per call.

        Args:
            current_worker_system_prompt: Content of the leading system message.
            conversation_history: Prior messages as role/content dicts.

        Returns:
            The content of the first response without tool calls (or a
            placeholder sentence if it is empty),
            "Max tool iterations reached for OpenAI." after 5 tool rounds, or an
            "Error getting OpenAI response: ..." string if anything raised.
            Exceptions are never propagated.
        """
        messages_for_api = [{"role": "system", "content": current_worker_system_prompt}]
        # NOTE(review): history entries are forwarded unchanged - no Anthropic-to-OpenAI
        # conversion happens here, so the caller must supply OpenAI-compatible messages.
        messages_for_api.extend(conversation_history)

        try:
            for i in range(5): # Max 5 tool iterations
                print(f"\nOpenAI API Call #{i+1}. Messages count: {len(messages_for_api)}")
                if messages_for_api: print(f"Last message role: {messages_for_api[-1].get('role') if isinstance(messages_for_api[-1], dict) else 'N/A'}")

                response = openai_client.chat.completions.create(
                    model=OPENAI_MODEL_NAME,
                    messages=messages_for_api,
                    tools=self.openai_tools_formatted, # Use pre-formatted tools for OpenAI
                    tool_choice="auto"
                )
                response_message = response.choices[0].message

                # Keep the assistant turn (including any tool_calls) in the running history as a plain dict.
                messages_for_api.append(response_message.model_dump())

                if not response_message.tool_calls:
                    final_text = response_message.content if response_message.content else "No text content in final OpenAI response."
                    print(f"OpenAI Final Text (no tool use this turn): {final_text}")
                    return final_text

                tool_calls_for_next_api_call = []
                for tool_call in response_message.tool_calls:
                    tool_name = tool_call.function.name
                    tool_input_str = tool_call.function.arguments
                    tool_call_id = tool_call.id
                    try:
                        tool_input = json.loads(tool_input_str)
                    except json.JSONDecodeError:
                        print(f"OpenAI Tool Call JSON Error for {tool_name}: {tool_input_str}")
                        # Bind tool_input explicitly: otherwise the context update below would hit an
                        # unbound name (first call) or silently reuse the previous call's arguments.
                        tool_input = {"raw_arguments": tool_input_str}
                        tool_result_data = {"status": "error", "message": "Invalid JSON arguments from model."}
                    else:
                        print(f"OpenAI Tool Call: {tool_name}, Input: {tool_input}")
                        tool_result_data = self.process_tool_call(tool_name, tool_input)

                    self._update_context_from_tool_results(tool_name, tool_input, tool_result_data)

                    tool_calls_for_next_api_call.append({
                        "tool_call_id": tool_call_id, "role": "tool", "name": tool_name,
                        # Content must be a string; default=str keeps non-JSON values
                        # (e.g. datetimes) from aborting the turn.
                        "content": json.dumps(tool_result_data, default=str)
                    })

                # One tool message per requested call, in request order.
                messages_for_api.extend(tool_calls_for_next_api_call)

            return "Max tool iterations reached for OpenAI."
        except Exception as e:
            print(f"Error in get_openai_response: {str(e)}")
            import traceback
            traceback.print_exc()
            return f"Error getting OpenAI response: {str(e)}"

    def process_user_request(self, user_message: str) -> Dict[str, Any]:
        """Send one user message to both worker models and have the pair evaluated.

        The user message is recorded, a context summary is appended to the
        worker system prompt, and both models are called with the SAME history
        snapshot. Only after both have answered are their final texts appended
        to the shared conversation, so the second model never sees the first
        model's answer to the current turn.

        Args:
            user_message: The raw user request for this turn.

        Returns:
            Dict with keys "user_message", "anthropic_response",
            "openai_response" and "evaluation" (the dict returned by
            `evaluate_responses`, which is also appended to
            `self.evaluation_results`).
        """
        print(f"\n\n{'='*60}\nUser Message: {user_message}\n{'='*60}")
        self.conversation_context.add_user_message(user_message)

        context_summary = self.conversation_context.get_context_summary()
        print(f"Current Context Summary for Models:\n{context_summary}\n{'-'*60}")

        current_worker_prompt_with_context = f"{worker_system_prompt}\n\nConversation Context:\n{context_summary}"

        # Snapshot the history once, before either model answers. list() guards against the
        # context handing back its live message list, which add_assistant_message would mutate.
        history_snapshot = list(self.conversation_context.get_full_conversation_for_api())

        # NOTE(review): both workers dispatch through the same tool functions, so side effects of
        # the first model's tool calls may be visible to the second - confirm this is acceptable.
        anthropic_response_text = self.get_anthropic_response(current_worker_prompt_with_context, list(history_snapshot))
        openai_response_text = self.get_openai_response(current_worker_prompt_with_context, list(history_snapshot))

        # Record both answers only now, as simplified text, for use in later turns.
        self.conversation_context.add_assistant_message(f"[Anthropic Final Text]: {anthropic_response_text}")
        self.conversation_context.add_assistant_message(f"[OpenAI Final Text]: {openai_response_text}")

        print(f"\n--- Anthropic Final Response Text ---\n{anthropic_response_text}")
        print(f"--- OpenAI Final Response Text ---\n{openai_response_text}")

        evaluation = self.evaluate_responses(user_message, anthropic_response_text, openai_response_text, context_summary)
        self.evaluation_results.append(evaluation)

        return {
            "user_message": user_message,
            "anthropic_response": anthropic_response_text,
            "openai_response": openai_response_text,
            "evaluation": evaluation
        }

    def evaluate_responses(self, user_message: str, anthropic_response: str, openai_response: str, context_summary_for_eval: str) -> Dict[str, Any]:
        """Ask the evaluator model to score both worker responses for one query.

        The prompt contains the user query, the context summary the workers
        received, the datastore contents read from the global `storage` at
        evaluation time, both responses, the scoring instruction and any stored
        learnings that match the query's keywords. If the evaluator's text
        mentions "human clarification" or "admin user", the operator is asked
        for input via `input()`; a non-"skip" answer is stored as a learning and
        the evaluation is re-run with it appended.

        Args:
            user_message: The user query being evaluated.
            anthropic_response: Final text from the Anthropic worker.
            openai_response: Final text from the OpenAI worker.
            context_summary_for_eval: Context summary taken before this turn's responses.

        Returns:
            Dict with "anthropic_score", "openai_score", "full_evaluation" and
            "clarification_details". On any exception the scores are 0 and an
            extra "error" key describes the failure; nothing is raised.
        """
        print("\n--- Starting Evaluation by Gemini ---")
        try:
            # Ground truth comes from the global 'storage' instance; default=str covers non-JSON values.
            ground_truth_customers = json.dumps(storage.customers, indent=2, default=str)
            ground_truth_products = json.dumps(storage.products, indent=2, default=str)
            ground_truth_orders = json.dumps(storage.orders, indent=2, default=str)

            ground_truth_section = (
                "Ground Truth E-commerce Data:\n"
                f"Customers:\n{ground_truth_customers}\n\n"
                f"Products:\n{ground_truth_products}\n\n"
                f"Orders:\n{ground_truth_orders}"
            )
            eval_prompt_parts = [
                f"User query: {user_message}",
                f"Current context provided to assistants:\n{context_summary_for_eval}", # This is the summary *before* the current turn's responses
                # Previously built but never sent: without it the evaluator cannot check accuracy.
                ground_truth_section,
                f"Anthropic Claude response:\n{anthropic_response}",
                f"OpenAI GPT response:\n{openai_response}",
                "Please evaluate both responses based on accuracy, efficiency, context awareness, and helpfulness. Provide an overall score (1-10) for each. If ambiguity required human clarification, note it."
            ]
            relevant_learnings = self.check_relevant_learnings(user_message)
            if relevant_learnings:
                eval_prompt_parts.append(f"\nRelevant past learnings from similar situations:\n{relevant_learnings}")

            eval_prompt = "\n\n".join(eval_prompt_parts)

            gemini_response_obj = eval_model_instance.generate_content(eval_prompt)
            evaluation_text = gemini_response_obj.text
            print(f"Gemini Raw Evaluation:\n{evaluation_text}")

            # NOTE(review): plain substring check - an evaluator that merely echoes the phrase
            # ("no human clarification needed") also triggers the prompt below.
            needs_human_input = "human clarification" in evaluation_text.lower() or "admin user" in evaluation_text.lower()
            clarification_details = {"used": False, "needed": "", "provided_input": ""}

            if needs_human_input:
                clarification_details["used"] = True
                clarification_details["needed"] = self.extract_clarification_needed(evaluation_text)
                print(f"--- Human Clarification Indicated by Evaluator ---")
                print(f"Clarification needed by evaluator: {clarification_details['needed']}")
                try:
                    human_input_for_eval = input(f"Enter human clarification for evaluator (or type 'skip'): ")
                    if human_input_for_eval.lower() != 'skip':
                        clarification_details["provided_input"] = human_input_for_eval
                        self.store_learning(user_message, clarification_details["needed"], human_input_for_eval)
                        # Re-run the evaluation with the operator's answer appended to the same prompt.
                        updated_eval_prompt = f"{eval_prompt}\n\nHuman clarification provided to evaluator: {human_input_for_eval}"
                        updated_gemini_response = eval_model_instance.generate_content(updated_eval_prompt)
                        evaluation_text = updated_gemini_response.text
                        print(f"Gemini Raw Re-Evaluation:\n{evaluation_text}")
                    else:
                        print("Skipping human input for evaluator.")
                except EOFError:
                    # No stdin available: keep the first evaluation and note the skip.
                    print("EOFError: Skipping human clarification for evaluator (non-interactive).")
                    clarification_details["provided_input"] = "Skipped (non-interactive)"

            anthropic_score = self.extract_score(evaluation_text, "Anthropic")
            openai_score = self.extract_score(evaluation_text, "OpenAI")

            return {
                "anthropic_score": anthropic_score, "openai_score": openai_score,
                "full_evaluation": evaluation_text, "clarification_details": clarification_details
            }
        except Exception as e:
            print(f"Error in evaluation: {str(e)}")
            import traceback
            traceback.print_exc()
            return {
                "error": f"Error in evaluation: {str(e)}",
                "anthropic_score": 0, "openai_score": 0,
                "full_evaluation": f"Evaluation failed: {str(e)}",
                # Same keys as the success path so consumers can index them unconditionally.
                "clarification_details": {"used": False, "needed": "", "provided_input": ""}
            }

    def extract_clarification_needed(self, evaluation_text: str) -> str:
        match = re.search(r"Clearly state what information is needed:\s*(.*?)\s*(Ask an admin user|$)", evaluation_text, re.DOTALL | re.IGNORECASE)
        if match and match.group(1).strip(): return match.group(1).strip()

        lines = evaluation_text.split('\n')
        for i, line in enumerate(lines):
            if "clarification is needed" in line.lower() or "what information is needed" in line.lower():
                return "\n".join(lines[i:i+3]).strip()
        return "Unspecified clarification needed by evaluator."

    def store_learning(self, query: str, clarification_needed: str, human_input: str):
        keywords = self.extract_keywords(query)
        for keyword in keywords:
            if keyword not in self.human_feedback_learnings:
                self.human_feedback_learnings[keyword] = []
            self.human_feedback_learnings[keyword].append({
                "original_query": query,
                "clarification_needed_by_evaluator": clarification_needed,
                "human_input_for_evaluator": human_input,
                "timestamp": datetime.now().isoformat()
            })
        print(f"Learning stored for query keywords: {keywords}")

    def check_relevant_learnings(self, query: str) -> Optional[str]:
        keywords = self.extract_keywords(query)
        relevant_learnings_text = []
        for keyword in keywords:
            if keyword in self.human_feedback_learnings:
                for learning in self.human_feedback_learnings[keyword]:
                    relevant_learnings_text.append(
                        f"- Query context: '{learning['original_query']}'\n"
                        f"  Clarification needed: {learning['clarification_needed_by_evaluator']}\n"
                        f"  Provided input: {learning['human_input_for_evaluator']}"
                    )
        return "\n\n".join(relevant_learnings_text) if relevant_learnings_text else None

    def extract_keywords(self, text: str) -> List[str]:
        words = re.findall(r'\b\w{4,}\b', text.lower())
        stop_words = {"the", "and", "is", "in", "to", "a", "of", "for", "with", "on", "at", "what", "how", "show", "tell", "please", "what's", "i'd"}
        return list(set(word for word in words if word not in stop_words))

    def extract_score(self, evaluation_text: str, model_name_pattern: str) -> int:
        patterns = [
            rf"{model_name_pattern}.*?Overall Score.*?(\d+)/10",  # Anthropic Overall Score: 8/10
            rf"{model_name_pattern}.*?Overall Score:\s*(\d+)",     # Anthropic Overall Score: 8
            rf"Overall Score.*?{model_name_pattern}.*?:\s*(\d+)", # Overall Score (Anthropic): 8
            rf"{model_name_pattern}.*?score.*?:.*?(\d+)",          # Anthropic score: 8
            rf"{model_name_pattern}.*?\b(\d+)/10",                 # Anthropic 8/10
            rf"{model_name_pattern}.*?\bscore\b.*?(\d+)",          # Anthropic ... score ... 8
        ]
        for p_str in patterns:
            match = re.search(p_str, evaluation_text, re.IGNORECASE | re.DOTALL)
            if match and match.group(1):
                try: return int(match.group(1))
                except ValueError: continue

        # Fallback: find model name, then nearest number after "score" or just a number near model name
        model_name_lower = model_name_pattern.lower()
        text_lower = evaluation_text.lower()
        model_indices = [m.start() for m in re.finditer(model_name_lower, text_lower)]

        for model_idx in model_indices:
            search_area = evaluation_text[model_idx : model_idx + 100] # Search 100 chars around model name
            score_match = re.search(r'\b(\d+)\b(?:/10)?', search_area, re.IGNORECASE) # Look for a number, optionally like X/10
            if score_match:
                try: return int(score_match.group(1))
                except ValueError: pass

        print(f"Could not extract score for '{model_name_pattern}' from eval text snippet:\n{evaluation_text[:250]}...")
        return 0

# Cell-run marker: printed once the class definition above has been executed.
print("DualAgentEvaluator class defined.")

DualAgentEvaluator class defined.


In [38]:
def main():
    """Run the test queries through both agents and print an evaluation summary.

    Aborts early if any of the three API-key globals still holds a "YOUR_"
    placeholder. Each query is processed independently: a failure is logged
    with its traceback and recorded as a zero-score entry, so one bad query
    does not stop the run. Finally prints per-query scores, averages, the
    overall winner and any clarifications learned during the run.
    """
    print("\nStarting Main Execution...\n")
    module_globals = globals()
    api_key_names = ["ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY"]
    # Only string values are inspected: `"YOUR_" in None` would raise TypeError
    # when a key global exists but was never populated.
    if any(isinstance(module_globals.get(k), str) and "YOUR_" in module_globals[k] for k in api_key_names):
        print("ERROR: Placeholder API keys detected. Update in Cell 2 (UserDataMock) or environment. Exiting.")
        return

    agent = DualAgentEvaluator()
    results_log = []
    test_queries = [
        "Show me all available products",
        "I'd like to order 2 of the 'Widget A' please, status 'Processing'",
        "What is the status of order O2?", # More specific query for context
        # "How many 'Gadget B' are left in stock?" # Example of a query that should use get_product_info
        # "Create a new customer named Alice Wonderland, email alice@wonder.land"
    ]

    for query in test_queries:
        try:
            result = agent.process_user_request(query)
            results_log.append(result)
        except Exception as e:
            print(f"CRITICAL ERROR processing query '{query}': {e}")
            import traceback
            traceback.print_exc()
            # Keep a zero-score placeholder so the summary still lists this query.
            results_log.append({
                "user_message": query, "anthropic_response": "ERROR", "openai_response": "ERROR",
                "evaluation": {"anthropic_score": 0, "openai_score": 0, "full_evaluation": f"Critical error: {e}", "clarification_details": {"used": False}}
            })
        # time.sleep(1) # Optional delay

    print("\n\n===== EVALUATION SUMMARY =====")
    total_anthropic, total_openai, num_q = 0, 0, len(results_log)
    for i, res in enumerate(results_log):
        print(f"\nQuery {i+1}: {res['user_message']}")
        print(f"  Anthropic Resp: {res['anthropic_response'][:100]}...")
        print(f"  OpenAI Resp: {res['openai_response'][:100]}...")

        eval_data = res['evaluation']
        anth_s, open_s = eval_data.get('anthropic_score',0), eval_data.get('openai_score',0)
        total_anthropic += anth_s; total_openai += open_s
        print(f"  Scores - Anthropic: {anth_s}, OpenAI: {open_s}")
        if eval_data.get('clarification_details',{}).get('used'):
            print(f"    Clarification: Needed='{eval_data['clarification_details']['needed']}', Provided='{eval_data['clarification_details']['provided_input']}'")
        winner = "Tie" if anth_s == open_s else ("Anthropic" if anth_s > open_s else "OpenAI")
        print(f"  Query Winner: {winner}")

    print(f"\n----- Overall Performance -----")
    if num_q > 0:
        print(f"Avg Anthropic: {total_anthropic/num_q:.2f}, Avg OpenAI: {total_openai/num_q:.2f}")
    print(f"Total Anthropic: {total_anthropic}, Total OpenAI: {total_openai}")
    overall_winner = "Tie" if total_anthropic == total_openai else ("Anthropic" if total_anthropic > total_openai else "OpenAI")
    print(f"Overall Winner: {overall_winner}")

    if agent.human_feedback_learnings:
        print("\n----- Learned Clarifications (for Evaluator) -----")
        for kw, l_list in agent.human_feedback_learnings.items():
            print(f"Keyword: {kw}")
            for item in l_list: print(f"  - Q: '{item['original_query']}', Needed: {item['clarification_needed_by_evaluator']}, Input: {item['human_input_for_evaluator']}")
    print("\nExecution Finished.")

# Script-style entry guard. It is deliberately a no-op: main() is NOT invoked here,
# so executing this cell only defines the function and prints the reminder below.
if __name__ == "__main__":
    # In Jupyter, call main() in a new cell.
    # For direct script execution, uncomment:
    # main()
    pass

# Cell-run marker reminding the reader that main() must be called explicitly.
print("Main function defined. Call main() in a new cell to run the evaluation.")

Main function defined. Call main() in a new cell to run the evaluation.


In [41]:
# Run the full evaluation: this calls both worker models and the evaluator and executes tools.
main()


Starting Main Execution...

DualAgentEvaluator initialized. OpenAI tools formatted.


User Message: Show me all available products
Current Context Summary for Models:
No specific context items set yet.
------------------------------------------------------------

Anthropic API Call #1. System: ' You are a helpful customer service assistant for an e-comme...', Messages count: 1
Last message role: user
Anthropic Tool Call: list_all_products, Input: {}
--- [Tool Dispatcher] Attempting tool: list_all_products with input: {} ---
[Tool Executed] list_all_products: Found 3 products.
--- [Tool Dispatcher] Result for list_all_products: {
  "status": "success",
  "count": 3,
  "products": {
    "P1": {
      "name": "Widget A",
      "description": "A simple widget. Very compact.",
      "price": 19.99,
      "inventory_count": 999
    },
    "P2": {
      "name": "Gadget B",
      "description": "A powerful gadget. It spins.",
      "price": 49.99,
      "inventory_count": 200
    },
    "P3":