In [1]:
"""First, let's install the pre-requisites for this workshop."""

%pip install weave openai pydantic -qqq

Note: you may need to restart the kernel to use updated packages.


In [2]:
"""Baisc authentication setup for OpenAI"""
import os
from getpass import getpass

if not os.environ["OPENAI_API_KEY"] or True:
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

In [5]:
"""Next, let's define a simple use case: Support Triage"""
import json

from openai import OpenAI
from pydantic import BaseModel
import weave


class MessageProperties(BaseModel):
    customer_name: str
    product_model: str
    issue_description: str

@weave.op
def extract_properties_from_message(message: str) -> MessageProperties:
    # Define the system prompt
    system_prompt = """
    You are a helpful assistant that extracts properties from a customer support message.
    """

    # Define the user prompt
    user_prompt = f"""
    Extract the following properties:
    - customer_name
    - product_model
    - issue_description

    from the customer support message:
    {message}
    """

    # Initialize the OpenAI client
    openai = OpenAI()

    # Fetch the response
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    # Parse the response
    parsed = json.loads(response.choices[0].message.content)

    # Validate the response
    result = MessageProperties.model_validate(parsed)

    # Return the result
    return result

In [7]:
"""
Now, let's run this on a basic example:
"""

result = extract_properties_from_message(
    "Hello, My XPhone 12 Pro keeps randomly shutting off mid-call. Please help."
)
print(result)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [8]:
weave.init("prompt_eng_workshop")

result = extract_properties_from_message(
    "Hello, My XPhone 12 Pro keeps randomly shutting off mid-call. Please help."
)
print(result)

[36m[1mweave[0m: Logged in as Weights & Biases user: timssweeney.
[36m[1mweave[0m: View Weave data at https://wandb.ai/timssweeney/prompt_eng_workshop/weave
[36m[1mweave[0m: 🍩 https://wandb.ai/timssweeney/prompt_eng_workshop/r/call/0196ef63-d3fa-7bb3-8a8d-c108f313d6b5


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# 1. Define the synthetic dataset
dataset = [
    {
        "email": "Hello,\nI'm John Doe. My XPhone 12 Pro keeps randomly shutting off mid-call. Please help!\nThanks, John.",
        "gold": {
            "customer_name": "John Doe",
            "product_model": "XPhone 12 Pro",
            "issue_description": "My XPhone 12 Pro keeps randomly shutting off mid-call",
        },
    },
    {
        "email": "Hi support,\nThis is Jane. I bought an AeroWatch V2 last week. The strap fell apart after one day.\nRegards,\nJane",
        "gold": {
            "customer_name": "Jane",
            "product_model": "AeroWatch V2",
            "issue_description": "The strap fell apart after one day",
        },
    },
]

# 2. Schema and fields
SCHEMA_FIELDS = ["customer_name", "product_model", "issue_description"]

# 3. Few-shot exemplars (use first 3 of dataset)
FEW_SHOT_EXAMPLES = dataset[:3]

# 4. Prompt templates
ZERO_SHOT_TEMPLATE = """
Extract the following fields from the customer email and output valid JSON:
- customer_name
- product_model
- issue_description

Email:
\"\"\"{email}\"\"\"
"""


# 5. Placeholder for model call
def call_model(prompt: str) -> str:
    """
    Replace this function with actual LLM API call.
    For example, using OpenAI:
        response = openai.ChatCompletion.create(...)
        return response.choices[0].message.content
    """
    raise NotImplementedError("Implement your LLM API call here")


# 6. Evaluation function
def evaluate(prompt_fn, use_few_shot: bool = False) -> Tuple[Dict[str, int], int]:
    """
    Runs the model against the dataset, returning per-field correct counts
    and the number of full-pass examples.
    """
    results = {f: 0 for f in SCHEMA_FIELDS}
    full_pass = 0

    few_shot_block = format_few_shot_examples(FEW_SHOT_EXAMPLES) if use_few_shot else ""

    for example in dataset:
        email = example["email"]
        gold = example["gold"]

        if use_few_shot:
            prompt = prompt_fn(few_shot_block, email)
        else:
            prompt = prompt_fn(email)

        try:
            output = call_model(prompt)
            parsed = json.loads(output)
        except Exception:
            # Malformed JSON or API error counts as failure on all fields
            continue

        ok = True
        for f in SCHEMA_FIELDS:
            if parsed.get(f, "").strip() == gold[f].strip():
                results[f] += 1
            else:
                ok = False
        if ok:
            full_pass += 1

    return results, full_pass


# 7. Prompt builder functions
def build_zero_shot_prompt(email: str) -> str:
    return ZERO_SHOT_TEMPLATE.format(email=email)


def build_few_shot_prompt(few_shot_block: str, email: str) -> str:
    return FEW_SHOT_TEMPLATE.format(examples=few_shot_block, email=email)


# 8. Main demonstration
if __name__ == "__main__":
    print("=== Zero-Shot Evaluation ===")
    try:
        zs_results, zs_full = evaluate(build_zero_shot_prompt, use_few_shot=False)
        total = len(dataset)
        for f in SCHEMA_FIELDS:
            print(f"{f}: {zs_results[f]}/{total} correct")
        print(f"Full-pass: {zs_full}/{total}")
    except NotImplementedError as e:
        print(e)

    print("\n=== Few-Shot Evaluation ===")
    try:
        fs_results, fs_full = evaluate(build_few_shot_prompt, use_few_shot=True)
        total = len(dataset)
        for f in SCHEMA_FIELDS:
            print(f"{f}: {fs_results[f]}/{total} correct")
        print(f"Full-pass: {fs_full}/{total}")
    except NotImplementedError as e:
        print(e)