In [1]:
!pip install --quiet azure-ai-projects==1.0.0b9 azure-identity azure-ai-evaluation dotenv

In [4]:
# Load settings from a local .env file so the os.environ[...] lookups in the
# cells below can resolve. The cell displays load_dotenv()'s return value.
import dotenv
dotenv.load_dotenv()

True

In [5]:

import os
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from azure.ai.projects.models import FunctionTool, ToolSet

# Import your custom functions to be used as Tools for the Agent
from user_functions import user_functions

# Display name given to the agent when it is created in a later cell.
AGENT_NAME = "Seattle Tourist Assistant"

# Build the project client from the connection string found in the environment.
credential = DefaultAzureCredential()
connection_string = os.environ["PROJECT_CONNECTION_STRING"]
project_client = AIProjectClient.from_connection_string(
    credential=credential,
    conn_str=connection_string,
)

# Wrap the imported user functions as a tool and collect it in a toolset.
functions = FunctionTool(user_functions)
toolset = ToolSet()
toolset.add(functions)

# Let the client execute the agent's tool calls automatically.
project_client.agents.enable_auto_function_calls(toolset=toolset)

### Create an AI agent (Azure AI Agent Service)

In [6]:
# Gather the agent definition first, then hand it to the service in one call.
agent_settings = {
    "model": os.environ["AGENT_MODEL_DEPLOYMENT_NAME"],
    "name": AGENT_NAME,
    "instructions": "You are a helpful assistant",
    "toolset": toolset,
}
agent = project_client.agents.create_agent(**agent_settings)

print(f"Created agent, ID: {agent.id}")

Created agent, ID: asst_u5WNZ58r4Wv1Y77SHMCUfjhE


In [7]:
# The user prompt that will be posted to the conversation.
MESSAGE = "Can you email me weather info for Tokyo ?"

# Open a new thread for the conversation with the agent.
thread = project_client.agents.create_thread()
print(f"Created thread, ID: {thread.id}")

# Post the prompt to that thread as the user.
message = project_client.agents.create_message(thread_id=thread.id, role="user", content=MESSAGE)
print(f"Created message, ID: {message.id}")

Created thread, ID: thread_HbYRkpPfsJOleCdHVb7390jA
Created message, ID: msg_YJsCCEglwhyv4lu5bAcEbwJf


In [8]:
### Execute the Agent run
# Start a run of the agent on the thread and wait for the call to return.
run = project_client.agents.create_and_process_run(
    thread_id=thread.id,
    agent_id=agent.id,
)

# Assemble the report lines, adding the error detail only for a failed run.
status_report = [f"Run finished with status: {run.status}"]
if run.status == "failed":
    status_report.append(f"Run failed: {run.last_error}")
status_report.append(f"Run ID: {run.id}")

print("\n".join(status_report))

Sending email to user@example.com...
Subject: Weather Information for Tokyo
Body:
The current weather in Tokyo is rainy with a temperature of 22°C.
Run finished with status: RunStatus.COMPLETED
Run ID: run_Noy2qRKyrFZ6IFWPRC4lo4so


In [9]:
# Print every message of the thread, oldest first.
# A distinct loop variable keeps the `message` created in the earlier cell from
# being overwritten when this cell runs.
for thread_message in project_client.agents.list_messages(thread.id, order="asc").data:
    # Keep only the text parts: a message may be empty or carry non-text parts,
    # in which case indexing content[0].text would raise.
    text_values = [
        part.text.value
        for part in (thread_message.content or [])
        if getattr(part, "text", None) is not None
    ]
    content_text = "\n".join(text_values) if text_values else "<no text content>"

    print(f"Role: {thread_message.role}")
    print(f"Content: {content_text}")
    print("-" * 40)

Role: MessageRole.USER
Content: Can you email me weather info for Tokyo ?
----------------------------------------
Role: MessageRole.AGENT
Content: I have successfully emailed you the weather information for Tokyo. It is currently rainy with a temperature of 22°C.
----------------------------------------


# Evaluate

### Get data from agent

In [10]:
from azure.ai.evaluation import AIAgentConverter
import json


# Name of the JSONL file the evaluation data is written to in the next cell.
file_name = "evaluation_data.jsonl"

# Identifiers of the conversation and of the run executed above.
thread_id, run_id = thread.id, run.id

# Converter backed by the project client.
converter = AIAgentConverter(project_client)

# Convert the data of that single run.
evaluation_data_single_run = converter.convert(
    thread_id=thread_id,
    run_id=run_id,
)



In [11]:
# Export the thread's data to the JSONL file used as evaluation input,
# then show the same data as indented JSON.
evaluation_data = converter.prepare_evaluation_data(
    thread_ids=thread_id,
    filename=file_name,
)
formatted_evaluation_data = json.dumps(evaluation_data, indent=4)
print(formatted_evaluation_data)

[
    {
        "query": [
            {
                "role": "system",
                "content": "You are a helpful assistant"
            },
            {
                "createdAt": "2025-08-30T11:34:37Z",
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Can you email me weather info for Tokyo ?"
                    }
                ]
            }
        ],
        "response": [
            {
                "createdAt": "2025-08-30T11:34:43Z",
                "run_id": "run_Noy2qRKyrFZ6IFWPRC4lo4so",
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_call",
                        "tool_call_id": "call_tRi9q0o6aSNdHV3LyK1mTkqs",
                        "name": "fetch_weather",
                        "arguments": {
                            "location": "Tokyo"
                        }
       

### Setting up evaluator

We will select the following evaluators to assess the different aspects relevant for agent quality: 

- [Intent resolution](https://aka.ms/intentresolution-sample): measures the extent to which an agent identifies the correct intent from a user query. Scale: integer 1-5. Higher is better.
- [Tool call accuracy](https://aka.ms/toolcallaccuracy-sample): evaluates the agent’s ability to select the appropriate tools, and process correct parameters from previous steps. Scale: float 0-1. Higher is better.
- [Task adherence](https://aka.ms/taskadherence-sample): measures the extent to which an agent’s final response adheres to the task based on its system message and a user query. Scale: integer 1-5. Higher is better.


In [12]:
from azure.ai.evaluation import (
    ToolCallAccuracyEvaluator,
    AzureOpenAIModelConfiguration,
    IntentResolutionEvaluator,
    TaskAdherenceEvaluator,
)
from pprint import pprint

# Connection settings of the Azure OpenAI deployment handed to every evaluator.
judge_settings = {
    "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
    "api_key": os.environ["AZURE_OPENAI_API_KEY"],
    "api_version": os.environ["AZURE_OPENAI_API_VERSION"],
    "azure_deployment": os.environ["MODEL_DEPLOYMENT_NAME"],
}
model_config = AzureOpenAIModelConfiguration(**judge_settings)

# Needed to use content safety evaluators
azure_ai_project = dict(
    subscription_id=os.environ["AZURE_SUBSCRIPTION_ID"],
    project_name=os.environ["PROJECT_NAME"],
    resource_group_name=os.environ["RESOURCE_GROUP_NAME"],
)

# One evaluator per quality aspect, all sharing the same model configuration.
intent_resolution, tool_call_accuracy, task_adherence = (
    evaluator_cls(model_config=model_config)
    for evaluator_cls in (
        IntentResolutionEvaluator,
        ToolCallAccuracyEvaluator,
        TaskAdherenceEvaluator,
    )
)



### Run Evaluator

In [13]:
from azure.ai.evaluation import evaluate

# Run the three evaluators over the JSONL file written earlier.
response = evaluate(
    data=file_name,
    evaluators={
        "tool_call_accuracy": tool_call_accuracy,
        "intent_resolution": intent_resolution,
        "task_adherence": task_adherence,
    },
    # Reuse the project details defined in the evaluator-setup cell instead of
    # rebuilding the same dict from the environment a second time.
    azure_ai_project=azure_ai_project,
)

# print (not pprint) emits the URL as plain text; pprint on a string shows its
# quoted, parenthesised repr. Also fixes the "Foundary" typo in the label.
print(f'AI Foundry URL: {response.get("studio_url")}')

2025-08-30 11:36:16 +0000 281471784907168 execution.bulk     INFO     Finished 1 / 7 lines.
2025-08-30 11:36:16 +0000 281471784907168 execution.bulk     INFO     Average execution time for completed lines: 1.1 seconds. Estimated time for incomplete lines: 6.6 seconds.
2025-08-30 11:36:16 +0000 281471784907168 execution.bulk     INFO     Finished 2 / 7 lines.
2025-08-30 11:36:16 +0000 281471784907168 execution.bulk     INFO     Average execution time for completed lines: 0.55 seconds. Estimated time for incomplete lines: 2.75 seconds.
2025-08-30 11:36:16 +0000 281471784907168 execution.bulk     INFO     Finished 3 / 7 lines.
2025-08-30 11:36:16 +0000 281471784907168 execution.bulk     INFO     Average execution time for completed lines: 0.38 seconds. Estimated time for incomplete lines: 1.52 seconds.
2025-08-30 11:36:16 +0000 281471784907168 execution.bulk     INFO     Finished 4 / 7 lines.
2025-08-30 11:36:16 +0000 281471784907168 execution.bulk     INFO     Average execution time for 




Run name: "tool_call_accuracy_20250830_113615_052182"
Run status: "Completed"
Start time: "2025-08-30 11:36:15.052182+00:00"
Duration: "0:00:05.020834"


{
    "tool_call_accuracy": {
        "status": "Completed",
        "duration": "0:00:05.020834",
        "completed_lines": 7,
        "failed_lines": 0,
        "log_path": null
    },
    "intent_resolution": {
        "status": "Completed",
        "duration": "0:00:02.005816",
        "completed_lines": 7,
        "failed_lines": 0,
        "log_path": null
    },
    "task_adherence": {
        "status": "Completed",
        "duration": "0:00:02.003541",
        "completed_lines": 7,
        "failed_lines": 0,
        "log_path": null
    }
}


('AI Foundary URL: '
 'https://ai.azure.com/build/evaluation/794d72d3-f75e-41e4-960c-b8cae1620b3e?wsid=/subscriptions/dfe81aa9-b256-4fe8-9cd5-23732f4d59c6/resourceGroups/TechNTomorrow/providers/Microsoft.MachineLearningServices/workspaces/tntproject')
