In [8]:
from dotenv import load_dotenv

# Populate the process environment from the local .env file. override=True lets
# values in .env replace variables that are already set in the kernel's environment.
load_dotenv(dotenv_path=".env", override=True)

# Kept after load_dotenv() exactly as in the original cell order.
# NOTE(review): presumably AuthHelper reads environment variables — confirm before
# hoisting this import to the top of the notebook.
from utils.fdyauth import AuthHelper

settings = AuthHelper.load_settings()
credential = AuthHelper.test_credential()

# A truthy credential is treated as "logged in"; anything falsy asks the user to log in.
print('Environment and authentication OK' if credential else "please login first")

Environment and authentication OK


In [26]:
import os, time
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
    AgentEvaluationRequest,
    InputDataset,
    EvaluatorIds,
    EvaluatorConfiguration,
    AgentEvaluationSamplingConfiguration,
    AgentEvaluationRedactionConfiguration,
    AgentEvaluation,
)

# A dedicated agent name is used here instead of settings.agent_name, because
# settings.agent_name is shared with other notebooks.
AGENT_NAME = "my-eval-agent-1"
AGENT_INSTRUCTIONS = "You are helpful agent"

# Client for the project endpoint. No api_version is passed
# (settings.project_api_version is intentionally not used here).
project_client = AIProjectClient(endpoint=settings.project_endpoint, credential=credential)

# Look for an agent that already carries AGENT_NAME. The generator stops at the
# first match; None means no agent with that name was listed.
found_agent = next(
    (
        candidate
        for candidate in project_client.agents.list_agents()
        if candidate.name == AGENT_NAME
    ),
    None,
)

# Settings applied identically whether the agent is created or updated.
# No toolset is attached to this agent.
agent_definition = {
    "model": settings.model_deployment_name,
    "instructions": AGENT_INSTRUCTIONS,
}

if not found_agent:
    # First run with this name: create the agent.
    agent = project_client.agents.create_agent(name=AGENT_NAME, **agent_definition)
    print(f"Created agent '{AGENT_NAME}'\nID: {agent.id}")
else:
    # Agent exists: refresh its model and instructions so re-runs stay in sync
    # with the values defined in this notebook.
    agent = project_client.agents.update_agent(agent_id=found_agent.id, **agent_definition)
    print(f"reusing agent > {agent.name} (id: {agent.id})")

# Create a fresh thread, post a single user message, and run the agent on it.
# `thread` and `run` are consumed by the evaluation cell further down.
thread = project_client.agents.threads.create()
print(f"Created thread, thread ID: {thread.id}")

message = project_client.agents.messages.create(
    thread_id=thread.id, role="user", content="Hello, tell me a joke"
)
print(f"Created message, message ID: {message.id}")

run = project_client.agents.runs.create(thread_id=thread.id, agent_id=agent.id)

# Poll once per second while the run is still in a non-terminal state.
while run.status in ["queued", "in_progress", "requires_action"]:
    time.sleep(1)
    run = project_client.agents.runs.get(thread_id=thread.id, run_id=run.id)
    print(f"Run status: {run.status}")

# Fail fast: the evaluation cell below submits this run for evaluation, so a run
# that ended in any state other than "completed" should stop the notebook here
# rather than silently flowing into the evaluation request.
if run.status != "completed":
    raise RuntimeError(
        f"Run {run.id} ended with status {run.status}; last_error: {run.last_error}"
    )

reusing agent > my-eval-agent-1 (id: asst_yDQOgHuLYD5zhwoebWPPunL7)
Created thread, thread ID: thread_PRIrnaeBVfzdnytNQlyn3ml8
Created message, message ID: msg_eFcn8wkQBAZpTOCPZb3RZI95
Run status: RunStatus.IN_PROGRESS
Run status: RunStatus.COMPLETED


In [28]:
# Evaluators to apply, keyed by the label given to each one; only the violence
# evaluator is requested.
evaluator_configs = {
    "violence": EvaluatorConfiguration(id=EvaluatorIds.VIOLENCE),
}

# Sampling settings for the evaluation request.
sampling_config = AgentEvaluationSamplingConfiguration(
    name="agent-eval-request-sample",
    sampling_percent=100,
    max_request_rate=100,
)

# Score properties are not redacted.
redaction_config = AgentEvaluationRedactionConfiguration(redact_score_properties=False)

# Evaluation request for the thread/run produced by the agent run above. The
# Application Insights connection string is taken from the project's telemetry client.
agent_evaluation_request = AgentEvaluationRequest(
    run_id=run.id,
    thread_id=thread.id,
    evaluators=evaluator_configs,
    sampling_configuration=sampling_config,
    redaction_configuration=redaction_config,
    app_insights_connection_string=project_client.telemetry.get_connection_string(),
)

# Extra request headers carrying the Azure OpenAI endpoint and key from settings.
# The key is read from configuration, never written into the notebook.
evaluation_headers = {
    "model-endpoint": settings.azure_openai_endpoint,
    "api-key": settings.azure_openai_api_key,
}

# Submit the evaluation request built above.
agent_evaluation_response: AgentEvaluation = project_client.evaluations.create_agent_evaluation(
    evaluation=agent_evaluation_request,
    headers=evaluation_headers,
)

# Show the evaluation ID and the concrete response type, one per line.
print(
    f"agent evaluation created with ID: {agent_evaluation_response.id}",
    type(agent_evaluation_response),
    sep="\n",
)


# print("List evaluations")
# for evaluation in project_client.evaluations.list():
#     print(evaluation)

agent evaluation created with ID: thread_PRIrnaeBVfzdnytNQlyn3ml8;run_PkJhE6dglbxKCJZAIhh0zstk
<class 'azure.ai.projects.models._models.AgentEvaluation'>


In [30]:
# The evaluation ID printed above has the form "<thread id>;<run id>".
# Split on ";" and unpack into exactly two names, so an ID with any other
# number of parts raises ValueError instead of being silently misread.
id_parts = agent_evaluation_response.id.split(";")
eval_thread_id, eval_run_id = id_parts

print(f"Eval Thread ID: {eval_thread_id}")
print(f"Eval Run ID: {eval_run_id}")

Eval Thread ID: thread_PRIrnaeBVfzdnytNQlyn3ml8
Eval Run ID: run_PkJhE6dglbxKCJZAIhh0zstk


In [31]:
# Fetch the run identified by the IDs parsed from the evaluation ID; as the last
# expression in the cell, the returned run object is shown as the cell output.
project_client.agents.runs.get(thread_id=eval_thread_id, run_id=eval_run_id)

{'id': 'run_PkJhE6dglbxKCJZAIhh0zstk', 'object': 'thread.run', 'created_at': 1751892638, 'assistant_id': 'asst_yDQOgHuLYD5zhwoebWPPunL7', 'thread_id': 'thread_PRIrnaeBVfzdnytNQlyn3ml8', 'status': 'completed', 'started_at': 1751892638, 'expires_at': None, 'cancelled_at': None, 'failed_at': None, 'completed_at': 1751892639, 'required_action': None, 'last_error': None, 'model': 'gpt-4.1-mini', 'instructions': 'You are helpful agent', 'tools': [], 'tool_resources': {}, 'metadata': {}, 'temperature': 1.0, 'top_p': 1.0, 'max_completion_tokens': None, 'max_prompt_tokens': None, 'truncation_strategy': {'type': 'auto', 'last_messages': None}, 'incomplete_details': None, 'usage': {'prompt_tokens': 29, 'completion_tokens': 22, 'total_tokens': 51, 'prompt_token_details': {'cached_tokens': 0}}, 'response_format': 'auto', 'tool_choice': 'auto', 'parallel_tool_calls': True}

In [33]:
# Fetch the thread identified by the thread ID parsed from the evaluation ID; as
# the last expression in the cell, the returned thread object is shown as the output.
project_client.agents.threads.get(thread_id=eval_thread_id)

{'id': 'thread_PRIrnaeBVfzdnytNQlyn3ml8', 'object': 'thread', 'created_at': 1751892636, 'metadata': {}, 'tool_resources': {'code_interpreter': {'file_ids': []}, 'azure_ai_search': {'indexes': []}}}