### Running RAG-Based Agent Evaluation


In [None]:
%pip install openai

In [None]:
%pip install azure-ai-evaluation

In [None]:
%pip install azure-ai-projects==1.0.0b5

#### Importing Libraries and Utilities


In [None]:
import os
from openai import AzureOpenAI
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential 
from azure.ai.projects import AIProjectClient  

In [None]:
# Load settings from the environment file before anything reads os.environ.
load_dotenv()

# Client for the Azure AI project; every agent/thread call below goes through it.
project_client = AIProjectClient.from_connection_string(
    conn_str=os.environ.get("PROJECT_CONNECTION_STRING"),
    credential=DefaultAzureCredential(),
)

# Deployment name read from the environment for use when creating agents.
model = os.environ.get("AZURE_OPENAI_DEPLOYMENT")

#### Setting the system prompt/instruction set for the QA Generator agent

In [None]:
# Instructions for the question-generating (QA) agent.
# This is deliberately a plain string rather than an f-string: nothing is
# interpolated, and a plain literal stays valid if braces are ever added to
# the prompt text. The text itself is unchanged.
system_prompt_1 = """You are meant to behave as an evaluation agent for azure ai agent evaluation.
The azure ai agent utilizes a RAG based approach to generate responses to user queries.
We need to evaluate the responses generated by the azure ai agent.
We will utilize the azure ai evaluation SDK for this purpose and specifically the Groundedness score which 
measures the extent to which the response is grounded in the context of the conversation.

The groundedness score tool takes in the following parameters:
query: The query that was used to generate the response.
context: The context that should be used/ or was used to generate the response.
response: The response that was generated by the model.

You will be passed a text document on top of which you need to generate one question that will help measure the groundedness of the response.

the output should strictly adhere to the following format:
<question>

Include only the question in the output and nothing else, not even filler words.
"""

#### Creating our QA Agent

In [None]:
# Create the question-generating agent with the instructions in system_prompt_1.
# The deployment name comes from `model` (AZURE_OPENAI_DEPLOYMENT) instead of a
# hard-coded "gpt-4", so this agent is configured the same way as the RAG agent
# created later in the notebook.
QA_agent = project_client.agents.create_agent(
    model=model,
    name="QA_generator",
    instructions=system_prompt_1,
)
print(f"Created agent, agent ID: {QA_agent.id}")
QA_agent_id = QA_agent.id




#### Creating our Thread for the QA Agent

In [None]:
# Open the conversation thread used by the QA agent and keep its ID around
# for the cells that post messages to it.
QA_thread = project_client.agents.create_thread()
QA_thread_id = QA_thread.id
print(f"Created thread, thread ID: {QA_thread_id}")


In [None]:
# Accumulates one record (a dict) per document; despite the name, this is a list.
eval_dict = []
 

#### Storing QA Response as a collection

In [None]:
# Generate one evaluation question per file in the "collateral" directory.
# Each record appended to eval_dict pairs the generated question ("query")
# with the full document text ("context").
# NOTE(review): every document is posted to the same QA thread, so earlier
# documents presumably stay visible to the agent on later runs -- confirm
# whether a fresh thread per document is wanted.
collateral_dir = "collateral"
for file in sorted(os.listdir(collateral_dir)):  # sorted: deterministic order
    file_path = os.path.join(collateral_dir, file)
    # os.listdir also returns sub-directories, which cannot be opened as text.
    if not os.path.isfile(file_path):
        continue

    # Read the document and close the file before the slow service calls.
    # Assumes the collateral files are UTF-8 text -- TODO confirm.
    with open(file_path, "r", encoding="utf-8") as f:
        document_text = f.read()

    message = project_client.agents.create_message(
        thread_id=QA_thread_id,
        role="user",
        content=f"the document text is: {document_text} ",
    )

    run = project_client.agents.create_and_process_run(thread_id=QA_thread_id, assistant_id=QA_agent_id)
    print(f"Run finished with status: {run.status}")

    # If the run did not complete, the newest message in the thread is
    # presumably the user message posted above, so the document itself would
    # be recorded as the "query". Skip the document instead.
    if run.status != "completed":
        print(f"Skipping {file}: {run.last_error}")
        continue

    messages = project_client.agents.list_messages(thread_id=QA_thread_id)
    assistant_response = messages.data[0].content[0].text.value
    # Displaying the assistant response
    print(assistant_response)

    eval_dict.append({"query": assistant_response, "context": document_text})

# Show the collected records once, rather than re-printing the growing list
# (full document texts included) on every iteration.
print(eval_dict)

In [None]:
# Name of the Azure AI Search index, taken from the environment.
index_name = os.environ.get("AI_SEARCH_INDEX_NAME")
print(index_name)

In [None]:
from azure.ai.projects.models import AzureAISearchTool, ConnectionType

In [None]:
# Look up the project's Azure AI Search connection; its ID is what the
# search tool is built from.
conn_list = project_client.connections.list()
conn_id = ""
for conn in conn_list:
    if conn.connection_type == ConnectionType.AZURE_AI_SEARCH:
        conn_id = conn.id
        break
# Fail here with a clear message instead of passing an empty connection ID on
# to the search tool.
if not conn_id:
    raise ValueError("No Azure AI Search connection found in this project")
print(conn_id)

In [None]:
# Search tool built from the connection ID and index name resolved earlier.
ai_search = AzureAISearchTool(
    index_connection_id=conn_id,
    index_name=index_name,
)

#### Creating our RAG Agent with Azure AI Search Index

In [None]:
# Create the RAG agent on the deployment named by `model`, with the Azure AI
# Search tool's definitions and resources attached.
RAG_agent = project_client.agents.create_agent(
    name="ai-search-assistant",
    model=model,
    instructions="You are a helpful assistant",
    tools=ai_search.definitions,
    tool_resources=ai_search.resources,
    headers={"x-ms-enable-preview": "true"},
)
print(f"Created agent, ID: {RAG_agent.id}")

In [None]:
# Conversation thread used for the RAG agent's question/answer turns.
RAG_thread = project_client.agents.create_thread()
print(f"Created thread, ID: {RAG_thread.id}")

#### Generating Response from the RAG Agent to fill in the "Response" Parameter

In [None]:
# Ask the RAG agent each generated question and store its answer under the
# "response" key of the same record.
# NOTE(review): all questions go to the same RAG thread, so earlier turns are
# presumably visible to the agent on later questions -- confirm that is intended.
for eval_collection in eval_dict:
    query = eval_collection["query"]

    message = project_client.agents.create_message(
        thread_id=RAG_thread.id,
        role="user",
        # A separator is needed here; without it the prompt reads
        # "answer this query<question text>".
        content=f"answer this query: {query}",
    )

    run = project_client.agents.create_and_process_run(thread_id=RAG_thread.id, assistant_id=RAG_agent.id)
    print(f"Run finished with status: {run.status}")

    # If the run did not complete, the newest message in the thread is
    # presumably the user prompt posted above; recording it as the "response"
    # would silently corrupt the evaluation data, so stop instead.
    if run.status != "completed":
        raise RuntimeError(f"RAG agent run ended with status {run.status}: {run.last_error}")

    messages = project_client.agents.list_messages(thread_id=RAG_thread.id)
    assistant_response = messages.data[0].content[0].text.value
    # Displaying the assistant response
    print(assistant_response)

    eval_collection["response"] = assistant_response
    
    

In [None]:
import json

In [None]:

# Save the records collected so far to disk as indented JSON.
with open("eval_dict.json", "w") as f:
    f.write(json.dumps(eval_dict, indent=4))

In [None]:
from azure.ai.evaluation import GroundednessProEvaluator, GroundednessEvaluator

In [None]:
# Identifies the Azure AI project (subscription / resource group / project).
azure_ai_project = dict(
    subscription_id=os.environ.get("AZURE_SUBSCRIPTION_ID"),
    resource_group_name=os.environ.get("AZURE_RESOURCE_GROUP"),
    project_name=os.environ.get("AZURE_PROJECT_NAME"),
)

# Azure OpenAI settings that are passed to the groundedness evaluator.
model_config = dict(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
    api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
)

In [None]:
# Credential object; it is not used by the evaluator constructed below.
credential = DefaultAzureCredential()

# Groundedness evaluator configured from the model_config settings.
groundedness_eval = GroundednessEvaluator(model_config=model_config)



#### Evaluating the Groundedness of the Response from the RAG Agent

In [None]:
# Score every record and attach the score and the evaluator's stated reason
# to the record itself.
for eval_collection in eval_dict:
    # Pass the three evaluator inputs explicitly instead of unpacking the
    # whole record: after this cell has run once, each record also carries
    # the groundedness_* keys, which should not be fed back in on a re-run.
    groundedness_score = groundedness_eval(
        query=eval_collection["query"],
        context=eval_collection["context"],
        response=eval_collection["response"],
    )

    print(groundedness_score)

    eval_collection['groundedness_score'] = groundedness_score['groundedness']
    eval_collection['groundedness_reason'] = groundedness_score['groundedness_reason']
    


In [None]:
# Write the records, now including the groundedness fields, as indented JSON.
with open("eval_results.json", "w") as f:
    f.write(json.dumps(eval_dict, indent=4))