# Vendor Contract QA Agent Documentation POC

## Pre-requisites

In [None]:
# Install extra packages ahead of running the notebook; --quiet suppresses pip's output.
# NOTE(review): google-search-results is presumably the SerpApi client that the
# SERP_API_KEY check further down relates to — confirm.
%pip install google-search-results --quiet
%pip install beautifulsoup4 --quiet

In [None]:
# Load API keys into the environment and fail fast when a required one is missing.
import os

from dotenv import load_dotenv

# Populate os.environ (python-dotenv reads a local .env file when one exists)
# before checking for the keys.
load_dotenv()

if "OPENAI_API_KEY" not in os.environ:
    raise ValueError("OPENAI_API_KEY is not set")

# NOTE(review): SERP_API_KEY is presumably consumed by the online-search tool
# defined in utils — confirm.
if "SERP_API_KEY" not in os.environ:
    raise ValueError(
        "SERP_API_KEY is not set. "
        "You can get it from https://serpapi.com/manage-api-key."
    )

In [None]:
import sys

# Make modules in the current working directory importable.
# NOTE(review): the `utils` import below presumably resolves to a utils module
# sitting next to this notebook — confirm.
sys.path.append(os.getcwd())

from utils import (
    client,
    init_db,
    get_schema_description,
    get_tools_spec,
)

In [None]:
# Run the database initialisation helper imported from utils.
# NOTE(review): what it creates or loads is not visible in this notebook.
init_db()

# Create Agent

### Load Tools and DB Schema Spec

In [None]:
# Tool definitions; handed to the assistant as `tools=` when it is created.
tools_spec = get_tools_spec()

In [None]:
# Show the schema description text; the same text is embedded in the agent's
# system prompt under "# DB Schema:".
print(get_schema_description())

### Create an Assistant

In [None]:
# System prompt used as the assistant's `instructions`. The text returned by
# get_schema_description() is interpolated under "# DB Schema:" when this cell
# runs, and leading/trailing whitespace is removed by .strip().
AGENT_SYSTEM_PROMPT = f"""
# Mission:
You are an AI Agent that helps employees answer questions they might have about everything related to software vendors.
You will be asked questions such as "Do we have a vendor for cloud storage?" or "I need a tool for project management".
You should use the tools available to you as well as semantic search on the documents you have access to to answer these questions.

# Guidelines:
For "Do we have a vendor for cloud storage?", you could use the `query_database` to query the contracts database for cloud storage vendors.
Then you could search your document repository for information on the vendors you found.
If none are found, then you might search online using the `search_online` tool to discover new vendors.
Or, for the question "I need a tool for project management", if you cannot find a relevant vendor in the database,
  you could use the `search_online` tool to find out if any existing vendors provide project management tools.

# Constraints:
You should always try and find relevant information from the database.
You can search online to find new information or confirm information that you already know - you don't have to confirm with the user to perform a search.
You should only fall back to your existing knowledge of vendors to help you come up with good search queries or when you want to enrich your answers.
  - For example, if the user is asking for a certain product and you find a vendor in the db that doesn't mention that product but you know they offer it, you can share that with the user.
You should only use your document retrieval system to find extra information related to vendors found in the database or online - essentially to enrich your knowledge before answering.
Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.

# DB Schema:
{get_schema_description()}
""".strip()

AGENT_NAME = "Vendor Contract Q/A Assistant"
AGENT_ID = None

# Reuse an assistant that already carries this name so that re-running the
# notebook does not create a duplicate.
# NOTE(review): a reused assistant is not updated here, so it keeps whatever
# instructions and tools it was created with, even if AGENT_SYSTEM_PROMPT or
# tools_spec have changed since.
for assistant in client.beta.assistants.list():
    if assistant.name == AGENT_NAME:
        # Only announce reuse when a match was actually found.
        print("Using existing agent...")
        AGENT_ID = assistant.id
        break

if AGENT_ID is None:
    # No assistant with this name exists yet: create one using the OpenAI
    # Assistants API.
    print("Creating agent...")
    agent = client.beta.assistants.create(
        name=AGENT_NAME,
        instructions=AGENT_SYSTEM_PROMPT,
        model="gpt-4o",
        tools=tools_spec,
    )
    AGENT_ID = agent.id

print(f"Assistant ID: {AGENT_ID}")

# Use the Agent

In [None]:
from utils import AgentEventHandler

# NOTE(review): DEBUG is presumably read by utils to toggle verbose output,
# with "0" meaning off — confirm.
os.environ["DEBUG"] = "0"


def single_pass_agent(input):
    """Ask the agent a single question in a fresh thread.

    Args:
        input: Mapping with a ``"question"`` entry containing the user's
            question. The same object is handed to ``AgentEventHandler``
            (presumably so the handler can record the agent's output on it —
            confirm against utils). The name shadows the builtin ``input``;
            it is kept so existing callers are unaffected.

    Returns:
        The very same ``input`` object that was passed in.
    """
    # Each call starts a brand-new conversation thread.
    thread_id = client.beta.threads.create().id

    # Post the question as the user message of that thread.
    client.beta.threads.messages.create(
        thread_id=thread_id,
        role="user",
        content=input["question"],
    )

    # Stream the assistant run and block until it has finished.
    handler = AgentEventHandler(input)
    run_stream = client.beta.threads.runs.stream(
        thread_id=thread_id,
        assistant_id=AGENT_ID,
        event_handler=handler,
    )
    with run_stream as stream:
        stream.until_done()

    return input

In [None]:
import json

# NOTE(review): this global shadows the builtin input() for the rest of the
# notebook session.
input = {"question": "Do we have contracts with microsoft?"}

# The return value is ignored (single_pass_agent returns the same dict it was
# given). The dict is printed afterwards, so the agent's output is presumably
# written into it by AgentEventHandler — confirm.
single_pass_agent(input)

print(json.dumps(input, indent=5))

# Use the Agent with ValidMind

In [None]:
import pandas as pd
import validmind as vm

# Initialise the ValidMind client against the given tracking endpoint.
# NOTE(review): the "..." values look like placeholders that must be replaced
# with real credentials and a model identifier before running — confirm.
vm.init(
    api_host="https://api.prod.validmind.ai/api/v1/tracking",
    api_key="...",
    api_secret="...",
    model="...",
)

# Register the agent function as the prediction function of a ValidMind model.
vm_model = vm.init_model(
    predict_fn=single_pass_agent,
    input_id="vendor_qa_agent",
)

# Quick check: run the model on a one-row dataframe with a "question" column.
vm_model.predict(pd.DataFrame({"question": ["Do we have contracts with microsoft?"]}))

In [None]:
# Evaluation set: one (question, ground_truth) pair per row, so each question
# stays next to its expected answer.
test_df = pd.DataFrame(
    [
        (
            "Do we have contracts with microsoft?",
            "Yes, we have 2 contracts with Microsoft: C014 and C015.",
        ),
        (
            "Do we have contracts with google?",
            "No, we do not have contracts with Google.",
        ),
        (
            "Do we have contracts with amazon?",
            "Yes, we have 2 contracts with Amazon: C007 and C008.",
        ),
        (
            "What vendors do we have that offer cloud storage?",
            "We have multiple vendors who have a cloud storage offering: Amazon Web Services (Vendor ID: V005), Microsoft (Vendor ID: V011), IBM (Vendor ID: V012) and Oracle (Vendor ID: V014)",
        ),
        (
            "Do we have relationships with server hardware vendors?",
            "We have relationships with the following server hardware vendors: Dell (Vendor ID: V013), HP (Vendor ID: V015) and Cisco (Vendor ID: V001).",
        ),
        (
            "How much is our total spend on project management software?",
            "We don't have any existing contracts for project management software. So the total spend is $0.",
        ),
        (
            "I need an ERP system for our company. Can you help me find one?",
            "We have an existing relationship with SAP for ERP software (Vendor ID: V004). The following two contracts are in place: C005 and C006.",
        ),
    ],
    columns=["question", "ground_truth"],
)

# Register the test dataframe with ValidMind under a fixed input id.
vm_test_dataset = vm.init_dataset(
    test_df,
    input_id="vendor_qa_test_dataset",
)

In [None]:
# Attach vm_model's predictions to the test dataset.
# NOTE(review): presumably this invokes the agent for every row — confirm.
vm_test_dataset.assign_predictions(vm_model)

In [None]:
import json

# Name of the dataframe column associated with vm_model's predictions.
pred_column = vm_test_dataset.prediction_column(vm_model)

# Pretty-print the prediction stored for the row labelled 0.
print(json.dumps(vm_test_dataset.df[pred_column][0], indent=5))

In [None]:
# List the ValidMind tests that match the "rag" filter.
vm.tests.list_tests(filter="rag")

In [None]:
# Describe every test matching the "rag" filter, in sorted order.
# NOTE(review): pretty=False presumably returns plain test ids rather than a
# formatted table — confirm.
for test_id in sorted(vm.tests.list_tests(filter="rag", pretty=False)):
    vm.tests.describe_test(test_id)

In [None]:
from validmind.tests import run_test

# Inputs shared by the run_test calls that follow: the test dataset.
rag_tests_inputs = {"dataset": vm_test_dataset}

# for the tests that need the contexts and generated answer, we can pass a mapping as parameters
rag_tests_params = {
    # Dotted reference into the prediction column.
    # NOTE(review): presumably resolved by validmind to the "contexts" entry
    # of each prediction — confirm.
    "contexts_column": f"{pred_column}.contexts",
    # Builds one answer string per row by joining the prediction's "messages"
    # entries with blank lines.
    "answer_column": lambda row: "\n\n".join(row[pred_column]["messages"]),
}

In [None]:
# Preview the first rows of the dataset's dataframe.
vm_test_dataset.df.head()

In [None]:
import os

# NOTE(review): presumably tells validmind to overwrite previously stored test
# metadata when results are logged — confirm against the validmind docs.
os.environ["VM_OVERRIDE_METADATA"] = "true"

In [None]:
# Run the ragas AnswerCorrectness test with the shared inputs/params, then log
# the result (presumably to the ValidMind project set up by vm.init — confirm).
result = run_test(
    "validmind.model_validation.ragas.AnswerCorrectness",
    inputs=rag_tests_inputs,
    params=rag_tests_params,
)
result.log()

In [None]:
# Run the ragas AnswerRelevance test with the shared inputs/params, then log
# the result.
result = run_test(
    "validmind.model_validation.ragas.AnswerRelevance",
    inputs=rag_tests_inputs,
    params=rag_tests_params,
)
result.log()

In [None]:
# Run the ragas AnswerSimilarity test with the shared inputs/params, then log
# the result.
result = run_test(
    "validmind.model_validation.ragas.AnswerSimilarity",
    inputs=rag_tests_inputs,
    params=rag_tests_params,
)
result.log()

In [None]:
# Run the ragas AspectCritique test, then log the result. The shared params are
# extended with one custom aspect given as a (name, description) tuple.
result = run_test(
    "validmind.model_validation.ragas.AspectCritique",
    inputs=rag_tests_inputs,
    params={
        **rag_tests_params,
        "additional_aspects": [
            (
                "professionalism",
                "Ensure the response is professional and appropriate for a business setting.",
            ),
        ],
    },
)
result.log()

In [None]:
# Run the ragas ContextEntityRecall test with the shared inputs/params, then
# log the result.
result = run_test(
    "validmind.model_validation.ragas.ContextEntityRecall",
    inputs=rag_tests_inputs,
    params=rag_tests_params,
)
result.log()

In [None]:
# Run the ragas ContextPrecision test with the shared inputs/params, then log
# the result.
result = run_test(
    "validmind.model_validation.ragas.ContextPrecision",
    inputs=rag_tests_inputs,
    params=rag_tests_params,
)
result.log()

In [None]:
# Run the ragas ContextRecall test with the shared inputs/params, then log the
# result.
result = run_test(
    "validmind.model_validation.ragas.ContextRecall",
    inputs=rag_tests_inputs,
    params=rag_tests_params,
)
result.log()

In [None]:
# Run the ragas ContextRelevancy test with the shared inputs/params, then log
# the result.
result = run_test(
    "validmind.model_validation.ragas.ContextRelevancy",
    inputs=rag_tests_inputs,
    params=rag_tests_params,
)
result.log()

In [None]:
# Run the ragas Faithfulness test with the shared inputs/params.
# NOTE(review): unlike the preceding run_test cells, no `result.log()` follows
# within the visible lines — confirm whether the result should be logged too.
result = run_test(
    "validmind.model_validation.ragas.Faithfulness",
    inputs=rag_tests_inputs,
    params=rag_tests_params,
)