# BNY Vendor Contract QA Agent Documentation POC

## Prerequisites

In [50]:
# Install the third-party packages this notebook's web-search helper imports
# (duckduckgo-search, beautifulsoup4). termcolor is not referenced in the cells
# visible here — presumably it is used by the local `utils` module; TODO confirm.
# NOTE(review): versions are not pinned, so a fresh install may differ between runs.
%pip install termcolor duckduckgo-search beautifulsoup4 --quiet

Note: you may need to restart the kernel to use updated packages.


In [45]:
# load openai api key
import os

from dotenv import load_dotenv
# Pull settings from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast: nothing below can talk to OpenAI without an API key.
if "OPENAI_API_KEY" not in os.environ:
    raise ValueError("OPENAI_API_KEY is not set")

# Chat model for this notebook; also published through the environment.
GPT_MODEL = "gpt-3.5-turbo"
os.environ["OPENAI_GPT_MODEL"] = GPT_MODEL

In [62]:
# import utils (with ability to reload changes)
import importlib
import sys

# Put the current working directory on the module search path so the local
# `utils` module can be imported. Note: this appends again each time the cell runs.
sys.path.append(os.getcwd())

import utils

# Reload *before* the `from utils import ...` below, so re-running this cell
# rebinds the names to the freshly reloaded module instead of the cached one.
importlib.reload(utils)
from utils import (
    chat_completion_request,
    client,
    init_db,
    get_schema_description,
    get_tools_spec,
    pretty_print_conversation,
    query_db,
)

In [48]:
# Load the function-calling tool definitions from `utils`. Judging by the cell
# output, the helper also prints how many definitions were loaded and their JSON.
tools_spec = get_tools_spec()

Loaded 2 tool definitions
[
      {
            "type": "function",
            "function": {
                  "name": "query_database",
                  "description": "Query the contract database to retrieve information about contracts with a particular vendor",
                  "parameters": {
                        "type": "object",
                        "properties": {
                              "query": {
                                    "type": "string",
                                    "description": "SQL query to execute against the database"
                              }
                        },
                        "required": [
                              "query"
                        ]
                  }
            }
      },
      {
            "type": "function",
            "function": {
                  "name": "search_online",
                  "description": "Search the web for information",
                  "parameters": {
             

In [60]:
import requests

from duckduckgo_search import DDGS
from bs4 import BeautifulSoup

def search_online(search_type, query=None, url=None, max_results=5, timeout=10):
    """Search the web or fetch the visible text of a single page.

    Args:
        search_type: ``"browse"`` to run a DuckDuckGo text search for ``query``,
            or ``"scrape"`` to download ``url`` and extract its text.
        query: Search terms; required when ``search_type`` is ``"browse"``.
        url: Page address; required when ``search_type`` is ``"scrape"``.
        max_results: Maximum number of search hits to request in browse mode.
        timeout: Seconds to wait for the HTTP request in scrape mode, so a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        Browse mode: whatever ``DDGS().text`` returns (the search hits).
        Scrape mode: the page text with leading/trailing whitespace stripped.

    Raises:
        ValueError: If ``search_type`` is not recognised, or the argument the
            chosen mode needs (``query`` / ``url``) is missing or empty.
    """
    if search_type == "browse":
        # Validate up front rather than letting None reach the search library.
        if not query:
            raise ValueError("A non-empty `query` is required when search_type is 'browse'")
        # use duckduckgo to search for the query
        return DDGS().text(query, max_results=max_results)

    if search_type == "scrape":
        if not url:
            raise ValueError("A non-empty `url` is required when search_type is 'scrape'")
        # scrape the text of the url; requests has no default timeout, so set one
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.text, "html.parser")
        return soup.get_text().strip()

    raise ValueError(f"Unknown search type: {search_type}")

In [58]:
# Smoke-test browse mode: run a web search and display the returned hits.
results = search_online("browse", query="how to install python")
results

[{'title': 'Download Python | Python.org',
  'href': 'https://www.python.org/downloads/',
  'body': 'Find the latest version of Python for your operating system and download it for free. Learn about the release schedule, maintenance status, and release notes of different Python versions.'},
 {'title': 'Python 3 Installation & Setup Guide - Real Python',
  'href': 'https://realpython.com/installing-python/',
  'body': 'Learn how to install Python on Windows, macOS, Linux, iOS, Android and online using official Python distributions. Find out how to check your Python version, choose the right options for your needs, and get started with Python programming.'},
 {'title': 'How to Install Python on Windows - How-To Geek',
  'href': 'https://www.howtogeek.com/197947/how-to-install-python-on-windows/',
  'body': 'Learn how to download and install the latest version of Python 3 from the official website, and how to add it to your PATH for easy access. Also, find out how to manage multiple Pytho

In [61]:
# Smoke-test scrape mode on the first hit from the browse cell above
# (depends on `results` from that cell having been run first).
search_online("scrape", url=results[0]["href"])

'Download Python | Python.org\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nNotice: While JavaScript is not essential for this website, your interaction with the content will be limited. Please turn JavaScript on for the full experience. \n\n\n\n\n\n\nSkip to content\n\n\n▼ Close\n                \n\n\nPython\n\n\nPSF\n\n\nDocs\n\n\nPyPI\n\n\nJobs\n\n\nCommunity\n\n\n\n▲ The Python Network\n                \n\n\n\n\n\n\n\n\n\nDonate\n\n≡ Menu\n\n\nSearch This Site\n\n\n                                    GO\n                                \n\n\n\n\n\nA A\n\nSmaller\nLarger\nReset\n\n\n\n\n\n\nSocialize\n\nLinkedIn\nMastodon\nChat on IRC\nTwitter\n\n\n\n\n\n\n\n\n\n\nAbout\n\nApplications\nQuotes\nGetting Started\nHelp\nPython Brochure\n\n\n\nDownloads\n\nAll releases\nSource code\nWindows\nmacOS\nOther Platforms\nLicense\nAlternative Implementations\n\n\n\nDocumentation\n\nDocs\nAudio/Visual Talks\nBeginner\'s Guide\nDeveloper\'s Guide\nFAQ\nNon-English Docs\nPEP Index\nPython Books\nPytho

In [None]:
from typing_extensions import override
from openai import AssistantEventHandler


class FunctionCallHandler(AssistantEventHandler):
    """Assistant stream handler that executes requested tool calls.

    When the run emits ``thread.run.requires_action``, each requested tool call
    is dispatched to the matching local function (``query_db`` or
    ``search_online``), and the collected outputs are submitted back on a new
    stream whose text deltas are printed as they arrive.
    """

    @override
    def on_event(self, event):
        """React to a stream event; only ``thread.run.requires_action`` is handled."""
        if event.event == "thread.run.requires_action":
            run_id = event.data.id
            self.handle_requires_action(event.data, run_id)

    def handle_requires_action(self, data, run_id):
        """Run every tool call requested by the run and submit the outputs.

        Args:
            data: The run object carried by the ``requires_action`` event.
            run_id: Id of that run; forwarded to ``submit_tool_outputs``.
        """
        import json  # local import so this cell does not rely on another cell

        # Tool name as declared in the tools spec -> local implementation.
        handlers = {"query_database": query_db, "search_online": search_online}
        tool_outputs = []

        for tool in data.required_action.submit_tool_outputs.tool_calls:
            handler = handlers.get(tool.function.name)
            if handler is None:
                # Still answer the call so the submission covers every tool call,
                # and tell the model the tool does not exist.
                output = f"Unknown tool: {tool.function.name}"
            else:
                # Arguments live on the tool call (not on the run) and arrive as
                # a JSON-encoded string, so decode before unpacking.
                arguments = json.loads(tool.function.arguments or "{}")
                result = handler(**arguments)
                # Tool outputs are submitted as strings; serialise anything else.
                output = result if isinstance(result, str) else json.dumps(result, default=str)

            tool_outputs.append({"tool_call_id": tool.id, "output": output})

        # Submit all tool_outputs at the same time
        self.submit_tool_outputs(tool_outputs, run_id)

    def submit_tool_outputs(self, tool_outputs, run_id):
        """Send tool outputs back to the run and print the streamed reply.

        Args:
            tool_outputs: List of ``{"tool_call_id", "output"}`` dicts.
            run_id: Accepted for interface compatibility; the thread and run
                ids actually used are read from ``self.current_run``.
        """
        # Use the submit_tool_outputs_stream helper
        with client.beta.threads.runs.submit_tool_outputs_stream(
            thread_id=self.current_run.thread_id,
            run_id=self.current_run.id,
            tool_outputs=tool_outputs,
            # A fresh handler lets follow-up tool requests on this stream be served too.
            event_handler=FunctionCallHandler(),
        ) as stream:
            for text in stream.text_deltas:
                print(text, end="", flush=True)

            print()

In [23]:
# System prompt for the vendor QA agent: states its mission, walks through how
# the `query_database` and `search_online` tools should be combined, and lists
# constraints on when each source may be used. Surrounding whitespace is stripped.
SYSTEM_PROMPT = """
# Mission:
You are an AI Agent that helps employees answer questions they might have about everything related to software vendors.
You will be asked questions such as "Do we have a vendor for cloud storage?" or "I need a tool for project management".
You should use the tools available to you as well as semantic search on the documents you have access to to answer these questions.

# Guidelines:
For "Do we have a vendor for cloud storage?", you could use the `query_database` to query the contracts database for cloud storage vendors.
Then you could search your document repository for information on the vendors you found.
If none are found, then you might search online using the `search_online` tool to discover new vendors.
Or, for the question "I need a tool for project management", if you cannot find a relevant vendor in the database,
  you could use the `search_online` tool to find out if any existing vendors provide project management tools.

# Constraints:
You should always try and find relevant information from the database.
You should only fall back to your existing knowledge of vendors to help you come up with good search queries, not to answer the questions directly.
You should not use the `search_online` tool to answer questions that can be answered using the tools you have access to.
You should only use your document retrieval system to find extra information related to vendors found in the database or online - essentially to enrich your knowledge before answering.
Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.
""".strip()


In [None]:
# Seed conversation: one system instruction followed by the first user turn.
# NOTE(review): the system text here repeats only the final constraint line of
# SYSTEM_PROMPT rather than using SYSTEM_PROMPT itself — confirm that is intended.
# NOTE(review): the user message looks unfinished ("Can you find me") — TODO complete.
messages = [
    {"role": "system", "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."},
    {"role": "user", "content": "Can you find me"},
]