Document comparison

# Travel planning requirements vs. generated day-to-day plan using available itinerary information

* Flight, Hotel, Airport parking, Car rental

* Desired activities and destinations


## Configure OpenAI Settings

In [1]:
import os
import openai
from dotenv import load_dotenv
# Set up Azure OpenAI
load_dotenv()

openai.api_type = "azure"

AZURE_OPENAI_API_VERSION = os.getenv("AAG_AZURE_OPENAI_API_VERSION")
openai.api_version = AZURE_OPENAI_API_VERSION

AZURE_OPENAI_API_KEY = os.getenv("AAG_AZURE_OPENAI_API_KEY").strip()
assert AZURE_OPENAI_API_KEY, "ERROR: Azure OpenAI Key is missing"
openai.api_key = AZURE_OPENAI_API_KEY

AZURE_OPENAI_ENDPOINT = os.getenv("AAG_AZURE_OPENAI_ENDPOINT","").strip()
assert AZURE_OPENAI_ENDPOINT, "ERROR: Azure OpenAI Endpoint is missing"
openai.api_base = AZURE_OPENAI_ENDPOINT

# Deployment for Chat
# DEPLOYMENT_NAME_CHAT = os.getenv('DEPLOYMENT_NAME_CHAT')
DEPLOYMENT_NAME_CHAT = os.getenv('AAG_DEPLOYMENT_NAME_CHAT_16K')

# Deployment for embedding
DEPLOYMENT_NAME_EMBEDDING = os.getenv("AAG_DEPLOYMENT_NAME_EMBEDDING")
model: str = DEPLOYMENT_NAME_EMBEDDING

# Azure AI Search (Cognitive vector store)
vector_store_address: str = os.getenv("AAG_AZURE_SEARCH_SERVICE_ENDPOINT")  
vector_store_password: str = os.getenv("AAG_AZURE_SEARCH_ADMIN_KEY")
# index_name: str = "langchain-vector-arxiv-physics"

# Deployment for embedding
BING_SUBSCRIPTION_KEY = os.getenv("BING_SUBSCRIPTION_KEY")

## requirement

In [2]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

loader = TextLoader("./ask/skitrip7daysitinerary.txt", encoding="utf-8")

documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs_request = text_splitter.split_documents(documents)


#### Split documents into chunks

In [None]:
docs_request

In [None]:
loader = TextLoader("./reports/ai_Ski_trip_planning_001.txt", encoding="utf-8")

documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs_report = text_splitter.split_documents(documents)

In [None]:
docs_report

In [6]:
splitted_docs = docs_request + docs_report

In [None]:
splitted_docs

### Create embeddings and vector store instances

### FAISS vector store

In [8]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Get Azure OpenAI embedding
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
    deployment=model,
    model=model,
    chunk_size=1,   # this 'chunk_size' is misleading, it is really about 'input' text string, not the number of words or characters in the text.
    openai_api_base=AZURE_OPENAI_ENDPOINT,
    openai_api_type="azure",
    api_key=AZURE_OPENAI_API_KEY,
)

# Create the vector index
db = FAISS.from_documents(splitted_docs, embeddings)

In [9]:
db.save_local("faiss_index_doc_compare")

In [10]:
# Set up the retriever we want to use, and then turn it into a retriever tool
faiss_db_retriever = db.as_retriever()

In [11]:
from langchain.agents.agent_toolkits import create_retriever_tool

# Tool define
faiss_retriever = create_retriever_tool(
    faiss_db_retriever,
    "Travel plan comparison",
    "This tool is help to compare the Ski trip planning requirement vs generated trip report",
)

In [12]:
from langchain.chat_models import AzureChatOpenAI
# Azure OpenAI model
llm = AzureChatOpenAI(
    azure_deployment="gpt-35-turbo-16k",
    temperature=0,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_api_type="azure",
    api_key=AZURE_OPENAI_API_KEY,
)

In [13]:
from langchain.agents import initialize_agent, AgentType
# Next, let's load some tools to use. Note that the `llm-math` tool uses an LLM, so we need to pass that in.
faiss_retriever_tools = [
    faiss_retriever,
]

# tools = load_tools(["bing-search"], llm=llm)
test_agent = initialize_agent(
    faiss_retriever_tools,
    llm,
    agent="zero-shot-react-description",
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=5,
)


In [14]:
# Now let's test it out!
test_agent.run("Is trip report matching the trip planning requirements? If you rate it (0-no match, 9-best match), what is the score?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to compare the trip report with the trip planning requirements to determine if they match. I should rate the match on a scale of 0 to 9.
Action: Travel plan comparison
Action Input: Trip report, Trip planning requirements[0m
Observation: [36;1m[1;3m[Document(page_content='Here is the comprehensive 7-day ski trip itinerary:\n\nDay 1: Departure\nDate: (Fill in date)\nArrive at the airport. Airport Parking Info: (Fill in parking info)\nPark your car at the pre-booked parking spot.\nCheck-in for your flight. Flight Info: (Fill in flight info)\nDepart for your destination.\nUpon arrival, pick up your pre-booked rental car. Car Rental Info: (Fill in car rental info)\nDrive to your hotel.\nHotel Contact Info: (Fill in hotel info)\nDinner at a local restaurant. Restaurant Suggestions: (Fill in restaurant info)\n\nDay 2: Ski Resort 1\nDate: (Fill in date)\nSki Resort Info: (Fill in resort info)\nAddress: (Fill in address)\nP

'The trip report matches the trip planning requirements with a score of 8 out of 9.'

# Score is 8 out of 9

In [17]:
# Now let's test it out!
test_agent.run("Please compare skitrip7daysitinerary.txt and  ai_Skitrip_planning_001.txt and highlight the difference")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to compare the contents of skitrip7daysitinerary.txt and ai_Skitrip_planning_001.txt to identify any differences.
Action: Travel plan comparison
Action Input: skitrip7daysitinerary.txt, ai_Skitrip_planning_001.txt[0m
Observation: [36;1m[1;3m[Document(page_content='Here is the comprehensive 7-day ski trip itinerary:', metadata={'source': './reports/ai_Ski_trip_planning_001.txt'}), Document(page_content='Here is the comprehensive 7-day ski trip itinerary:\n\nDay 1: Departure\nDate: (Fill in date)\nArrive at the airport. Airport Parking Info: (Fill in parking info)\nPark your car at the pre-booked parking spot.\nCheck-in for your flight. Flight Info: (Fill in flight info)\nDepart for your destination.\nUpon arrival, pick up your pre-booked rental car. Car Rental Info: (Fill in car rental info)\nDrive to your hotel.\nHotel Contact Info: (Fill in hotel info)\nDinner at a local restaurant. Restaurant Suggestions: (Fill in

'Agent stopped due to iteration limit or time limit.'