In [1]:
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the application's .env file. The call is the
# last expression of the cell, so its return value is shown as the cell output.
load_dotenv(dotenv_path=find_dotenv(filename='../application/.env'))

True

In [3]:
import os
from operator import itemgetter
from urllib.parse import quote_plus

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema import format_document
from langchain.schema.messages import get_buffer_string
from langchain.schema.runnable import RunnableParallel
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores.pgvector import PGVector
from langchain_openai import ChatOpenAI

def build_connection_string(user, password, host, database, port=5432):
    """Build the SQLAlchemy URL used to reach the pgvector Postgres database.

    The user name and password are percent-encoded so that characters such as
    ``@``, ``:`` or ``/`` inside the credentials cannot be mistaken for URL
    delimiters. Credentials are expected to be stored raw (not pre-encoded).

    Args:
        user: Database user name.
        password: Database password.
        host: Database host name or address.
        database: Name of the database to connect to.
        port: TCP port of the server; defaults to 5432.

    Returns:
        A ``postgresql+psycopg2://`` connection URL string.
    """
    # str() keeps an unset variable (None) rendering as "None", exactly as the
    # plain f-string interpolation did before.
    safe_user = quote_plus(str(user))
    safe_password = quote_plus(str(password))
    return f"postgresql+psycopg2://{safe_user}:{safe_password}@{host}:{port}/{database}"


host = os.getenv("PG_VECTOR_HOST")
user = os.getenv("PG_VECTOR_USER")
password = os.getenv("PG_VECTOR_PASSWORD")
# NOTE: the PGDATABASE value is used both as the database name in the URL and
# as the PGVector collection name.
COLLECTION_NAME = os.getenv("PGDATABASE")
CONNECTION_STRING = build_connection_string(user, password, host, COLLECTION_NAME)

# Chat model created with the library defaults.
model = ChatOpenAI()

# Embedding client; the API key is read from the environment.
embeddings = OpenAIEmbeddings(api_key=os.environ.get("OPENAI_API_KEY"))

# PGVector store built from the connection string, collection name and
# embedding client above, plus the retriever obtained from it.
store = PGVector(
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,
)
retriever = store.as_retriever()

In [20]:
# The retriever offers a standardized interface for fetching documents from the
# vector store. `invoke` is the Runnable entry point and replaces the deprecated
# `get_relevant_documents` method.
# The opening hours are described in opening_hours.txt, so this query should
# return the chunks of that file that were stored in the vector store.
retriever.invoke("When are the opening hours?")

[Document(page_content='Monday to Thursday: 11:00 AM - 11:00 PM Friday: 11:00 AM - 12:00 AM (midnight) Saturday: 10:00 AM - 12:00 AM (midnight) Sunday: 10:00 AM - 11:00 PM Special Hours: Our kitchen closes 30 minutes before', metadata={'source': 'opening_hours.txt'}),
 Document(page_content='the exact amount before confirming your order. Restaurant Opening Hours:', metadata={'source': 'opening_hours.txt'}),
 Document(page_content='La Tavola Calda - Delivery Service & Opening Hours', metadata={'source': 'opening_hours.txt'}),
 Document(page_content="30 minutes before the restaurant closing time. Whether you're craving a quick lunch, planning a cozy dinner at home, or simply indulging in a late-night snack, La Tavola Calda is just a chat away.", metadata={'source': 'opening_hours.txt'})]

In [21]:
# Prompt that takes the chat history and a follow-up question and asks the
# model to rephrase the follow-up as a standalone question.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(template=_template)

# Chat prompt that takes a context and a question and asks the model to answer
# based only on that context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template=template)

# Prompt used to render a single document: it emits only the page_content.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template("{page_content}")

In [22]:
### 5. Building a Data Processing Pipeline: helper that joins the retrieved documents into one context string

def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Render every document with ``document_prompt`` and join the results.

    Args:
        docs: Iterable of documents to render.
        document_prompt: Prompt handed to ``format_document`` for each document.
        document_separator: String inserted between the rendered documents.

    Returns:
        A single string containing all rendered documents.
    """
    rendered = []
    for document in docs:
        rendered.append(format_document(document, document_prompt))
    return document_separator.join(rendered)

In [23]:
### 5. Building a Data Processing Pipeline:

# Step that rewrites the incoming question as a standalone question.
# RunnableParallel holds a single entry here; the stages inside that entry are
# piped one after another:
#   1. the chat_history entry is replaced by get_buffer_string(chat_history),
#   2. CONDENSE_QUESTION_PROMPT is filled with the chat history and question,
#   3. the prompt is sent to a chat model created with temperature=0,
#   4. StrOutputParser turns the model response into a plain string.
# The result is exposed under the "standalone_question" key.
_inputs = RunnableParallel(
    {
        "standalone_question": (
            RunnablePassthrough.assign(
                chat_history=lambda inputs: get_buffer_string(inputs["chat_history"])
            )
            | CONDENSE_QUESTION_PROMPT
            | ChatOpenAI(temperature=0)
            | StrOutputParser()
        )
    }
)

In [24]:
### 6. Preparing context for answer generation:

# Mapping applied to the output of the question-condensing step:
#   context  - the standalone question is passed to the retriever and the
#              returned documents are merged by _combine_documents;
#   question - the standalone question itself (not the user's original wording).
_context = {
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    "question": itemgetter("standalone_question"),
}


In [25]:
# End-to-end conversational QA chain; each stage feeds the next:
#   _inputs           -> rewrites the question as a standalone question
#   _context          -> retrieves documents and builds the prompt variables
#   ANSWER_PROMPT     -> formats the context and question for the model
#   model             -> the ChatOpenAI instance created during setup generates
#                        the answer (reused instead of building a second client)
#   StrOutputParser() -> returns the answer as a plain string
conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | model | StrOutputParser()

In [26]:
# Ask a first question with an empty chat history.
conversational_qa_chain.invoke({"question": "When are the opening hours?", "chat_history": []})


Intermediate Output: {'standalone_question': 'What are the opening hours?'}


"The restaurant's opening hours are as follows:\nMonday to Thursday: 11:00 AM - 11:00 PM\nFriday: 11:00 AM - 12:00 AM (midnight)\nSaturday: 10:00 AM - 12:00 AM (midnight)\nSunday: 10:00 AM - 11:00 PM"