In [134]:
import os
from dotenv import load_dotenv
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LANGSMITH_TRACING=os.getenv("LANGSMITH_TRACING")
LANGSMITH_ENDPOINT=os.getenv("LANGSMITH_ENDPOINT")
LANGSMITH_API_KEY=os.getenv("LANGSMITH_API_KEY")
LANGSMITH_PROJECT=os.getenv("LANGSMITH_PROJECT")

# Simple LLM Call

In [129]:
from langchain_openai import ChatOpenAI
MODEL = "gpt-4o-mini"
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model=MODEL)
llm.invoke("Hello, world!")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 11, 'total_tokens': 21, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_bd83329f63', 'finish_reason': 'stop', 'logprobs': None}, id='run-ad2e96ab-3956-4374-95c1-1502361fe656-0', usage_metadata={'input_tokens': 11, 'output_tokens': 10, 'total_tokens': 21, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [3]:
llm_response = llm.invoke("Tell me a joke")
llm_response

AIMessage(content='Why don’t scientists trust atoms?\n\nBecause they make up everything!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 11, 'total_tokens': 25, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-cebb5269-dc2c-4664-b4cd-a37d6e60cabf-0', usage_metadata={'input_tokens': 11, 'output_tokens': 14, 'total_tokens': 25, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

# Parse Output

In [4]:
from langchain_core.output_parsers import StrOutputParser

str_output_parser = StrOutputParser()
str_output_parser.invoke(llm_response)

'Why don’t scientists trust atoms?\n\nBecause they make up everything!'

# Simple Chain

In [5]:
chain = llm | str_output_parser
chain.invoke("Tell me a joke")

'Why did the scarecrow win an award? \n\nBecause he was outstanding in his field!'

# Structured Output

In [6]:
from typing import List
from pydantic import BaseModel, Field

class MobileReview(BaseModel):
    phone_model: str = Field(description= "Name and model of the phone")
    rating: float = Field(description= "Overall rating out of 5")
    pros: List[str] = Field(description= "List of positive aspects")
    cons: List[str] = Field(description= "List of negative aspects")
    summary: str = Field(description= "Brief summary of the review")

review_text = """
Just got my hands on the new Galaxy S21 and wow, this thing is slick! The screen is gorgeous, colors pop like crazy. Camera's insane too, especially at night - my Insta game's never been stronger. Battery life's solid, lasts me all day no problem.

Not gonna lie though, it's pretty pricey. And what's with ditching the charger? C'mon Samsung. Also, still getting used to the new button layout, keep hitting Bixby by mistake.

Overall, I'd say it's a solid 4 out 5. Great phone, but a few annoying quirks keep it from being perfect. If you're due for an upgrade, definitely worth checking out!
"""

structured_llm = llm.with_structured_output(MobileReview)
output = structured_llm.invoke(review_text)
# structured output
output

MobileReview(phone_model='Samsung Galaxy S21', rating=4.0, pros=['Gorgeous screen', 'Vibrant colors', 'Insane camera performance, especially at night', 'Solid battery life'], cons=['Pretty pricey', 'No charger included', 'New button layout takes getting used to'], summary='Overall, a solid 4 out of 5. Great phone, but a few annoying quirks keep it from being perfect.')

In [7]:
output.phone_model, output.rating

('Samsung Galaxy S21', 4.0)

In [8]:
# non-structured output
str_output_parser.invoke(llm.invoke(review_text))

"It sounds like you're really enjoying your new Galaxy S21! The vibrant screen and impressive camera capabilities are definitely standout features that make it a great choice for photography enthusiasts and social media users alike. It's understandable to feel a bit frustrated about the lack of a charger in the box—many users have expressed similar concerns.\n\nAs for the button layout, it can take some time to adjust, especially if you're used to a different design. Hopefully, you'll get the hang of it soon and avoid those accidental Bixby activations!\n\nYour 4 out of 5 rating reflects a balanced view, highlighting both the phone's strengths and its minor drawbacks. It sounds like a fantastic option for anyone looking to upgrade. Enjoy capturing those amazing shots and sharing them on Instagram!"

# Prompt Template

In [9]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}.")
prompt.invoke({"topic": "programming"})

ChatPromptValue(messages=[HumanMessage(content='Tell me a short joke about programming.', additional_kwargs={}, response_metadata={})])

In [10]:
chain = prompt | llm | str_output_parser
chain.invoke({"topic": "programming"})

'Why do programmers prefer dark mode?\n\nBecause light attracts bugs!'

In [11]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# define the prompt
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}.")

# initialize the llm
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini")

# define the output parser
output_parser = StrOutputParser()

# compose the chain
chain = prompt | llm | output_parser

# run the chain and generate response
result = chain.invoke({"topic": "programming"})
print(result)

Why do programmers prefer dark mode?

Because light attracts bugs!


# LLM Messages

In [12]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, SystemMessage

system_message = SystemMessage(content= "You are a helpful assistant that tells jokes.")
human_message = HumanMessage(content= "Tell me about programming.")
llm.invoke(([system_message, human_message]))

AIMessage(content="Sure! Programming is the process of creating a set of instructions that a computer can understand and execute. It involves writing code in various programming languages, such as Python, Java, C++, and many others, to build software applications, websites, games, and more.\n\nAnd speaking of programming, here's a joke for you:\n\nWhy do programmers prefer dark mode?\n\nBecause light attracts bugs!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 77, 'prompt_tokens': 25, 'total_tokens': 102, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-66115f2e-b62b-4df2-bffb-2b916f168629-0', usage_metadata={'input_tokens': 25, 'output_tokens': 77, 'tota

In [13]:
template = ChatPromptTemplate([
    ("system", "You are a helpful assistant that tells jokes."),
    ("human", "Tell me about {topic}")
])

prompt_value = template.invoke(
    {"topic": "programming"}
)
prompt_value

ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant that tells jokes.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Tell me about programming', additional_kwargs={}, response_metadata={})])

In [14]:
llm.invoke(prompt_value)

AIMessage(content='Sure! Here’s a light-hearted joke about programming:\n\nWhy do programmers prefer dark mode?\n\nBecause light attracts bugs! \n\nIf you have any specific questions about programming or want to hear more jokes, feel free to ask!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 24, 'total_tokens': 69, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-c5709137-ff10-4b21-8716-ca30d50d1ed3-0', usage_metadata={'input_tokens': 24, 'output_tokens': 45, 'total_tokens': 69, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

# RAG

In [49]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from typing import List
from langchain_core.documents import Document
import os

CHUNK_SIZE = 1000
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = CHUNK_SIZE,
    chunk_overlap = CHUNK_SIZE*0.1,
    length_function = len
)

docx_loader = Docx2txtLoader("Data\HDFC Quarterly Results.docx")
documents = docx_loader.load()

splits = text_splitter.split_documents(documents)

In [None]:
pdf_loader = PyPDFLoader("Data\Reliance Quarterly Results.pdf")
documents = pdf_loader.load()
documents

In [54]:
# function to load documents from a folder
def load_documents(folder_path: str) -> List[Document]:
    documents = []
    file_names = os.listdir(folder_path)
    for file_name in file_names:
        file_path = os.path.join(folder_path, file_name)

        if file_name.lower().endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif file_name.lower().endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            print(f'Unsupported file type: {file_name}')
            continue
        documents.extend(loader.load())
    return documents

In [58]:
# load documents from a folder
folder_path = "D:\ML\LangChain RAG Crash Course\Data"
documents = load_documents(folder_path= folder_path)
print(f"SUCCESS: Loaded {len(documents)} documents from the folder.")

splits = text_splitter.split_documents(documents)
print(f"SUCCESS: Split the documents into {len(splits)} chunks.")

Unsupported file type: HDFC Quarterly Results.doc
SUCCESS: Loaded 42 documents from the folder.
SUCCESS: Split the documents into 201 chunks.


In [70]:
# create embeddings
embeddings = OpenAIEmbeddings()
document_embeddings = embeddings.embed_documents([split.page_content for split in splits])
len(document_embeddings), len(document_embeddings[2]), document_embeddings[0][:10]

(201,
 1536,
 [-0.011530634015798569,
  -0.015186353586614132,
  0.0009800695115700364,
  -0.014760310761630535,
  -0.017371539026498795,
  0.021879343315958977,
  -0.003741615219041705,
  0.0169729832559824,
  -0.03438575193285942,
  -0.020807364955544472])

In [None]:
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

embedding_function= SentenceTransformerEmbeddings(model_name= 'all-MiniLM-L6-v2')
document_embeddings= embedding_function.embed_documents([split.page_content for split in splits])
len(document_embeddings), len(document_embeddings[2]), document_embeddings[0][:10]

In [81]:
os.path.join(os.getcwd(), "chroma_db")

'd:\\ML\\LangChain RAG Crash Course\\chroma_db'

In [89]:
# create vector store db and store
from langchain_chroma import Chroma

embedding_function= OpenAIEmbeddings()
collection_name= "my_collection"
vector_store= Chroma.from_documents(collection_name= collection_name,
                                     documents= splits,
                                     embedding= embedding_function)
Chroma.from_documents(
    collection_name= collection_name,
    documents= splits,
    embedding= embedding_function,
    persist_directory= "./chroma_db"
)

print(f"Vector store created and persisted to: {os.path.join(os.getcwd(), 'chroma_db')}")

Vector store created and persisted to: d:\ML\LangChain RAG Crash Course\chroma_db


In [92]:
# perform similarity search
query = "HDFC Bank's results are for which quarter?"
search_results = vector_store.similarity_search(query= query, k= 2)

print(f"\nTop 2 most relevant chunks for the query: {query}\n")
for i, result in enumerate(search_results, 1):
    print(f"Result {i}")
    print(f"Source: {result.metadata.get('source', 'Unknown')}")
    print(f"Content:\n{result.page_content}")
    print()


Top 2 most relevant chunks for the query: HDFC Bank's results are for which quarter?

Result 1
Source: D:\ML\LangChain RAG Crash Course\Data\HDFC Quarterly Results.pdf
Content:
Regd. Office: HDFC Bank Limited, HDFC Bank House, Senapati Bapat Marg, Lower Parel (West), Mumbai – 400 013 
 
 
 
 
Ref. No. SE/2024-25/85  
 
October 25, 2024 
 
BSE Limited     National Stock Exchange of India Limited  
Dept of Corporate Services   The Listing Department 
Phiroze Jeejeebhoy Towers,   Exchange Plaza 
Dalal Street, Fort,    Bandra Kurla Complex, 
Mumbai 400 001    Mumbai 400 051 
 
Dear Sirs, 
 
Sub: Transcript of Earnings Call for the quarter and half year ended September 30, 
2024  
 
We wish to inform you that pursuant to Regulation 30 of the SEBI (Listing Obligations and 
Disclosure Requirements) Regulations, 2015, the transcript of the Earnings Call with analysts 
and investors held on October 19, 2024 with respect to the financial results of the Bank for 
the quarter and half year ended 

In [93]:
retriever = vector_store.as_retriever(search_kwargs={"k":2})
retriever.invoke(query)

[Document(id='3f61be07-0d14-4162-affc-b18f44bb8a76', metadata={'page': 0, 'source': 'D:\\ML\\LangChain RAG Crash Course\\Data\\HDFC Quarterly Results.pdf'}, page_content='Regd. Office: HDFC Bank Limited, HDFC Bank House, Senapati Bapat Marg, Lower Parel (West), Mumbai – 400 013 \n \n \n \n \nRef. No. SE/2024-25/85  \n \nOctober 25, 2024 \n \nBSE Limited     National Stock Exchange of India Limited  \nDept of Corporate Services   The Listing Department \nPhiroze Jeejeebhoy Towers,   Exchange Plaza \nDalal Street, Fort,    Bandra Kurla Complex, \nMumbai 400 001    Mumbai 400 051 \n \nDear Sirs, \n \nSub: Transcript of Earnings Call for the quarter and half year ended September 30, \n2024  \n \nWe wish to inform you that pursuant to Regulation 30 of the SEBI (Listing Obligations and \nDisclosure Requirements) Regulations, 2015, the transcript of the Earnings Call with analysts \nand investors held on October 19, 2024 with respect to the financial results of the Bank for \nthe quarter and 

In [94]:
from langchain_core.prompts import ChatPromptTemplate

template = """
Answer the question based only on the following context:
{context}

Question: {question}

Answer: 
"""
prompt = ChatPromptTemplate.from_template(template)

In [95]:
from langchain.schema.runnable import RunnablePassthrough

rag_chain = (
    {
        "context": retriever,               # chunks retrieved based on similarity with query
        "question": RunnablePassthrough()   # query pass in invoke method
     } | prompt
)
rag_chain.invoke(query)

ChatPromptValue(messages=[HumanMessage(content="\nAnswer the question based only on the following context:\n[Document(id='3f61be07-0d14-4162-affc-b18f44bb8a76', metadata={'page': 0, 'source': 'D:\\\\ML\\\\LangChain RAG Crash Course\\\\Data\\\\HDFC Quarterly Results.pdf'}, page_content='Regd. Office: HDFC Bank Limited, HDFC Bank House, Senapati Bapat Marg, Lower Parel (West), Mumbai – 400 013 \\n \\n \\n \\n \\nRef. No. SE/2024-25/85  \\n \\nOctober 25, 2024 \\n \\nBSE Limited     National Stock Exchange of India Limited  \\nDept of Corporate Services   The Listing Department \\nPhiroze Jeejeebhoy Towers,   Exchange Plaza \\nDalal Street, Fort,    Bandra Kurla Complex, \\nMumbai 400 001    Mumbai 400 051 \\n \\nDear Sirs, \\n \\nSub: Transcript of Earnings Call for the quarter and half year ended September 30, \\n2024  \\n \\nWe wish to inform you that pursuant to Regulation 30 of the SEBI (Listing Obligations and \\nDisclosure Requirements) Regulations, 2015, the transcript of the Earn

In [96]:
def doc2str(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | doc2str,
     "question": RunnablePassthrough()} | prompt
)
rag_chain.invoke(query)

ChatPromptValue(messages=[HumanMessage(content="\nAnswer the question based only on the following context:\nRegd. Office: HDFC Bank Limited, HDFC Bank House, Senapati Bapat Marg, Lower Parel (West), Mumbai – 400 013 \n \n \n \n \nRef. No. SE/2024-25/85  \n \nOctober 25, 2024 \n \nBSE Limited     National Stock Exchange of India Limited  \nDept of Corporate Services   The Listing Department \nPhiroze Jeejeebhoy Towers,   Exchange Plaza \nDalal Street, Fort,    Bandra Kurla Complex, \nMumbai 400 001    Mumbai 400 051 \n \nDear Sirs, \n \nSub: Transcript of Earnings Call for the quarter and half year ended September 30, \n2024  \n \nWe wish to inform you that pursuant to Regulation 30 of the SEBI (Listing Obligations and \nDisclosure Requirements) Regulations, 2015, the transcript of the Earnings Call with analysts \nand investors held on October 19, 2024 with respect to the financial results of the Bank for \nthe quarter and half year ended September 30, 2024, has been made available on 

In [99]:
query

"HDFC Bank's results are for which quarter?"

In [100]:
# complete rag chain
rag_chain = (
    {"context": retriever | doc2str, "question": RunnablePassthrough()}
    | prompt 
    | llm 
    | StrOutputParser()
)

query = "HDFC Bank's results are for which quarter?"

response = rag_chain.invoke(query)

print(response)

HDFC Bank's results are for the quarter ended September 30, 2024.


# Conversation RAG
RAG with Chat History

In [103]:
# example conversation
from langchain_core.messages import HumanMessage, AIMessage

chat_history = []
chat_history.extend([
    HumanMessage(content=query),
    AIMessage(content=response)
])
chat_history

[HumanMessage(content="HDFC Bank's results are for which quarter?", additional_kwargs={}, response_metadata={}),
 AIMessage(content="HDFC Bank's results are for the quarter ended September 30, 2024.", additional_kwargs={}, response_metadata={})]

In [104]:
from langchain_core.prompts import MessagesPlaceholder

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question"
    "which might reference context in the chat history,"
    "formulate a standalone question which can be understood"
    "without the chat history. Do NOT answer the question,"
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_system_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)
contextualize_chain = contextualize_q_system_prompt | llm | StrOutputParser()
contextualize_chain.invoke({"input": "Where is it headquartered?", "chat_history": chat_history})

'Where is HDFC Bank headquartered?'

In [107]:
retriever.invoke("Where is it headquartered?")

[Document(id='f920b4d6-1c23-470a-bf83-9566aebc2525', metadata={'page': 0, 'source': 'D:\\ML\\LangChain RAG Crash Course\\Data\\Reliance Quarterly Results.pdf'}, page_content='Regd. Office: 3rd Floor, Maker Chambers IV, 222, Nariman Point, Mumbai- 400 021, India \nPhone #: +91-22-3555 5000, Telefax: +91-22-2204 2268. E-mail: investor.relations@ril.com, Website: www.ril.com \nCIN- L17110MH1973PLC019786 \n \n \nJanuary 17, 2025 \n \nBSE Limited \nPhiroze Jeejeebhoy Towers, \nDalal Street, \nMumbai 400 001 \n \nNational Stock Exchange of India Limited \nExchange Plaza, Plot No. C/1,  \nG Block, Bandra - Kurla Complex,  \nBandra (East), Mumbai 400 051 \n \nScrip Code: 500325  \n \nTrading Symbol: RELIANCE \nDear Sirs, \n \nSub:   Audio / video recording and transcript of the presentation made to the analysts \non the Unaudited Financial Results (Consolidated and Standalone) for the \nquarter and nine months ended December 31, 2024 \n \nPursuant to Regulation 30 of the Securities and Exchang

In [106]:
retriever.invoke('Where is HDFC Bank headquartered?')

[Document(id='c654bc8f-9148-4b1b-930d-cb610c01a0bb', metadata={'page': 2, 'source': 'D:\\ML\\LangChain RAG Crash Course\\Data\\HDFC Quarterly Results.pdf'}, page_content="HDFC Bank Limited \n  October 19, 2024 \n \nPage 2 of 16 \n \nClassification - Public \nClassification - Pubic \nModerator: Ladies and gentlemen, good day and welcome to HDFC Bank Limited ’s Q2 FY '25 Earnings \nConference Call. As a reminder, all participant lines will be in the listen -only mode and there \nwill be an opportunity for you to ask questions after the presentation concludes. Should you need \nassistance during this conference call, pl ease signal an operator by pressing  star, then zero on \nyour touchtone phone. Please note that this conference is being recorded. \nI now hand the conference over to Mr. Srinivasan Vaidyanathan, Chief Financial Officer, HDFC \nBank. Thank you, and over to you, sir. \nSrinivasan Vaidyanathan: Thank you, Neerav. Welcome to all the participants. I appreciate you all dialing

In [105]:
# history aware retriver from langchain
from langchain.chains import create_history_aware_retriever

history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_system_prompt
)
history_aware_retriever.invoke({"input": "Where is it headquartered?", "chat_history": chat_history})

[Document(id='c654bc8f-9148-4b1b-930d-cb610c01a0bb', metadata={'page': 2, 'source': 'D:\\ML\\LangChain RAG Crash Course\\Data\\HDFC Quarterly Results.pdf'}, page_content="HDFC Bank Limited \n  October 19, 2024 \n \nPage 2 of 16 \n \nClassification - Public \nClassification - Pubic \nModerator: Ladies and gentlemen, good day and welcome to HDFC Bank Limited ’s Q2 FY '25 Earnings \nConference Call. As a reminder, all participant lines will be in the listen -only mode and there \nwill be an opportunity for you to ask questions after the presentation concludes. Should you need \nassistance during this conference call, pl ease signal an operator by pressing  star, then zero on \nyour touchtone phone. Please note that this conference is being recorded. \nI now hand the conference over to Mr. Srinivasan Vaidyanathan, Chief Financial Officer, HDFC \nBank. Thank you, and over to you, sir. \nSrinivasan Vaidyanathan: Thank you, Neerav. Welcome to all the participants. I appreciate you all dialing

In [111]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Use the following context to answer the user's question."),
    ("system", "Context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}")
])

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
rag_chain.invoke({"input": "Where is it headquartered?", "chat_history": chat_history})

{'input': 'Where is it headquartered?',
 'chat_history': [HumanMessage(content="HDFC Bank's results are for which quarter?", additional_kwargs={}, response_metadata={}),
  AIMessage(content="HDFC Bank's results are for the quarter ended September 30, 2024.", additional_kwargs={}, response_metadata={})],
 'context': [Document(id='c654bc8f-9148-4b1b-930d-cb610c01a0bb', metadata={'page': 2, 'source': 'D:\\ML\\LangChain RAG Crash Course\\Data\\HDFC Quarterly Results.pdf'}, page_content="HDFC Bank Limited \n  October 19, 2024 \n \nPage 2 of 16 \n \nClassification - Public \nClassification - Pubic \nModerator: Ladies and gentlemen, good day and welcome to HDFC Bank Limited ’s Q2 FY '25 Earnings \nConference Call. As a reminder, all participant lines will be in the listen -only mode and there \nwill be an opportunity for you to ask questions after the presentation concludes. Should you need \nassistance during this conference call, pl ease signal an operator by pressing  star, then zero on \n

# Multi-User Chatbot

In [126]:
import sqlite3
from datetime import datetime

DB_NAME = "rag_app.db"

def get_db_connection():
    conn = sqlite3.connect(DB_NAME)
    conn.row_factory = sqlite3.Row
    return conn

def create_application_logs():
    conn = get_db_connection()
    conn.execute("""CREATE TABLE IF NOT EXISTS application_logs
                 (id INTEGER PRIMARY KEY AUTOINCREMENT,
                 session_id TEXT,
                 user_query TEXT,
                 gpt_response TEXT,
                 model TEXT,
                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)""")
    conn.close()

def insert_application_logs(session_id, user_query, gpt_response, model):
    conn = get_db_connection()
    conn.execute("INSERT INTO application_logs (session_id, user_query, gpt_response, model) VALUES (?, ?, ?, ?)",
                 (session_id, user_query, gpt_response, model))
    conn.commit()
    conn.close()

def get_chat_history(session_id):
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute("""SELECT user_query, gpt_response 
                   FROM application_logs 
                   WHERE session_id = ?
                   ORDER BY created_at""", (session_id,))
    messages = []
    for row in cursor.fetchall():
        messages.extend([
            {"role": "human", "content": row['user_query']},
            {"role": "ai", "content": row['gpt_response']}
        ])
    conn.close()
    return messages

# Initialize the database
create_application_logs()

In [128]:
model

NameError: name 'model' is not defined

In [130]:
import uuid

session_id = str(uuid.uuid4())
print(session_id)
chat_history = get_chat_history(session_id)
print(chat_history)

question1 = "HDFC Bank's results are for which quarter?"
answer1 = rag_chain.invoke({"input": question1, "chat_history": chat_history})['answer']
insert_application_logs(session_id, question1, answer1, MODEL)
print(f"Human: {question1}")
print(f"AI: {answer1}")

7fbc6428-e699-4f39-aba8-6daf3cf09c8b
[]
Human: HDFC Bank's results are for which quarter?
AI: HDFC Bank's results are for the quarter and half year ended September 30, 2024.


In [132]:
question2 = "Where is it headquartered?"
chat_history = get_chat_history(session_id)
print(chat_history)
answer2 = rag_chain.invoke({"input": question2, "chat_history": chat_history})['answer']
insert_application_logs(session_id, question2, answer2, MODEL)
print(f"Human: {question2}")
print(f"AI: {answer2}")

[{'role': 'human', 'content': "HDFC Bank's results are for which quarter?"}, {'role': 'ai', 'content': "HDFC Bank's results are for the quarter and half year ended September 30, 2024."}]
Human: Where is it headquartered?
AI: HDFC Bank is headquartered in Mumbai, India.


# END