In [4]:
from langchain.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

# Source PDF for the demo; load it and split it into documents in one call.
pdf_path = "app/assets/Bezos.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load_and_split()

In [None]:
# Embedding model handed to the Chroma vector store.
embeddings = OpenAIEmbeddings()

# Chat model that generates the answers.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.8,
)

In [6]:
from langchain_chroma import Chroma
# Build the vector store from the loaded chunks, using the "capstone"
# collection persisted under the local "data" directory.
chroma_settings = {
    "persist_directory": "data",
    "collection_name": "capstone",
}
chroma_db = Chroma.from_documents(docs, embeddings, **chroma_settings)

In [10]:
# Question used for the similarity searches and LLM calls in the cells below.
query: str = "What is this document about?"

In [12]:
# Keep the search hits under their own name. Rebinding `docs` would replace the
# loaded PDF chunks, so re-running the Chroma indexing cell afterwards would
# index these few hits instead of the full document.
similar_docs = chroma_db.similarity_search(query)
print(similar_docs)

[Document(id='a1d37a75-bf53-431f-97a5-bbff9fa4747d', metadata={'author': 'SIDDESH SAKHALKAR', 'total_pages': 9, 'page': 8, 'creationdate': '2024-08-09T14:46:50+05:30', 'page_label': '9', 'creator': 'Microsoft® Word 2019', 'moddate': '2024-08-09T14:46:50+05:30', 'producer': 'Microsoft® Word 2019', 'source': 'Bezos.pdf'}, page_content='Leadership style \n"Day 1" Management Philosophy \nDay 1: start up \nDay 2: stasis \nDay 3: irrelevance \nDay 4: "excruciating, painful decline" \nDay 5: death  \nBezos has stated "it is always Day 1" to describe his growth mindset.[164][165] \nBezos used what he called a "regret-minimization framework" while he worked at D. E. Shaw and \nagain during the early years of Amazon. He described this life philosophy by stating: "When I\'m 80, \nam I going to regret leaving Wall Street? No. Will I regret missing the beginning of the Internet? \nYes."[166] During the 1990s and early 2000s at Amazon, he was characterized as trying to quantify all \naspects of runn

In [21]:
# System-prompt text for the chat template. It contains a {context}
# placeholder that is supplied when the template is formatted.
prompt = """

    Answer the user query based on the context and analyzie the chat history when required.
    if the answer dosent exist in the context and chat history then say i dont know
    if the answer is related to chat history then give an appropriate answer.

    context:{context}

    """

# Run a similarity search for the current `query` against the Chroma store.
source_docs = chroma_db.similarity_search(query=query)

# Dump the raw search results for inspection.
print(source_docs)

[Document(id='a1d37a75-bf53-431f-97a5-bbff9fa4747d', metadata={'creator': 'Microsoft® Word 2019', 'author': 'SIDDESH SAKHALKAR', 'moddate': '2024-08-09T14:46:50+05:30', 'total_pages': 9, 'page': 8, 'page_label': '9', 'producer': 'Microsoft® Word 2019', 'source': 'Bezos.pdf', 'creationdate': '2024-08-09T14:46:50+05:30'}, page_content='Leadership style \n"Day 1" Management Philosophy \nDay 1: start up \nDay 2: stasis \nDay 3: irrelevance \nDay 4: "excruciating, painful decline" \nDay 5: death  \nBezos has stated "it is always Day 1" to describe his growth mindset.[164][165] \nBezos used what he called a "regret-minimization framework" while he worked at D. E. Shaw and \nagain during the early years of Amazon. He described this life philosophy by stating: "When I\'m 80, \nam I going to regret leaving Wall Street? No. Will I regret missing the beginning of the Internet? \nYes."[166] During the 1990s and early 2000s at Amazon, he was characterized as trying to quantify all \naspects of runn

In [19]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

# Message layout: system instructions, then the prior turns, then the new
# user question.
chat_messages = [
    ("system", prompt),
    MessagesPlaceholder("history"),
    ("human", "{question}"),
]
template = ChatPromptTemplate.from_messages(chat_messages)

# Conversation turns accumulated so far; starts out empty.
chat_history = list()


In [22]:
# Flatten the retrieved chunks to plain text so the prompt carries only the
# page content rather than the repr of a list of Document objects.
context_text = "\n\n".join(doc.page_content for doc in source_docs)

# format_messages() keeps the system / history / human turns as separate chat
# messages. template.format() would collapse them into a single
# "System: ...\nHuman: ..." string, which the chat model then receives as one
# human message.
messages = template.format_messages(
    question=query,
    history=chat_history,
    context=context_text,
)
res = llm.invoke(messages)
print(res.content)

This document provides information about Jeff Bezos, specifically focusing on his leadership style, early life, education, business career, investments, and space exploration endeavors. It also covers his philosophy, work-life harmony, and interactions with Amazon shareholders and customers.


In [23]:
# Record the finished exchange (user question, then model answer) so the next
# prompt can include it as history.
chat_history.extend(
    [
        HumanMessage(content=query),
        AIMessage(content=res.content),
    ]
)

In [24]:
# Follow-up question that refers back to the conversation itself.
query = "What was my 1st question ?"

# Re-run the similarity search for the new question.
source_docs = chroma_db.similarity_search(query)


In [25]:
# Flatten the retrieved chunks to plain text so the prompt carries only the
# page content rather than the repr of a list of Document objects.
context_text = "\n\n".join(doc.page_content for doc in source_docs)

# format_messages() keeps the system / history / human turns as separate chat
# messages. template.format() would collapse them into a single
# "System: ...\nHuman: ...\nAI: ..." string, which the model tends to mimic
# by prefixing its reply with "AI:".
messages = template.format_messages(
    question=query,
    history=chat_history,
    context=context_text,
)
res = llm.invoke(messages)
print(res.content)

AI: Your first question was "What is this document about?"


In [15]:
from langchain.chains import RetrievalQA

# Retrieval QA chain of type "stuff", backed by the Chroma store's retriever.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=chroma_db.as_retriever(),
)

# This chain receives no chat history, so ask the standalone document question
# explicitly. The global `query` has been rebound to a follow-up about the
# conversation by the time this cell runs top to bottom.
qa_query = "What is this document about?"

# Calling the chain object directly (chain(query)) is deprecated; invoke() is
# the supported entry point and returns the same {'query', 'result'} dict.
response = chain.invoke({"query": qa_query})

print(response)


{'query': 'What is this document about?', 'result': 'This document provides information about Jeff Bezos, the founder of Amazon, his business ventures, leadership style, early life, education, and involvement in space exploration through Blue Origin. It covers various aspects of his personal and professional life, including his management philosophies, investments, and contributions to different industries such as media, technology, and space travel.'}
