# Conclusion

## Question answering on chapters

In [1]:
import tomli, os
# Pull the OpenAI API key out of the Streamlit secrets file and publish it
# as an environment variable for the rest of the notebook.
secrets_path = "../.streamlit/secrets.toml"
with open(secrets_path, mode="rb") as secrets_file:  # opened in binary mode for tomli.load
    secrets = tomli.load(secrets_file)
os.environ["OPENAI_API_KEY"] = secrets["OPENAI_API_KEY"]

In [2]:
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# Chat model handle used as the `llm` of the question-answering chain below.
chat_model_name = 'gpt-3.5-turbo'
chat = ChatOpenAI(model_name=chat_model_name)

In [3]:
# Chapter folder name -> file name of that chapter's PDF.
# Insertion order is the chapter order, which is also the order in which the
# chapters are merged into the book.
pdf = dict(
    chap1='Chap 1 - Intro.pdf',
    chap2='Chap 2 - The ChatGPT API.pdf',
    chap3='Chap 3 - Chaining & Summarization.pdf',
    chap4='Chap 4 - Vector search & Question Answering.pdf',
    chap5='Chap 5 - Agents & Tools.pdf',
    chap6='Chap 6 - Speech-to-Text & Text-to-Speech.pdf',
    chap7='Chap 7 - Vision.pdf',
    chap8='Chap 8 - DALL-E.pdf',
    chap9='Chap 9 - Conclusion.pdf',
    chap10='Chap 10 - Appendix.pdf',
)

In [4]:
# Merge every chapter PDF into one big PDF, 'book.pdf'.
from pypdf import PdfWriter

# Relative path of each chapter file; entries with an empty file name are skipped.
pdfs = [f'../{folder}/{filename}' for folder, filename in pdf.items() if filename]

merger = PdfWriter()
# The loop variable is deliberately NOT named `pdf`: reusing that name would
# overwrite the chapter dict and make this cell fail (`str` has no `.items()`)
# the next time it is run.
for chapter_path in pdfs:
    merger.append(chapter_path)
merger.write('book.pdf')

(True, <_io.FileIO [closed]>)

In [5]:
# Load the merged book and split it into documents for indexing.
loader = PyPDFLoader('book.pdf')
pages = loader.load_and_split()
embeddings = OpenAIEmbeddings()

# Create the vector store to use as the index.
# Building it embeds every document and persists the result under ./chroma.
# Doing that unconditionally would, on each re-run of this cell, pay for the
# embeddings again and add the same documents to the persisted collection a
# second time. So reuse the persisted index when it already exists; delete
# the ./chroma directory to force a rebuild (e.g. after book.pdf changes).
chroma_dir = "./chroma"
if os.path.isdir(chroma_dir) and os.listdir(chroma_dir):
    db = Chroma(persist_directory=chroma_dir, embedding_function=embeddings)
else:
    db = Chroma.from_documents(pages, embeddings, persist_directory=chroma_dir)

In [7]:
query = "What are the vector databases mentioned in the book?"

# Retriever over the index: similarity search, 3 documents per question.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# Question-answering chain on top of the retriever; the retrieved source
# documents are returned together with the answer.
qa = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

result = qa.invoke({"query": query})
result

{'query': 'What are the vector databases mentioned in the book?',
 'result': 'The vector databases mentioned in the book are Chroma, Pinecone, Weaviate, Faiss, Qdrant, and MongoDB.',
 'source_documents': [Document(page_content='15 \n Finally, after trying several vector databases, you can build a production system with Pinecone like this:  \n \nAs you will see in a future chapter, this application21 can be nicely architectured with plugins, that clearly \ndefine the API with two main endpoints: upsert  (to update or insert the vector database), or query  that \nwill convert the prompt into an embedding and perform a vector search to find the N (= 5) closest ones.  \n4.3. Application: Question answering on Book  \nThis is what the app will look like, a simple text entry and a button to trigger the workflow. The answer \nwill be written in the body, with sources from the document corpus. Check out the code under \nchap4/qa_app.py  \n \n \n21 https://github.com/pinecone -io/examples/blob/