# LangChain: Q&A over Documents

An example might be a tool that would allow you to query a product catalog for items of interest.

In [None]:
import os

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load it; the return value is bound to `_` because it is not used.
# NOTE(review): presumably this supplies the API credentials the OpenAI classes below need — confirm.
_ = load_dotenv(find_dotenv()) # read local .env file

In [None]:
from langchain.document_loaders import CSVLoader
from IPython.display import display, Markdown

### A detailed step-by-step procedure to create an embedding-based index and query it

In [47]:
# Point a CSVLoader at the outdoor clothing catalog and load it.
# `docs` (the result of loader.load()) is what gets indexed in the vector store below.
file = '../OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
docs = loader.load()

In [None]:
from langchain.embeddings import OpenAIEmbeddings
# Embedding model created with default settings; reused below to embed a sample
# sentence, to build the vector store, and for the customized index at the end.
embeddings = OpenAIEmbeddings()

In [None]:
# Embed a sample sentence, just to see what an embedding looks like.
embed = embeddings.embed_query("Hi my name is Harrison")

In [None]:
# Number of elements in the embedding of the sample sentence.
print(len(embed))

In [None]:
# Peek at the first five entries of the embedding.
print(embed[:5])

In [None]:
from langchain.vectorstores import DocArrayInMemorySearch

In [None]:
# Build an in-memory vector store from the loaded catalog documents,
# using `embeddings` to embed them.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [None]:
# Natural-language query used for the similarity search below (and, when the
# cells are run in order, for the first index.query call later on).
query = "Please suggest a shirt with sunblocking"

In [None]:
# Ask the vector store for the documents most similar to the query.
# NOTE(review): "relevent" is a misspelling of "relevant"; the name is kept
# because later cells reference it.
relevent_docs = db.similarity_search(query)

In [None]:
# How many documents the similarity search returned.
len(relevent_docs)

In [None]:
# Inspect the first returned document.
relevent_docs[0]

In [None]:
# Expose the vector store as a retriever; it is passed to RetrievalQA further down.
retriever = db.as_retriever()

In [None]:
from langchain.chat_models import ChatOpenAI

In [None]:
# Chat model shared by every LLM call in this notebook.
# NOTE(review): temperature 0.0 is presumably chosen to keep answers as
# repeatable as possible — confirm.
llm = ChatOpenAI(temperature=0.0)

In [None]:
# Concatenate the text of every retrieved document into a single context string.
# Documents are joined back-to-back, with no separator between them.
qdocs = "".join(doc.page_content for doc in relevent_docs)

In [None]:
# Prepend the retrieved catalog text to the question and send the whole
# prompt to the chat model in a single call.
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)


In [None]:
# Render the model's answer as Markdown in the notebook output.
display(Markdown(response))

### Retrieval using langchain shortcuts

In [None]:
# Same path and loader construction as in the step-by-step section above.
# NOTE(review): presumably repeated so this section can be run on its own — confirm.
file = '../OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)

In [None]:
from langchain.indexes import VectorstoreIndexCreator

In [None]:
# Shortcut: let VectorstoreIndexCreator build the index straight from the
# CSV loader, backed by the DocArrayInMemorySearch vector store class.
index = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch).from_loaders(
    [loader]
)

In [None]:
# Query the index with the shared chat model.
# When cells are run top to bottom, `query` here is still the
# "Please suggest a shirt with sunblocking" string assigned earlier.
response = index.query(query, llm=llm)

In [None]:
# Render the index's answer as Markdown in the notebook output.
display(Markdown(response))

In [None]:
from langchain.chains import RetrievalQA

In [None]:
# Retrieval QA chain over the retriever created from `db`, using the "stuff"
# chain type and the shared chat model, with verbose output enabled.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, verbose=True
)

**stuff method**
 - simplest method
 - stuff all data into the context and pass to llm
 - make single call
 - llm has all data at once
 - but LLMs have a limited context length, so stuffing everything in may produce a prompt larger than the context window

Besides the **stuff** method, there are other chain types such as **map_reduce**, **refine**, etc.

In [None]:
# Question put to the RetrievalQA chain; split across two adjacent literals
# purely to keep the source lines short.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [None]:
# Run the RetrievalQA chain on the question and keep its answer.
response = qa_stuff.run(query)

In [None]:
# Render the chain's answer as Markdown in the notebook output.
display(Markdown(response))

### Retrieval from customized indexes

In [None]:
# Same shortcut as before, but with both the embedding model and the
# vector store class chosen explicitly.
index = VectorstoreIndexCreator(
    embedding=embeddings,  # customized embeddings (the OpenAIEmbeddings instance created earlier)
    vectorstore_cls=DocArrayInMemorySearch,  # customized vectorstore
).from_loaders([loader])