# Document Question Answering

An example of using Chroma DB and LangChain to do question answering over documents.

In [None]:
import os
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader

## Load documents

Load the documents you want to ask questions about. To run this over your own documents, this is the section to replace.

In [None]:
# Read the source text file; swap the path to query a different document.
loader = TextLoader(file_path="state_of_the_union.txt")
# `documents` is what the splitting step below consumes.
documents = loader.load()

## Split documents

Split documents into small chunks. This is so we can find the most relevant chunks for a query and pass only those into the LLM.

In [None]:
# Cut the loaded documents into chunks of up to 1000 characters with no
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
# `texts` holds the resulting chunks that get embedded in the next step.
texts = text_splitter.split_documents(documents)

## Initialize ChromaDB

Create embeddings for each chunk and insert into the Chroma vector database.

In [None]:
# Embedding model used for every chunk.
# Plain OpenAI variant (kept for reference):
#   embeddings = OpenAIEmbeddings()
# Azure OpenAI variant (active).
# NOTE(review): chunk_size=1 presumably limits each embedding request to a
# single text, which Azure deployments required -- confirm before changing.
embeddings = OpenAIEmbeddings(chunk_size=1)

# Embed the chunks and insert them into a Chroma vector store.
vectordb = Chroma.from_documents(documents=texts, embedding=embeddings)

## Create the chain

Initialize the chain we will use for question answering.

In [None]:
from langchain.chains import RetrievalQA
from langchain.llms import AzureOpenAI

# Build the question-answering chain on top of the Chroma store.
#
# RetrievalQA replaces the deprecated VectorDBQA chain: instead of taking the
# vector store directly, it takes a retriever, obtained here with
# `vectordb.as_retriever()`. The chain still accepts a "query" input, so
# `qa.run(query)` keeps working unchanged.
#
# chain_type="stuff" puts all retrieved chunks into a single prompt.
# The LLM is an Azure OpenAI deployment; `deployment_name` must match the
# name of the deployment configured in your Azure resource.
qa = RetrievalQA.from_chain_type(
    llm=AzureOpenAI(
        deployment_name="text-davinci-003",
        model_name="text-davinci-003",
    ),
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
)

## Ask questions!

Now we can use the chain to ask questions!

In [None]:
# Natural-language question to answer from the indexed document chunks.
query = "Is America a great country? Which parts of the document mentioned this?"
# Run the chain on the question. This is the last expression in the cell, so
# in a notebook its return value is shown as the cell output.
qa.run(query)