In [None]:
import json

import langchain
# NOTE(review): presumably switches on LangChain's global debug tracing for
# every chain/LLM call made below — confirm against the installed version.
langchain.debug=True
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import VectorStore, FAISS
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

In [None]:
# There are lots of parameters you can tune in each of the steps below:
# - how documents are chunked
# - which vectorstore we use and what parameters
# - etc.

In [None]:
# Load documents
#
# Each non-blank line of the JSONL file is parsed as one JSON object whose
# keys are passed to Document(...) as keyword arguments.
# NOTE(review): this is an absolute, machine-specific path — point
# DOCUMENTS_PATH at your own copy of the file before running.
DOCUMENTS_PATH = '/Users/shawn/Downloads/documents.jsonl'
MAX_DOCUMENTS = 100  # First 100 only

documents = []
# Use a context manager so the file handle is closed deterministically, and
# read as UTF-8 explicitly rather than relying on the platform default.
with open(DOCUMENTS_PATH, encoding='utf-8') as documents_file:
    for line in documents_file:
        # Stop as soon as the cap is reached so the rest of the file is
        # neither read nor parsed.
        if len(documents) >= MAX_DOCUMENTS:
            break
        # Tolerate blank lines, which json.loads would reject.
        if not line.strip():
            continue
        documents.append(Document(**json.loads(line)))

In [None]:
# Build a FAISS vector store from the loaded documents, using OpenAI
# embeddings as the embedding backend.
# NOTE(review): presumably this calls the OpenAI API and needs credentials
# configured in the environment — confirm before running.

embeddings = OpenAIEmbeddings()
faiss = FAISS.from_documents(
    documents,
    embeddings,
)

In [None]:
# Language model that will answer the query.
# NOTE(review): verify that 'text-davinci-003' is still served by the API
# before running; swap in a supported completion model if it is not.

llm = OpenAI(
    model_name='text-davinci-003',
    temperature=0.7,
)

In [None]:
# Build the Retrieval QA chain from the LLM and a retriever over the vector store.
# chain_type 'stuff' tells the chain to just stick the found documents in the context of the query.
# Other chain types are available.

retriever = faiss.as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

In [None]:
# Run the chain on a single question; the final expression is what the cell displays.

question = 'Who are you?'
qa.run(question)