In [21]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
# from langchain.document_loaders import UnstructuredFileLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter

from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA


# Chat model handed to the QA chain further down; built with library defaults.
llm = ChatOpenAI()

# Local directory that backs the embeddings cache.
cache_dir = LocalFileStore("./.cache")

# Newline-separated chunks whose size/overlap are measured through the
# tiktoken encoder rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=300,
    chunk_size=1500,
    separator="\n",
)

# Read the source text and chunk it in a single call.
loader = TextLoader(file_path="./files/animal_farm.txt")
docs = loader.load_and_split(text_splitter=splitter)

# Embedding model used to vectorise the document chunks.
embeddings = OpenAIEmbeddings()

# Wrap the embedder with the on-disk cache held in `cache_dir`.
# The cache is namespaced by the embedding model name so that vectors produced
# by one model are never served for another model sharing the same directory
# (without a namespace the cache key is derived from the text alone).
# NOTE: entries written before this namespace was introduced are not reused;
# they are simply re-embedded and stored under the new namespace once.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Build the FAISS index over the chunks using the cached embedder.
vector_store = FAISS.from_documents(docs, cached_embeddings)

# Retrieval QA chain using the "map_rerank" strategy over the chunks returned
# by the vector store's default retriever.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_rerank",
    retriever=vector_store.as_retriever(),
)

chain.run("What is the name of the book?")



'The name of the book is "Animal Farm" by George Orwell.'

In [22]:
# Character question, passed explicitly under the chain's `query` input key.
chain.run(query="Who is Napoleon?")



"Napoleon is a pig on Animal Farm who is elected as the President. He is the only candidate for this position and further documents reveal Snowball's supposed complicity with Jones. Napoleon is also implicated in Snowball's supposed betrayal."

In [23]:
# Counting question, passed explicitly under the chain's `query` input key.
chain.run(query="How many animals are killed by Napoleon?")



"I don't know"

In [20]:
# Query the FAISS store directly, asking for ten hits, to inspect which chunks
# are retrieved for this question.
results = vector_store.similarity_search(
    query="How many animals killed by Napoleon?",
    k=10,
)

results

[Document(page_content='fetched Muriel. Muriel read the Commandment for her. It ran: "No animal\nshall kill any other animal WITHOUT CAUSE." Somehow or other, the last two\nwords had slipped out of the animals\' memory. But they saw now that the\nCommandment had not been violated; for clearly there was good reason for\nkilling the traitors who had leagued themselves with Snowball.\nThroughout the year the animals worked even harder than they had worked in\nthe previous year. To rebuild the windmill, with walls twice as thick as\nbefore, and to finish it by the appointed date, together with the regular\nwork of the farm, was a tremendous labour. There were times when it seemed\nto the animals that they worked longer hours and fed no better than they\nhad done in Jones\'s day. On Sunday mornings Squealer, holding down a long\nstrip of paper with his trotter, would read out to them lists of figures\nproving that the production of every class of foodstuff had increased by\ntwo hundred per 