In [32]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Chat model that writes the final answers (constructed with library defaults).
llm = ChatOpenAI()

# Byte store rooted at ./.cache/ that backs the embedding cache below.
cache_dir = LocalFileStore("./.cache/")

# Newline-separated splitting; sizes are given to the tiktoken-based
# constructor, with a 100-unit overlap between neighbouring chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n",
)

# Read the source file, then cut it into chunks with the splitter above.
loader = UnstructuredFileLoader("./files/chapter_one.txt")
docs = splitter.split_documents(loader.load())

# Wrap the OpenAI embedder so computed vectors are stored in `cache_dir`.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Index the chunks in a Chroma vector store using the cached embedder.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

# Question-answering chain: retrieve chunks from the store, answer with the
# "refine" chain type.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="refine",
    llm=llm,
)


In [33]:
# Ask the retrieval chain a question; the returned answer is the cell output.
question = "Where does Winston live?"
chain.run(question)

"Thank you for providing the additional context from George Orwell's novel 1984. Based on this context, Winston Smith lives in Victory Mansions. The flat where he resides is on the seventh floor and is described as having a hallway that smells of boiled cabbage and old rag mats. The flat has a telescreen, an oblong metal plaque on the right-hand wall that cannot be completely shut off, constantly broadcasting information. Winston's flat also has a window through which he can look out."

In [34]:
# Query spelling corrected ("Mensions" -> "Mansions"). The answer recorded for
# the misspelled query described "Victory Gin" / "Victory Cigarettes" rather
# than the building, so re-run this cell to refresh the stored output.
chain.run("Please describe 'Victory Mansions'")

'In the dystopian world of George Orwell\'s novel, 1984, the mention of "Victory Gin" and "Victory Cigarettes" takes on a deeper meaning. The description highlights the oppressive nature of the Ministry of Love, a place that remains inaccessible to ordinary citizens like the protagonist, Winston. The Ministry is heavily guarded with barbed-wire entanglements, steel doors, and armed guards. This strict security emphasizes the control and fear instilled by the Party.\n\nTo cope with his challenging existence in this grim environment, Winston turns to "Victory Gin" and "Victory Cigarettes" for temporary relief. The gin is described as a colorless liquid with a plain white label, emitting an unpleasant odor reminiscent of Chinese rice spirit. Although the taste is harsh, Winston drinks it, enduring a burning sensation similar to nitric acid. However, he experiences a temporary sense of cheerfulness afterward.\n\nSimilarly, the "Victory Cigarettes" are portrayed as of poor quality, as the t