In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# 6.7 - Retrieval QA over a single chapter file, answered with a "refine" chain.

# Chat model used by the QA chain to write the answer.
llm = ChatOpenAI()

# Local file store rooted at ./.cache/; it backs the embedding cache below.
cache_dir = LocalFileStore("./.cache/")

# Text splitter built via the tiktoken-encoder constructor.
# NOTE(review): the separator is newline + the control character \x18 +
# newline. Confirm the source file really contains that sequence; a
# separator that never matches would leave the text unsplit.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n\x18\n",
)

# Load the chapter and cut it into smaller documents, which are easier
# for the LLM to read than the whole file at once.
loader = UnstructuredFileLoader("files/chapter-1.text")
docs = loader.load_and_split(text_splitter=splitter)

# Embeddings are vector representations of the meaning behind the text.
# The wrapper routes them through `cache_dir`.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Build the vector store from the documents and their embeddings; it is
# searched later to fetch the passages related to a question.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# QA chain that answers from the retrieved documents using the "refine"
# chain type.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=retriever,
)

# Another question to try: "Where does winston live?"
# Kept as the cell's final expression so the notebook displays the answer.
chain.run("Describe Victory Mansions")


'Victory Mansions is a building located in London, as described in George Orwell\'s novel "1984". In the book, Victory Mansions is depicted as a grim and oppressive place where the protagonist, Winston Smith, resides. The building is run-down and poorly maintained, with the elevator rarely working due to the electricity being cut off during daylight hours as part of the government\'s economy drive. The hallway of the building smells of boiled cabbage and old rag mats, and a large poster of a man\'s face with the caption "BIG BROTHER IS WATCHING YOU" looms over the residents. The building is equipped with a telescreen, a device that allows the government to monitor and control the residents\' actions and thoughts. Overall, Victory Mansions is a bleak and dreary setting that reflects the oppressive and totalitarian nature of the society in which Winston lives.'