In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate 
from langchain.schema.runnable import RunnablePassthrough 

# 6.7
# Chat model used to answer questions; a low temperature is requested.
llm = ChatOpenAI(temperature=0.1)

# On-disk byte store handed to the embedding cache below.
cache_dir = LocalFileStore("../.cache/")

# Token-based splitter (tiktoken): chunks of 600 with an overlap of 100.
# NOTE(review): the separator contains a \x18 control character between two
# newlines - confirm this is intentional and not a copy/paste artifact.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n\x18\n",
)

# Load the source file, then cut it into smaller documents, which are
# easier for the LLM to read.
loader = UnstructuredFileLoader("../files/chapter-1.text")
docs = splitter.split_documents(loader.load())

# Embeddings are vector representations of the meaning behind the text.
embeddings = OpenAIEmbeddings()

# Wrap the embedder with the local byte store as its cache.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=embeddings,
    document_embedding_cache=cache_dir,
)

# The vector store is built from the documents and their embeddings; it is
# used later to run searches and fetch the related data.
vectorstore = FAISS.from_documents(documents=docs, embedding=cached_embeddings)

retriever = vectorstore.as_retriever()

# Prompt layout: the system message carries the retrieved {context}, the
# human message carries the user's {question}.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent string literals are joined with nothing in between, so
            # the first piece must end with a space; without it the model
            # receives "...know the answerjust say...".
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer "
            "just say you don't know, don't make it up:\n\n{context}",
        ),
        ("human", "{question}"),
    ]
)

# The input string is fed to both entries of the mapping: the retriever turns
# it into a list of documents for {context}, while RunnablePassthrough forwards
# it unchanged as {question}. The filled prompt is then piped into the LLM.
chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm

# Kept as the last expression so the notebook cell displays the model's reply.
chain.invoke("Describe Victory Mansions")


AIMessage(content='Victory Mansions is a building with glass doors that let in gritty dust. The hallway smells of boiled cabbage and old rag mats. Inside, there is a poster of a man\'s face with a caption that reads "BIG BROTHER IS WATCHING YOU." The flat is seven flights up, and the building is part of the economy drive in preparation for Hate Week.')