In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Chat model used by the QA chain to write the answer.
llm = ChatOpenAI()

# Local file store (under ".cache/") where computed embeddings are persisted.
cache_dir = LocalFileStore(".cache/")

# Newline-separated chunks, sized with the tiktoken encoder: chunk_size=600
# with chunk_overlap=100 so neighbouring chunks share some context.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

# Load the document and split it into chunks in one step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Cache-backed wrapper around the embedding model. On each call it checks
# whether the embedding is already cached:
#   - if not, it computes the embedding and stores it in the cache;
#   - if so, it returns the cached value.
# The namespace is set to the embedding model name so that vectors produced by
# one model are never served for another model from the same cache directory.
# NOTE: entries cached before the namespace was added are keyed differently
# and will be re-embedded once.
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

vectorstore = FAISS.from_documents(docs, cache_embeddings)

# A retriever can fetch documents not only from a vector store but also from
# other sources such as database searches or cloud services.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    # Switch chain_type to "stuff", "refine", "map_reduce" or "map_rerank"
    # as needed; nothing else has to change.
    chain_type="refine",
    retriever=vectorstore.as_retriever(),
)

# Left as a bare trailing expression so the notebook displays the answer.
chain.run("Describe Victory Mansions")

"The additional context provided gives a deeper insight into the living conditions and restrictions faced by the residents of Victory Mansions. The description of Winston's strategic positioning to evade the telescreen surveillance and his secret possession of the forbidden book highlights the oppressive nature of the society in which they live. The fact that Winston feels compelled to hide the book, even though it is empty, demonstrates the extreme measures individuals must take to maintain any sense of personal freedom and individuality within the confines of Victory Mansions. This further emphasizes the bleak and restrictive environment in which the residents are forced to exist."