In [17]:
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# Chat model shared by every chain in this notebook.
llm = ChatOpenAI(temperature=0.1)

# Embeddings are wrapped in a file-backed cache so that re-running this cell
# does not re-embed chunks whose vectors are already stored on disk.
embedder = OpenAIEmbeddings()
cache_dir = LocalFileStore("../../.cache")
# Namespace the cache by embedding model: with the default (empty) namespace,
# vectors produced by different models for the same text share a cache key
# and would collide.
# NOTE: entries written earlier under the empty namespace are not reused, so
# the first run after this change embeds the document once more.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    embedder, cache_dir, namespace=embedder.model
)

# Load the source text and split it on newlines into chunks whose size
# (500, with an overlap of 50) is measured by the tiktoken encoder.
loader = UnstructuredFileLoader("../../files/1984.txt")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=50, separator="\n"
)
docs = loader.load_and_split(text_splitter=splitter)

# Index the chunks in FAISS and expose the index as a retriever for the chains.
vectorstore = FAISS.from_documents(docs, cached_embedder)
retriever = vectorstore.as_retriever()

In [18]:
# Stuff Document Chain

from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
        ),
        ("human", "{question}"),
    ]
)


def format_docs(documents):
    """Join the text of the retrieved documents into one context string.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string
            (here, the documents returned by ``retriever``).

    Returns:
        The ``page_content`` of every document, separated by blank lines.
    """
    return "\n\n".join(document.page_content for document in documents)


# Feeding the retriever straight into {context} would interpolate the str() of
# a list of Document objects (reprs, metadata, escaped newlines) into the
# prompt. Piping through format_docs sends only the chunk text; LCEL coerces
# the plain function into a runnable.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)
chain.invoke("Describe Victory Mansions")

AIMessage(content='Victory Mansions is a building located in London. It is described as having glass doors and a hallway that smells of boiled cabbage and old rag mats. The building has seven floors, and the protagonist, Winston Smith, lives on the seventh floor. The elevator in Victory Mansions is often out of service, so Winston usually takes the stairs. On each landing, there is a large poster with the face of Big Brother, the leader of the Party, with the caption "BIG BROTHER IS WATCHING YOU." The building is also mentioned to have a canteen where Winston usually has lunch.')

In [20]:
# Map Reduce Document Chain

from langchain.schema.runnable import RunnableLambda

# System templates for the two stages of the map-reduce chain. The leading
# whitespace inside the literals is part of the prompt text and is kept as is.

# Map stage: shown one document at a time; asks for the relevant text verbatim.
MAP_DOC_SYSTEM_TEMPLATE = """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """

# Reduce stage: shown the combined extracts; asks for the final answer.
REDUCE_SYSTEM_TEMPLATE = """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """

map_doc_prompt = ChatPromptTemplate.from_messages(
    [("system", MAP_DOC_SYSTEM_TEMPLATE), ("human", "{question}")]
)
prompt = ChatPromptTemplate.from_messages(
    [("system", REDUCE_SYSTEM_TEMPLATE), ("human", "{question}")]
)

# Per-document extraction chain: map prompt piped into the chat model.
map_doc_chain = map_doc_prompt | llm


def map_docs(inputs):
    """Extract the question-relevant text from each retrieved document.

    Args:
        inputs: Mapping with two keys: "documents", an iterable of objects
            exposing ``page_content``, and "question", the user's question.

    Returns:
        The ``content`` of the ``map_doc_chain`` response for every document,
        in document order, joined by blank lines. An empty document list
        yields an empty string.
    """
    question = inputs["question"]
    # Build one prompt input per document and hand them over together:
    # batch() lets the runnable process the documents concurrently instead of
    # waiting on each model call in turn, and returns results in input order.
    responses = map_doc_chain.batch(
        [
            {"context": document.page_content, "question": question}
            for document in inputs["documents"]
        ]
    )
    return "\n\n".join(response.content for response in responses)


# Map step: retrieve the documents for the question, keep the question
# alongside them, and run map_docs over the pair.
map_inputs = {
    "documents": retriever,
    "question": RunnablePassthrough(),
}
map_chain = map_inputs | RunnableLambda(map_docs)

# Reduce step: the combined extracts become {context} of the final prompt.
reduce_inputs = {"context": map_chain, "question": RunnablePassthrough()}
chain = reduce_inputs | prompt | llm
chain.invoke("Where does Winston go to work?")

AIMessage(content='Winston goes to work at the Ministry of Truth.')