In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model used by both the per-document map step and the final answer step.
# temperature=0.1 requests low-randomness output.
llm = ChatOpenAI(temperature=0.1)

# On-disk byte store rooted at ../.cache/; it backs the embedding cache below.
cache_dir = LocalFileStore("../.cache/")

# Token-aware splitter: lengths are measured with a tiktoken encoder, aiming
# for 600-token chunks that overlap by 100 tokens.
# Split on plain newlines. CharacterTextSplitter only cuts at the separator,
# so a separator that never occurs in the loaded text (such as one containing
# a control character) would leave the whole document as a single oversized
# chunk and chunk_size would have no effect.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Read the source chapter through the Unstructured loader, then cut it into
# chunk-sized Documents with the splitter configured above.
loader = UnstructuredFileLoader("../files/chapter-1.text")
docs = loader.load_and_split(text_splitter=splitter)

# Base embedding model (text -> vector), wrapped in a cache backed by
# cache_dir so that already-embedded text is not recomputed.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Index the chunks in a FAISS vector store for similarity search, and expose
# the store as a retriever that returns the documents relevant to a query.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# --- Map stage: condense each retrieved chunk down to its relevant text ---

# Prompt applied to every retrieved document individually. The system message
# carries one chunk as {context}; the human message carries the {question}.
# The model is asked to return, verbatim, whatever part of the chunk is relevant.
map_doc_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
            Use the following portion of a long document to see if any of the text is relevant to answer the question.
            Return any relevant text verbatim.
            -------
            {context}
            """,
    ),
    ("human", "{question}"),
])

# Map-stage chain: fill the prompt, then send it to the chat model.
map_doc_chain = map_doc_prompt | llm

# Map step: run map_doc_chain once per retrieved document and merge the outputs.
def map_docs(inputs):
    """Extract the question-relevant text from each retrieved document.

    Args:
        inputs: Mapping with a 'documents' entry (an iterable of objects that
            expose ``page_content``) and a 'question' entry.

    Returns:
        The ``content`` of every map_doc_chain response, in document order,
        joined with blank lines.
    """
    retrieved = inputs['documents']
    query = inputs['question']

    extracts = []
    for doc in retrieved:
        # One model call per document, each seeing only that document's text.
        response = map_doc_chain.invoke(
            {"context": doc.page_content, "question": query}
        )
        extracts.append(response.content)

    return "\n\n".join(extracts)

# Map chain: the incoming question is fanned out into a dict, where
#   'documents' is what the retriever returns for the question, and
#   'question'  is the question itself, passed through unchanged.
# That dict is then handed to map_docs, which produces the condensed context.
map_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | RunnableLambda(map_docs)
)

# --- Final Answer Stage (Reduce/Generate) ---

# Prompt for the reduce step. {context} receives the condensed extracts
# produced by the map chain and {question} receives the original question;
# the model is told to answer from those extracts or admit it does not know.
# The instruction text is sent to the model as-is, so it is kept free of typos.
final_prompt = ChatPromptTemplate.from_messages([
    ("system",
     """
     Given the following extracted parts of a long document and a question, create a final answer.
     If you don't know the answer, just say that you don't know. Don't try to make up an answer.
     ------
     {context}
     """),
    ("human", "{question}"),
])

# Full pipeline. The question is fanned out into:
#   'context'  - the output of map_chain (condensed, relevant text), and
#   'question' - the original question, passed through unchanged.
# Both fill final_prompt, and the resulting messages go to the chat model.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

# Run the pipeline for a sample question.
chain.invoke("Describe Victory Mansions")