# In [None]:
# Vector Store
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# Chat model used to answer the question (low sampling temperature).
llm = ChatOpenAI(temperature=0.1)

# On-disk byte store handed to the embedding cache below.
cache_dir = LocalFileStore("./.cache/")

# Newline-separated chunks; chunk size and overlap are configured through
# the tiktoken-based constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=600, chunk_overlap=100
)

# Load the source document and split it with the splitter above.
loader = UnstructuredFileLoader("./RAG/1984.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings with the file-backed store.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Build the FAISS index from the chunks and expose it as a retriever.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# Prompt with two messages: a system message carrying {context} and a human
# message carrying {question}.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the "
            "following context. If you don't know the answer just say you "
            "don't know, don't make it up:\n\n{context}",
        ),
        ("human", "{question}"),
    ]
)

# The retriever supplies {context}; RunnablePassthrough supplies {question}.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)

# Left as a bare expression so the notebook displays the model's reply.
chain.invoke("Describe Victory Mansions")

# In [6]:
# Map Reduce LCEL Chain

from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model shared by the map step and the final answer step.
llm = ChatOpenAI(temperature=0.1)

# On-disk byte store handed to the embedding cache below.
cache_dir = LocalFileStore("./.cache/")

# Document splitting: newline separator, chunk size 600 with overlap 100,
# configured through the tiktoken-based constructor.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=600, chunk_overlap=100
)
loader = UnstructuredFileLoader("./RAG/1984.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Embeddings for the chunks, wrapped with the file-backed store.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Build the FAISS index from the chunks and expose it as a retriever.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# Per-document prompt: asks the model to return, verbatim, any text in
# {context} that is relevant to {question}. The system text is kept exactly
# as written, including its leading whitespace.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
    Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
    ------
    {context}
    """,
        ),
        ("human", "{question}"),
    ]
)

# Prompt piped into the chat model; invoked once per document by map_docs.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run ``map_doc_chain`` once per document and merge the replies.

    Args:
        inputs: Mapping with a ``"documents"`` entry (an iterable of objects
            exposing ``page_content``) and a ``"question"`` entry, which is
            passed to every chain invocation unchanged.

    Returns:
        The ``content`` of each chain response, in document order, joined
        by a blank line. An empty iterable of documents yields ``""``.

    Raises:
        KeyError: If ``"documents"`` or ``"question"`` is missing.
    """
    retrieved = inputs["documents"]
    query = inputs["question"]

    extracts = []
    for document in retrieved:
        response = map_doc_chain.invoke(
            {"context": document.page_content, "question": query}
        )
        extracts.append(response.content)

    return "\n\n".join(extracts)

# Map step: the retriever supplies "documents", RunnablePassthrough supplies
# "question", and map_docs turns both into one string of extracts.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

# Overall flow:
#   1. each retrieved document goes through map_doc_prompt | llm
#   2. the individual responses are joined into a single context string


# Final prompt: answers {question} from the extracts placed in {context}.
# The system text is kept exactly as written, including its leading
# whitespace.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
    Given the following extracted parts of a long document and a question, create a final answer.
    If you don't know the answer, just say that you don't know. Don't try to make up an answer.
    ------
    {context}
    """,
        ),
        ("human", "{question}"),
    ]
)

# map_chain produces {context}; RunnablePassthrough supplies {question}.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

# Left as a bare expression so the notebook displays the model's reply.
chain.invoke("Where does Winston go to work?")

# Out: AIMessage(content='Winston goes to work at the Ministry of Truth.')