# Build a Retrieval Augmented Generation (RAG) App

# ChatMistralAI, LangChain, Chroma

In [1]:
# ! pip install langchain 
# ! pip install langchain_community langchain_chroma
# ! pip install langchain-mistralai
# ! pip install sentence_transformers 
# ! pip install -U langchain-huggingface
# !pip install sentence_transformers==2.4.0 --quiet
# !pip install unstructured --quiet
# !pip install pdf2image --quiet
# !pip install pdfminer.six==20221105 --quiet
# !pip install unstructured-inference --quiet
# !pip install pikepdf==8.13.0 --quiet
# !pip install pypdf==4.0.2 --quiet
# !pip install pillow_heif==0.15.0 --quiet

In [2]:
import os
from textwrap import fill

from dotenv import load_dotenv
from langchain.vectorstores.chroma import Chroma
from langchain.vectorstores.utils import filter_complex_metadata
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_mistralai import ChatMistralAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from tqdm.autonotebook import tqdm, trange

# Load environment variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the Mistral API key is supplied — confirm.
load_dotenv()


  from tqdm.autonotebook import tqdm, trange


True

**Using the ChatMistralAI `mistral-large-latest` model**

In [3]:
# Chat model used for answer generation; the model id lives in one named
# constant so it can be swapped without hunting through the notebook.
MISTRAL_MODEL_NAME = "mistral-large-latest"
llm = ChatMistralAI(model=MISTRAL_MODEL_NAME)

**Using a HuggingFaceEmbeddings model**

In [4]:
# Pin the embedding model explicitly instead of relying on the library's
# implicit default: vectors produced by different models are not comparable,
# so the index must always be built and queried with the same model.
# The name below is the documented default of HuggingFaceEmbeddings at the
# time of writing, so existing results are unchanged.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

**Load the documents/data**

In [5]:
# Load every PDF found (recursively) under the local "documents" directory.
loader = DirectoryLoader("documents", glob="**/*.pdf")
raw_docs = loader.load()

# Strip metadata values that the vector store cannot hold; the raw load is
# kept under its own name so re-running this cell never filters twice.
docs = filter_complex_metadata(raw_docs)

# Fail fast with a clear message: with nothing loaded, the later splitting and
# indexing cells would only fail further downstream with a less obvious error.
if not docs:
    raise FileNotFoundError(
        "No PDF documents were loaded from 'documents' (glob '**/*.pdf')."
    )

print(f"Number of documents loaded: {len(docs)}")

Number of documents loaded: 1


**Split into chunks**

In [6]:
# Chunking parameters: target chunk length and the overlap shared by
# neighbouring chunks.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded documents into chunks and report how many were produced.
splits = text_splitter.split_documents(docs)
print(len(splits))


51


**Vector store (Chroma)**

In [7]:
# Embed every chunk with the embedding model and index the vectors in Chroma.
# NOTE(review): no persist_directory is passed, so the index is presumably
# in-memory only and rebuilt on each run — confirm.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

**Retrieval and Generation**

In [8]:
def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

### Customizing the prompt

In [9]:
# Retrieve: expose the vector store as a retriever with the search settings
# spelled out rather than left implicit. Both values match the documented
# defaults of as_retriever(), so retrieval results are unchanged; raise
# RETRIEVER_TOP_K to feed more chunks into the prompt.
RETRIEVER_TOP_K = 4
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": RETRIEVER_TOP_K},
)

In [12]:
# Prompt text: answering instructions, then the retrieved context, then the
# user question.
# NOTE(review): the first two instruction lines overlap, and the continuation
# lines carry their source indentation into the prompt; the text is left
# untouched here so the model input stays exactly as before.
template = """only Use the following pieces of context to answer the question at the end.
            only Use the following pieces of retrieved context to answer
            If you don't know the answer from the context, just say that you don't know, don't try to make up an answer.
            Use three sentences maximum and keep the answer as concise as possible.
            Always say "thanks for asking!" at the end of the answer.

            {context}

            Question: {question}

            Helpful Answer:"""

custom_rag_prompt = PromptTemplate.from_template(template)

# Map the incoming question onto the two prompt variables: it is run through
# the retriever and formatted into "context", and forwarded as-is as "question".
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build inputs -> fill the prompt -> call the chat model ->
# reduce the model message to a plain string.
rag_chain = prompt_inputs | custom_rag_prompt | llm | StrOutputParser()



In [16]:
# Ask the chain a question and print the answer wrapped to 100 columns.
# `fill` comes from the notebook's import cell, so this cell no longer
# carries its own mid-notebook import.
question = "What is Self-attention is a mechanism and what is used for?"
answer = rag_chain.invoke(question)
print(fill(answer, width=100))

Self-attention, also known as intra-attention, is a mechanism that relates different positions of a
single sequence to compute a representation of the sequence. It is used in various tasks such as
reading comprehension, abstractive summarization, textual entailment, and learning task-independent
sentence representations. The Transformer model, which relies entirely on self-attention, uses it to
compute representations of its input and output without using sequence-aligned RNNs or convolution.
Thanks for asking!


In [17]:
# Ask the chain a second question and print the answer wrapped to 100 columns.
question = "What is aduvantages of llama 2?"
answer = rag_chain.invoke(question)
print(fill(answer, width=100))

I'm sorry for the confusion, but the context provided does not contain any information about an
entity named "llama 2". Therefore, I'm unable to provide the advantages of llama 2. Thanks for
asking!
