In [2]:
#! uv pip install langchain openai tiktoken rapidocr-onnxruntime python-dotenv langchain-community

In [3]:
#! uv add langchain langchain-community

In [4]:
# "tiktoken is OpenAI-specific; for Mistral I use Hugging Face tokenizers.
# OCR and LangChain are model-agnostic, so they work seamlessly with Mistral."


In [5]:
import os

from dotenv import load_dotenv

# Copy key/value pairs from a local .env file into the process environment.
load_dotenv()

# Read the Mistral credential and fail fast with a clear message when it is
# missing, instead of letting a later API call fail with an opaque auth error.
# Never print the key: notebook outputs are saved and shared with the file.
api_key = os.getenv("MISTRAL_API_KEY")
if not api_key:
    raise RuntimeError(
        "MISTRAL_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

In [6]:
#from langchain.document_loaders import TextLoader
from langchain_community.document_loaders import TextLoader
# NOTE: machine-specific absolute path; adjust it to wherever agentic_ai.txt lives.
# This is a raw string, so each path separator is written as a single backslash
# (doubling them inside r"..." puts literal double backslashes into the path).
data_file = r"C:\Users\hp\Desktop\MLops\LLMOPS\LLMOPS_RAG_V1\data\agentic_ai.txt"

# Read the UTF-8 text file into LangChain Document objects.
loader = TextLoader(data_file, encoding="utf8")
documents = loader.load()

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
documents[0].page_content[:500]  # Show the first 500 characters of the first document

'Overview\nAI agents possess several key attributes, including complex goal structures, natural language interfaces, the capacity to act independently of user supervision, and the integration of software tools or planning systems. Their control flow is frequently driven by large language models (LLMs).[2] Agents also include memory systems for remembering previous user-agent interactions and orchestration software for organizing agent components.[3]\n\nResearchers and commentators have noted that AI'

## Chunking using RecursiveCharacterTextSplitter
The website below might help you in determining the chunk size and overlap.  
https://chunkviz.up.railway.app/

In [8]:
# uv add langchain-text-splitters


In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured for chunks of at most 200 characters, with up to
# 20 characters repeated between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)

In [10]:
# Break the loaded documents into smaller chunks using the splitter configured above.
text_chunks = splitter.split_documents(documents)

In [11]:
# Report how many chunks were produced, then preview only the first few:
# echoing the entire list floods the notebook with over a hundred Documents.
print("Total Chunks : ",len(text_chunks))
text_chunks[:5]

Total Chunks :  118


[Document(metadata={'source': 'C:\\\\Users\\\\hp\\Desktop\\\\MLops\\\\LLMOPS\\\\LLMOPS_RAG_V1\\\\data\\\\agentic_ai.txt'}, page_content='Overview'),
 Document(metadata={'source': 'C:\\\\Users\\\\hp\\Desktop\\\\MLops\\\\LLMOPS\\\\LLMOPS_RAG_V1\\\\data\\\\agentic_ai.txt'}, page_content='AI agents possess several key attributes, including complex goal structures, natural language interfaces, the capacity to act independently of user supervision, and the integration of software tools'),
 Document(metadata={'source': 'C:\\\\Users\\\\hp\\Desktop\\\\MLops\\\\LLMOPS\\\\LLMOPS_RAG_V1\\\\data\\\\agentic_ai.txt'}, page_content='of software tools or planning systems. Their control flow is frequently driven by large language models (LLMs).[2] Agents also include memory systems for remembering previous user-agent interactions'),
 Document(metadata={'source': 'C:\\\\Users\\\\hp\\Desktop\\\\MLops\\\\LLMOPS\\\\LLMOPS_RAG_V1\\\\data\\\\agentic_ai.txt'}, page_content='interactions and orchestration softw

## Installing Vector Store


In [12]:
# Shell escape: install the CPU build of FAISS (the vector store backend used below) with uv.
! uv pip install faiss-cpu

[2mUsing Python 3.11.8 environment at: C:\Users\hp\Desktop\MLops\LLMOPS\LLMOPS_RAG_V1\.venv[0m
[2mAudited [1m1 package[0m [2min 150ms[0m[0m


# OpenAI Embeddings Equivalents

    As we are not utilizing the OpenAI API, "tiktoken" cannot be used ...
    "tiktoken is OpenAI-specific; for Mistral I use Hugging Face tokenizers.
    OCR and LangChain are model-agnostic, so they work seamlessly with Mistral."


# Mistral equivalent of OpenAIEmbeddings

        If you want embeddings without OpenAI, you do this:

        Option 1: Hugging Face embeddings (local)
        from langchain_community.embeddings import HuggingFaceEmbeddings

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        Option 2: Mistral embeddings (API)
        from langchain_mistralai import MistralAIEmbeddings

        embeddings = MistralAIEmbeddings(
            model="mistral-embed"
        )


In [13]:
# Shell escape: add the LangChain <-> Mistral integration package to the project with uv.
! uv add langchain_mistralai

[2mResolved [1m96 packages[0m [2min 14ms[0m[0m
[2mAudited [1m90 packages[0m [2min 959ms[0m[0m


In [14]:
from langchain_mistralai import MistralAIEmbeddings

# Embedding client for the "mistral-embed" model. No key is passed explicitly;
# presumably it is picked up from the MISTRAL_API_KEY environment variable
# loaded earlier -- confirm against the langchain_mistralai docs.
embeddings = MistralAIEmbeddings(model="mistral-embed")

In [15]:
from langchain_community.vectorstores import FAISS
# Embed every chunk and build a FAISS index over the resulting vectors.
vectorstore = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)

In [16]:
# Display the vector store object to confirm it was created.
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x1ff5fcc2b90>

In [17]:
# Wrap the vector store as a retriever (default settings) so it can be used as a step in the chain below.
retriever=vectorstore.as_retriever()

## Similarity Search 
    Before going ahead, let's see a bit about similarity search and what it does.

    Similarity search finds the most relevant documents by comparing the vector embeddings of a query with stored document embeddings using distance metrics like cosine similarity.
    It retrieves content based on semantic meaning, not exact keywords.
    This is the core mechanism behind RAG systems, recommendations, and semantic search engines.

In [18]:
# Ask the vector store directly for the four chunks closest to the query.
query = "What is the Key Points of Agentic AI?"
docs = vectorstore.similarity_search(query, k=4)

# Show each hit: a numbered header, the chunk text, then a 50-dash rule.
for i, doc in enumerate(docs):
    print(f"Document {i+1}:", doc.page_content, "-" * 50, sep="\n")

Document 1:
AI agents possess several key attributes, including complex goal structures, natural language interfaces, the capacity to act independently of user supervision, and the integration of software tools
--------------------------------------------------
Document 2:
Researchers and commentators have noted that AI agents do not have a standard definition.[2][4][5][6] The concept of agentic AI has been compared to the fictional character J.A.R.V.I.S..[7]
--------------------------------------------------
Document 3:
Multimodal AI agents
--------------------------------------------------
Document 4:
A common application of AI agents is the automation of tasks‚Äîfor example, booking travel plans based on a user's prompted request.[8][9] Prominent examples include Devin AI, AutoGPT, and SIMA.[10]
--------------------------------------------------


In [19]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text for the question-answering step. It has two placeholders,
# {question} and {context}, which are filled in when the prompt is rendered.
template="""You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use ten sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""

In [20]:
# Build a chat prompt object from the template string defined above.
prompt=ChatPromptTemplate.from_template(template)

In [21]:
# Display the prompt object to inspect its input variables and message structure.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks.\nUse the following pieces of retrieved context to answer the question.\nIf you don't know the answer, just say that you don't know.\nUse ten sentences maximum and keep the answer concise.\nQuestion: {question}\nContext: {context}\nAnswer:\n"), additional_kwargs={})])

## StrOutputParser

    This imports StrOutputParser, which is used to convert an LLM's response into a clean Python string.
    What it does (in simple terms)
    Takes the raw LLM output (often a message/object)
    Extracts only the text content
    Removes metadata, roles, or formatting

In [22]:
# Import from langchain_core (already used for ChatPromptTemplate) rather than
# the langchain_classic legacy path, which this notebook never installs explicitly.
from langchain_core.output_parsers import StrOutputParser

In [23]:
# Parser used as the last step of the chain to turn the model's reply into a plain string.
output_parser=StrOutputParser()

# Selecting the Model
    In my case I am using the Mistral AI model, because I have hit the free limit of ChatGPT.

In [24]:
from langchain_mistralai import ChatMistralAI

# Chat model used for answer generation, authenticated with the key read from the environment.
llm = ChatMistralAI(model="mistral-small", api_key=api_key)

# RAG PIPELINE WITH LANGCHAIN
    The following code builds a RAG (Retrieval-Augmented Generation) pipeline using LangChain's LCEL (LangChain Expression Language).
    What RunnablePassthrough does :
            RunnablePassthrough():
            Takes the input as-is
            Passes it forward without modifying it
            Used when the same input needs to be routed to multiple components
            
    What this chain achieves

            ✔ Retrieves relevant documents
            ✔ Injects them into the prompt
            ✔ Sends everything to the LLM
            ✔ Returns a clean text answer

            👉 This is a canonical RAG pipeline  Input → Retrieve → Prompt → LLM → Parse

    This LCEL pipeline routes the user query to both a retriever and directly into the prompt, enabling context-aware generation. RunnablePassthrough ensures the original question is preserved while retrieval happens in parallel.

In [25]:
# Import from langchain_core (already used for ChatPromptTemplate) rather than
# the langchain_classic legacy path.
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' text separated by blank lines ("" when nothing was retrieved).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The dict below fans the incoming question out to two branches:
#   "context"  -> retriever, whose hits are flattened to plain text by format_docs
#                 (without this step the prompt would receive Document reprs,
#                 metadata and local file path included);
#   "question" -> RunnablePassthrough(), i.e. the question unchanged.
# The resulting {"context": ..., "question": ...} mapping fills the prompt
# template, the rendered prompt goes to the LLM, and the parser returns a string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

In [26]:
# Run the full chain on a sample question; the cell output is the parsed answer string.
rag_chain.invoke("Tell me about the Agentic Ai ?")

'Agentic AI refers to AI systems designed to act autonomously with specific attributes. These include complex goal structures, natural language interfaces, and the ability to operate independently without constant user supervision. They often integrate various software tools to perform tasks. The concept has been compared to fictional AI like J.A.R.V.I.S. from popular media. AI agents lack a standardized definition but are categorized into types like business-task agents and conversational agents. Governments and organizations, such as the city of Kyle, Texas, have deployed AI agents for practical applications. Their roles range from enterprise software tasks to customer support.'