# Agent 5: RAG AI Agent

In [8]:
from dotenv import load_dotenv
import os
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage, ToolMessage
from operator import add as add_messages

from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings # OpenAI-hosted embeddings (not free); only referenced by the commented-out alternative in the embeddings cell

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_chroma import Chroma # vector store that holds the chunk embeddings
from langchain_huggingface import HuggingFaceEmbeddings # -> free embedding model, the one this notebook actually uses

from langchain_core.tools import tool

# Load environment variables from a local .env file, if one exists
# (presumably where the LLM provider's API key lives - verify).
load_dotenv()

True

## What is **Chroma**?

Chroma is an open-source **vector database** designed to store embeddings and perform fast similarity search.

It handles:

* Embedding storage
* Vector indexing
* Similarity search (cosine / L2 / dot product)
* Persistent local storage

---

### How **LangChain** and **Chroma** Work Together

In a typical **RAG (Retrieval-Augmented Generation)** pipeline:

1. Documents → split into **chunks**
2. Chunks → converted to **embeddings**
3. Embeddings → stored in **Chroma**
4. User query → embedded
5. Chroma → returns most similar chunks
6. LangChain → injects retrieved chunks into prompt
7. LLM → generates final answer


In [2]:
# Chat model used by the agent, accessed through the Groq integration.
# temperature=0 keeps sampling as deterministic as the provider allows.
llm = ChatGroq(
    model="openai/gpt-oss-120b",
    temperature=0,
)

In [3]:
# Embedding model used to vectorise both the document chunks and the queries.
# It does not have to match the chat LLM; what matters is that the SAME
# embedding model is used when indexing and when searching.
#
# Paid alternative (requires an OpenAI account):
#     embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

Loading weights: 100%|██████████| 103/103 [00:00<00:00, 671.07it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [4]:
# Path to the source document, relative to the notebook's working directory.
PDF_PATH = "RAG_TEST_CV.pdf"

# Build the loader inside the try block as well: the loader validates the path
# when it is constructed, so a missing file would otherwise raise before the
# handler below gets a chance to report it.
try:
    pdf_loader = PyPDFLoader(PDF_PATH)
    pages = pdf_loader.load()  # one Document per PDF page
    print(f"PDF has been loaded and has {len(pages)} pages")
except Exception as e:
    # Report the problem, then re-raise so the notebook stops here instead of
    # continuing with an undefined `pages`.
    print(f"Error loading PDF: {e}")
    raise

PDF has been loaded and has 2 pages


### Chunking Process
We use `RecursiveCharacterTextSplitter` to divide large documents into
smaller, semantically coherent chunks before embedding.


#### Why chunking?
- Embedding models have context length limits.
- Smaller chunks improve retrieval granularity.
- Prevents truncation of long documents.

`chunk_size`=1000:

  Each chunk will contain up to 1000 characters.
  This is a balance between:
 - Enough context for meaningful embeddings
 - Small enough size for precise similarity search


`chunk_overlap`=200:

  Consecutive chunks share 200 characters.
  This **overlap** preserves context across boundaries and
  reduces the risk of **splitting** important information.


The splitter works recursively by trying to split first on:

  paragraphs (`\n\n`) → lines (`\n`) → words (spaces) → characters
  
to maintain semantic structure as much as possible.


In [5]:
# Chunking: cut the loaded pages into pieces of at most 1000 characters,
# with 200 characters shared between neighbouring pieces.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Each page Document is split into one or more chunk Documents.
pages_split = text_splitter.split_documents(pages)

In [6]:
# Directory where Chroma persists the collection. It must be passed explicitly,
# otherwise nothing is written to disk. Defaults to the current working
# directory (normally the notebook's folder) instead of a machine-specific
# absolute path; set CHROMA_PERSIST_DIR (e.g. in .env) to reuse a store that
# lives somewhere else.
persist_directory = os.environ.get("CHROMA_PERSIST_DIR", os.getcwd())
collection_name = "my_cv_embedded"

# Deterministic ids: re-running this cell then overwrites the same records
# instead of appending a duplicate copy of every chunk to the persisted
# collection. NOTE: if the document later yields fewer chunks, stale
# higher-numbered records remain until the collection is deleted.
chunk_ids = [f"{collection_name}-chunk-{i}" for i in range(len(pages_split))]

try:
    # Embed every chunk with our embeddings model and store it in Chroma.
    vectorstore = Chroma.from_documents(
        documents=pages_split,
        embedding=embeddings,
        ids=chunk_ids,
        persist_directory=persist_directory,  # where to save it
        collection_name=collection_name,  # what to name it
    )
    print("Created ChromaDB vector store!")

except Exception as e:
    # Report the problem, then re-raise so later cells do not run without a store.
    print(f"Error setting up ChromaDB: {str(e)}")
    raise

Created ChromaDB vector store!
