# Module 08: Agentic RAG
This notebook demonstrates how ChromaDB is used as the vector store in a traditional RAG pipeline.

## What we'll learn:
- ChromaDB
- OpenAI Embeddings
- RAG using a State Machine
- Retrieval, Augmentation, and Generation as steps

In [1]:
# Only needed for Udacity workspace
# If the optional `pysqlite3` package is installed, register it under the
# name `sqlite3` so that any later `import sqlite3` receives pysqlite3 instead
# of the standard-library module.
# NOTE(review): presumably this is here because the workspace ships an SQLite
# build that is too old for ChromaDB -- confirm. For the swap to have any
# effect, this cell has to run before the cell that imports `chromadb`.

import importlib.util
import sys

# Check if 'pysqlite3' is available before importing
# (find_spec returns None when the package cannot be found, so environments
# without pysqlite3 skip the swap entirely).
if importlib.util.find_spec("pysqlite3") is not None:
    import pysqlite3
    # Remove the 'pysqlite3' entry from sys.modules and re-register the same
    # module object under the 'sqlite3' key.
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

In [2]:
import os
import chromadb
from chromadb.utils import embedding_functions
from chromadb.api.models.Collection import Collection
import pdfplumber
from dotenv import load_dotenv
from typing import TypedDict, List

from lib.state_machine import StateMachine, Step, EntryPoint, Termination, Resource
from lib.llm import LLM
from lib.messages import BaseMessage, UserMessage, SystemMessage

In [3]:
import logging
logging.getLogger('pdfminer').setLevel(logging.ERROR)

In [4]:
from dotenv import load_dotenv, dotenv_values

# Load the variables from the local .env file into the process environment.
# `dotenv_path` is spelled out as a reminder that another location can be
# passed if needed, e.g. "../../../.env".
load_dotenv(dotenv_path=".env")

# Parse the .env file into a plain dict (this does not touch os.environ).
# NOTE: dotenv_values() is called without a path, so it locates the .env file
# on its own. If you change `dotenv_path` above, pass the same path here so
# both calls read the same file.
config = dotenv_values()

# Prefer the value from the .env file, but fall back to the process
# environment so that variables exported in the shell/workspace (or loaded by
# load_dotenv from a different file) are not silently ignored.
api_key = config.get("UDACITY_OPENAI_API_KEY") or os.environ.get("UDACITY_OPENAI_API_KEY")
base_url = config.get("BASE_URL") or os.environ.get("BASE_URL")

In [5]:
# Small demo corpus: five tech-news headlines to embed and query.
sentence_list = [
    "Meta drops multimodal Llama 3.2 — here's why it's such a big deal",
    "Chip giant Nvidia acquires OctoAI, a Seattle startup that helps companies run AI models",
    "Google is bringing Gemini to all older Pixel Buds",
    "The first Intel Battlmage GPU benchmarks have leaked",
    "Dell partners with Nvidia to accelerate AI adoption in telecoms",
]

# One id per headline, numbered from 1: "id1", "id2", ...
ids = [f"id{position}" for position, _ in enumerate(sentence_list, start=1)]

## ChromaDB with Default Embedding Function

In [6]:
chroma_client = chromadb.Client()

In [7]:
collection = chroma_client.create_collection(
    name="demo"
)

In [8]:
collection.add(
    documents=sentence_list,
    ids=ids
)

In [9]:
collection.count()

5

In [None]:
collection.peek(1)

In [None]:
collection.query(
    query_texts=["gadget"],
    n_results=2,
    include=['metadatas', 'documents', 'distances']
)

In [12]:
result = collection.query(
    query_texts=["gadget"],
    n_results=2,
    include=['metadatas', 'documents', 'distances']
)

result['documents'][0]

['Google is bringing Gemini to all older Pixel Buds',
 "Meta drops multimodal Llama 3.2 — here's why it's such a big deal"]

In [13]:
print(collection._embedding_function.name())

default


In [14]:
size = len(collection.peek(1)['embeddings'][0])
print(f"Size of the embeddings array: {size}")


Size of the embeddings array: 384


## OpenAI Embeddings

In [15]:
chroma_client.delete_collection(name="demo")

In [16]:
embeddings_fn = embedding_functions.OpenAIEmbeddingFunction(
    api_key=api_key,
    api_base= base_url
)

In [17]:
collection = chroma_client.create_collection(
    name="demo",
    embedding_function=embeddings_fn
)

In [18]:
collection.add(
    documents=sentence_list,
    ids=ids
)

In [19]:
collection.query(
    query_texts=["gadget"],
    n_results=2,
    include=['metadatas', 'documents', 'distances']
)

{'ids': [['id3', 'id4']],
 'embeddings': None,
 'documents': [['Google is bringing Gemini to all older Pixel Buds',
   'The first Intel Battlmage GPU benchmarks have leaked']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None]],
 'distances': [[0.46601054072380066, 0.48678600788116455]]}

In [20]:
print(collection._embedding_function.name())

openai


In [21]:
size = len(collection.peek(1)['embeddings'][0])
print(f"Size of the embeddings array: {size}")

Size of the embeddings array: 1536


## RAG

**Load**

In [22]:
file_path = "GlobalEVOutlook2025.pdf"
documents = []
page_nums = []

In [23]:
# Start from empty lists every time this cell runs. Without the reset,
# re-running the cell appends every page a second time, and the repeated page
# ids would then be handed to `collection.add` as duplicate ids.
documents = []
page_nums = []

with pdfplumber.open(file_path) as pdf:
    # Pages are numbered from 1 so the ids match the PDF's page order.
    for num, page in enumerate(pdf.pages, start=1):
        text = page.extract_text()
        # Skip pages for which no text was extracted.
        if text:
            documents.append(text)
            # The page number (as a string) is used as the document id.
            page_nums.append(str(num))


In [24]:
collection = chroma_client.create_collection(
    name="traditional_rag",
    embedding_function=embeddings_fn
)

In [25]:
collection.add(
    documents=documents,
    ids=page_nums
)

**State Machine**

In [26]:
class State(TypedDict):
    """State dictionary shared by the steps of the RAG workflow.

    Each step function (`retrieve`, `augment`, `generate`) reads some of these
    keys and returns a dict containing the keys it produces.
    """
    # NOTE(review): the initial state passed to `workflow.run` only sets
    # `question`; the other keys are produced by the steps. Confirm whether
    # StateMachine expects every key to be present up front.

    # Prompt messages built by `augment`; `generate` returns them with the
    # model reply appended.
    messages: List[BaseMessage]
    # The user's question; read by `retrieve` and `augment`.
    question: str
    # Document texts returned by `retrieve` and joined into the prompt context.
    documents: List[str]
    # Content of the model reply, set by `generate`.
    answer: str

**RAG: Retrieve**

In [27]:
def retrieve(state:State, resource:Resource):
    """Look up the stored documents that best match the question.

    Args:
        state: Current workflow state; only ``question`` is read.
        resource: Shared resources; ``resource.vars`` must provide the
            collection to search under the key ``"collection"``.

    Returns:
        A dict ``{"documents": [...]}`` holding the document texts returned
        for the question (at most three are requested).
    """
    user_question = state["question"]
    vector_store: Collection = resource.vars.get("collection")

    hits = vector_store.query(
        query_texts=[user_question],
        n_results=3,
        include=['documents'],
    )

    # A single query text was sent, so only the first result list is relevant.
    return {"documents": hits['documents'][0]}

**RAG: Augment**

In [28]:
def augment(state:State):
    """Build the chat prompt from the question and the retrieved documents.

    Args:
        state: Current workflow state; ``question`` and ``documents`` are read.

    Returns:
        A dict ``{"messages": [...]}`` with a system message followed by a
        user message that contains the instructions, the question and the
        retrieved context.
    """
    question = state["question"]
    retrieved_docs = state["documents"]

    # Retrieved documents are separated by a blank line inside the prompt.
    context = "\n\n".join(retrieved_docs)

    prompt_parts = [
        "Use the following pieces of retrieved context to answer the question. ",
        "If you don't know the answer, just say that you don't know. ",
        f"\n# Question: \n-> {question} ",
        f"\n# Context: \n-> {context} ",
        "\n# Answer: ",
    ]
    prompt = "".join(prompt_parts)

    system_message = SystemMessage(content="You are an assistant for question-answering tasks.")
    user_message = UserMessage(content=prompt)

    return {"messages": [system_message, user_message]}

**RAG: Generate**

In [29]:
def generate(state:State, resource:Resource):
    """Send the prepared messages to the LLM and record its reply.

    Args:
        state: Current workflow state; ``messages`` is read.
        resource: Shared resources; ``resource.vars`` must provide the LLM
            client under the key ``"llm"``.

    Returns:
        A dict with ``answer`` (the reply's content) and ``messages`` (the
        input messages followed by the reply).
    """
    llm: LLM = resource.vars.get("llm")
    history = state["messages"]
    reply = llm.invoke(history)

    # Build a new list rather than appending, leaving the input list untouched.
    updated_history = history + [reply]
    return {"answer": reply.content, "messages": updated_history}

In [30]:
workflow = StateMachine(State)

In [31]:
# Create steps
entry = EntryPoint()
retrieve_step = Step("retrieve", retrieve)
augment_step = Step("augment", augment)
generate_step = Step("generate", generate)
termination = Termination()
        
workflow.add_steps(
    [
        entry, 
        retrieve_step, 
        augment_step, 
        generate_step, 
        termination
    ]
)

In [32]:
# Add transitions
workflow.connect(entry, retrieve_step)
workflow.connect(retrieve_step, augment_step)
workflow.connect(augment_step, generate_step)
workflow.connect(generate_step, termination)

In [33]:
llm = LLM(
    model="gpt-4o-mini",
    temperature=0.3,
    api_key=api_key,
    base_url=base_url,
)

In [34]:
resource = Resource(
    vars = {
        "llm": llm,
        "collection": collection,
    }
)

In [35]:
initial_state: State = {
    "question": "What was the number of electric car sales and their market share in Brazil in 2024?",
}

In [36]:
run_object = workflow.run(initial_state, resource)

[StateMachine] Starting: __entry__
[StateMachine] Executing step: retrieve
[StateMachine] Executing step: augment
[StateMachine] Executing step: generate
[StateMachine] Terminating: __termination__


In [37]:
run_object.get_final_state()["answer"]

'In 2024, Brazil had nearly 125,000 electric car sales, which represented a market share of 6.5%.'