# RAG System

**Phase A:** Load Documents → Chunk → Embed → Store in Vector DB

**Phase B:** Query → Retrieve Similar Chunks → Generate Answer

In [2]:
# CELL 1: Install dependencies (run once)
# %pip install -r requirements.txt

In [3]:
# CELL 2: Imports
import os, warnings
# Blanket filter: every warning category is ignored from here on, including
# any warnings raised by the libraries imported below.
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
# Runs before the imports below; presumably this is what makes GEMINI_API_KEY
# (checked at the end of this cell) available from a local .env file — confirm.
load_dotenv()

# Pipeline building blocks, configuration object and plotting helpers used by
# the cells that follow (presumably project-local modules — confirm).
from core import DocumentLoader, TextChunker, EmbeddingGenerator, VectorStore, Retriever, ResponseGenerator
from config import RAGConfig
from widgets import create_embedding_visualization, create_similarity_chart, create_chunk_statistics_dashboard

print("Imports OK!")
# Reports only whether the key is present; the key's value is never printed.
print(f"GEMINI_API_KEY: {'SET' if os.getenv('GEMINI_API_KEY') else 'NOT SET'}")

Imports OK!
GEMINI_API_KEY: SET


In [4]:
# CELL 3: Configuration - EDIT THESE VALUES
# Settings consumed by Phase A (chunking + embedding).
indexing_settings = dict(
    chunk_size=500,
    chunk_overlap=50,
    chunking_strategy='sentence',
    embedding_model='all-MiniLM-L6-v2',
)
# Settings consumed by Phase B (retrieval + answer generation).
query_settings = dict(
    top_k=5,
    llm_model='gemini-2.5-flash-lite',
    temperature=0.7,
    max_tokens=1024,
)
# Keyword order matches the two groups above, indexing first.
config = RAGConfig(**indexing_settings, **query_settings)
print("Config set!")

Config set!


---
## Phase A: Indexing
---

In [5]:
# CELL 4: Load Documents - EDIT PATH HERE
documents = DocumentLoader.load_directory('./data/lotr')

# One report line per loaded document (source name and length), then the count.
for doc in documents:
    size_line = f"{doc.source}: {len(doc):,} chars"
    print(size_line)
total_line = f"\nTotal: {len(documents)} docs"
print(total_line)

lotr_full_text.txt: 2,579,963 chars

Total: 1 docs


In [6]:
# CELL 5: Chunk documents
# All three chunking parameters come from the config cell.
chunker = TextChunker(
    chunk_size=config.chunk_size,
    overlap=config.chunk_overlap,
    strategy=config.chunking_strategy,
)
chunks = chunker.chunk_documents(documents)
# Summary line: number of chunks plus the average length reported by the chunker.
print(f"Created {len(chunks)} chunks (avg {chunker.get_statistics(chunks)['avg_length']:.0f} chars)")

Created 6050 chunks (avg 432 chars)


In [7]:
# CELL 6: Visualize chunks (optional)
# Build the dashboard from the chunks produced in CELL 5, then render it.
chunk_dashboard = create_chunk_statistics_dashboard(chunks)
chunk_dashboard.show()

In [8]:
# CELL 7: Generate embeddings
import torch

# Default to CPU and switch to CUDA only when torch reports it as available.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print(f"Using device: {device}")

embedder = EmbeddingGenerator(
    model_name=config.embedding_model,
    device=device,
)
# Embed every chunk from CELL 5 in batches of 32 with a progress bar.
embedded_chunks = embedder.embed_chunks(
    chunks,
    batch_size=32,
    show_progress=True,
)
print(f"\nGenerated {len(embedded_chunks)} embeddings")

Using device: cuda


Embedding chunks:   0%|          | 0/190 [00:00<?, ?it/s]

Loading embedding model: all-MiniLM-L6-v2...
Model loaded (dim=384)

Generated 6050 embeddings


In [9]:
# CELL 8: Store in vector database
# reset=True: in the recorded run this deleted the existing 'rag_collection'
# before the new vectors were added.
vector_store = VectorStore(
    collection_name='rag_collection',
    persist_directory='./chroma_db',
    reset=True,
)
count = vector_store.add(embedded_chunks)
print(f"Stored {count} vectors")

Deleted existing collection: rag_collection
Stored 6050 vectors


In [10]:
# CELL 9: Visualize embedding space (optional)
# NOTE: all_embeddings and all_metadata are reused by CELL 12, so this cell
# must have been run before CELL 12 even though the plot itself is optional.
all_embeddings, all_metadata = vector_store.get_all_embeddings()
embedding_fig = create_embedding_visualization(all_embeddings, all_metadata, method='UMAP')
embedding_fig.show()

---
## Phase B: Query
---

In [11]:
# CELL 10: Initialize retriever and generator
# The retriever is built from the embedder (CELL 7) and the vector store (CELL 8).
retriever = Retriever(embedder, vector_store)
# The generator takes its model name and sampling settings from the config cell.
generator = ResponseGenerator(
    model=config.llm_model,
    temperature=config.temperature,
    max_tokens=config.max_tokens,
)
print(f"Ready! Using {config.llm_model}")

Ready! Using gemini-2.5-flash-lite


In [None]:
# CELL 11: ASK A QUESTION - EDIT YOUR QUERY HERE
query = "What is a potato"

# Retrieve the top-k chunks for the query (k comes from the config cell).
results = retriever.retrieve(query=query, k=config.top_k)
print(f"Query: {query}\n")
print("Retrieved chunks:")
for r in results:
    # One banner per chunk (rank, score, source between two rules), then its text.
    print(
        f"\n{'='*80}",
        f"[{r.rank}] Score: {r.score:.3f} | Source: {r.source}",
        f"{'='*80}",
        r.text,
        sep="\n",
    )

# Generate an answer grounded in the retrieved chunks.
response = generator.generate(query=query, context_chunks=results)
print(f"\n{'='*50}\nANSWER:\n{'='*50}")
print(response.response)
print(f"\nSources: {', '.join(response.sources)}")

Query: What is machine learning?

Retrieved chunks:

[1] Score: 0.142 | Source: lotr_full_text.txt
Frodo?' he said. 'What's the time? Seems to be getting late!'
     'No it isn't,' said Frodo. `But the day is getting darker instead of lighter: darker and darker. As far as I can tell, it isn't midday yet, and you've only slept for about three hours.'
     'I wonder what's up,' said Sam. 'Is there a storm coming? If so it's going to be the worst there ever was. We shall wish we were down a deep hole, not just stuck under a hedge.' He listened. `What's that? Thunder, or drums, or what is it?

[2] Score: 0.136 | Source: lotr_full_text.txt
Speak, friend, and enter_. And underneath small and faint is written: _I, Narvi, made them. Celebrimbor of Hollin drew these signs._'
     `What does it mean by _speak, friend, and enter_?' asked Merry. 'That is plain enough,' said Gimli. `If you are a friend, speak the password, and the doors will open, and you can enter.'
     'Yes,' said Gandalf, 'thes

In [None]:
# CELL 12: Visualize query in embedding space with score coloring
# Depends on `results` from CELL 11 and on all_embeddings/all_metadata from CELL 9.
# last_query_embedding is read from the retriever, so it presumably reflects
# whichever retrieve() call ran most recently — confirm before re-running out of order.
query_embedding = retriever.last_query_embedding
retrieved_indices = retriever.get_retrieved_indices(results, all_metadata)
scores = [hit.score for hit in results]

# Query-specific overlay passed on top of the base UMAP plot.
query_overlay = dict(
    query_embedding=query_embedding,
    retrieved_indices=retrieved_indices,
    retrieval_scores=scores,
)
fig = create_embedding_visualization(all_embeddings, all_metadata, method='UMAP', **query_overlay)
fig.show()

In [None]:
# CELL 13: Quick ask function - use for more questions
def ask(question, top_k=5):
    """Answer one question end to end and print the result.

    Retrieves ``top_k`` chunks for ``question`` with the notebook-level
    ``retriever``, passes them to the notebook-level ``generator``, prints the
    question, the answer and the sources, and returns the generator's response.
    """
    context = retriever.retrieve(query=question, k=top_k)
    answer = generator.generate(query=question, context_chunks=context)
    # Three sections separated by blank lines: question, answer, sources.
    print(
        f"Q: {question}",
        f"A: {answer.response}",
        f"Sources: {', '.join(answer.sources)}",
        sep="\n\n",
    )
    return answer

In [None]:
# CELL 14: Ask more questions
# NOTE(review): the corpus indexed above is ./data/lotr; this question is
# unrelated to it — presumably an out-of-domain check, confirm that is intended.
ask(question="What is deep learning?")

In [None]:
# NOTE(review): also unrelated to the ./data/lotr corpus indexed above —
# presumably another out-of-domain check, confirm that is intended.
ask(question="How do I create a virtual environment in Python?")

---
## Diverse Retrieval: MMR vs QUBO

Compare retrieval methods (Naive, MMR, QUBO) to see which returns a more diverse set of chunks.

---

In [None]:
# Import diversity modules
# Factory used below to build the 'naive', 'mmr' and 'qubo' retrieval strategies.
from core.retrieval_strategies import create_retrieval_strategy
# Metric computation and table printing used at the end of compare_diverse.
from core.diversity_metrics import compare_retrieval_methods, print_comparison_table
# NOTE(review): np is not referenced in any of the cells shown here — confirm it is still needed.
import numpy as np

print("Diversity modules loaded!")

In [None]:
# Compare Naive vs MMR vs QUBO
def compare_diverse(query, k=5, candidate_multiplier=3):
    """Compare the Naive, MMR and QUBO retrieval strategies on one query.

    The query is embedded once with the notebook-level ``embedder``, one shared
    candidate pool is fetched from the notebook-level ``vector_store``, and
    every strategy is asked for ``k`` results from that same pool. Per-strategy
    timings and the metrics table are printed as a side effect.

    Args:
        query: Question text to embed and search for.
        k: Number of results requested from each strategy.
        candidate_multiplier: The candidate pool is requested with
            ``k * candidate_multiplier`` entries. Defaults to 3.

    Returns:
        A ``(all_results, comparison)`` tuple. ``all_results`` maps strategy
        name ('Naive', 'MMR', 'QUBO') to the result list that strategy
        returned; ``comparison`` is the value returned by
        ``compare_retrieval_methods`` for those results.
    """
    query_emb = embedder.embed_query(query)
    candidates = vector_store.search(query_emb, k=k * candidate_multiplier)

    # Index candidate embeddings by id once, instead of rescanning the whole
    # pool for every result of every strategy. setdefault keeps the first
    # occurrence of a repeated id, which matches a first-match scan.
    # Assumes candidate ids are hashable (e.g. strings) — TODO confirm.
    embedding_by_id = {}
    for candidate in candidates:
        embedding_by_id.setdefault(candidate['id'], candidate['embedding'])

    # Strategies
    strategies = {
        'Naive': create_retrieval_strategy('naive'),
        'MMR': create_retrieval_strategy('mmr', lambda_param=0.5),
        'QUBO': create_retrieval_strategy('qubo', alpha=0.6,
                                          solver_params={'n_replicas': 2, 'full_sweeps': 5000})
    }

    results_dict = {}
    all_results = {}

    for name, strategy in strategies.items():
        print(f"\n{name}...", end=' ')
        results, metadata = strategy.retrieve(query_emb, candidates, k=k)
        all_results[name] = results

        # Convert for metrics. A result whose id is not in the candidate pool
        # gets embedding None rather than raising.
        results_dict[name] = [
            {'id': r.id, 'score': r.score, 'embedding': embedding_by_id.get(r.id)}
            for r in results
        ]

        # Strategies that report no timing are shown as 0.00s.
        print(f"{metadata.get('execution_time', 0):.2f}s")

    # Show metrics
    print("\n" + "="*70)
    comparison = compare_retrieval_methods(results_dict)
    print_comparison_table(comparison)

    return all_results, comparison

In [None]:
# Run comparison
# NOTE: this rebinds the notebook-level `query` and `results`. After this cell
# `results` is a dict keyed by method name (not the list produced in CELL 11),
# so CELL 12 must not be re-run afterwards without re-running CELL 11 first.
query = "Who is Frodo and what is his quest?"
results, comparison = compare_diverse(query, k=5)

# Show results from each method
for method_name in ('Naive', 'MMR', 'QUBO'):
    # Header: rule, method name, rule.
    print(f"\n{'='*70}", f"{method_name} Results:", '='*70, sep="\n")
    top_three = results[method_name][:3]  # Show top 3
    for r in top_three:
        # Rank, score and the first 100 characters of the chunk text.
        print(f"[{r.rank}] {r.score:.3f} | {r.text[:100]}...")

In [None]:
# Generate answer using QUBO results
# Uses `query` and the method-keyed `results` dict from the comparison cell.
qubo_chunks = results['QUBO']
response = generator.generate(query=query, context_chunks=qubo_chunks)
# Banner: rule, title, rule.
print("="*70, "QUBO-RAG ANSWER:", "="*70, sep="\n")
print(response.response)
print(f"\nSources: {', '.join(response.sources)}")