# RAG System

**Phase A:** Load Documents → Chunk → Embed → Store in Vector DB

**Phase B:** Query → Retrieve Similar Chunks → Generate Answer

In [2]:
# CELL 1: Install dependencies (run once)
# !pip install -r requirements.txt

In [2]:
# CELL 2: Imports
import os, warnings
# Suppress every warning for the rest of the kernel session. The filter is
# installed before the project imports below, so warnings emitted while those
# modules import are hidden as well.
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
# Called before importing `core`/`config`.
# NOTE(review): presumably this loads GEMINI_API_KEY from a local .env file so
# it is already in the environment when the project modules import - confirm.
load_dotenv()

# Project-local building blocks of the pipeline (loader, chunker, embedder,
# vector store, retriever, generator), the config object, and plotting helpers.
from core import DocumentLoader, TextChunker, EmbeddingGenerator, VectorStore, Retriever, ResponseGenerator
from config import RAGConfig
from widgets import create_embedding_visualization, create_similarity_chart, create_chunk_statistics_dashboard

print("Imports OK!")
# Report only whether the key is present; the key itself is never printed.
print(f"GEMINI_API_KEY: {'SET' if os.getenv('GEMINI_API_KEY') else 'NOT SET'}")

Imports OK!
GEMINI_API_KEY: SET


In [3]:
# CELL 3: Configuration - EDIT THESE VALUES
# Single place for the tunable settings; later cells read them back as
# attributes of `config` rather than repeating the literals.
config = RAGConfig(
    # Chunking settings - passed to TextChunker in CELL 5.
    chunk_size=500,
    chunk_overlap=50,
    chunking_strategy='sentence',
    # Embedding model name - passed to EmbeddingGenerator in CELL 7.
    embedding_model='all-MiniLM-L6-v2',
    # Number of chunks requested from the retriever in CELL 11.
    top_k=5,
    # Generation settings - passed to ResponseGenerator in CELL 10.
    llm_model='gemini-2.5-flash-lite',
    temperature=0.7,
    max_tokens=1024
)
print("Config set!")

Config set!


---
## Phase A: Indexing
---

In [4]:
# CELL 4: Load Documents - EDIT PATH HERE
documents = DocumentLoader.load_directory('./data/medical_diagnosis')

# Fail fast on an empty load: every later cell (chunking, embedding, indexing)
# works on `documents`, so a wrong path should be reported here instead of
# surfacing as a confusing error or an empty index further down.
if len(documents) == 0:
    raise ValueError(
        "No documents were loaded from './data/medical_diagnosis' - "
        "check the path before running the rest of the notebook."
    )

# One line per document (source name and length), followed by the total count.
for doc in documents:
    print(f"{doc.source}: {len(doc):,} chars")
print(f"\nTotal: {len(documents)} docs")

celiac_disease_gluten.txt: 356 chars
chronic_fatigue_variant_1.txt: 292 chars
chronic_fatigue_variant_10.txt: 249 chars
chronic_fatigue_variant_11.txt: 209 chars
chronic_fatigue_variant_12.txt: 243 chars
chronic_fatigue_variant_13.txt: 203 chars
chronic_fatigue_variant_14.txt: 206 chars
chronic_fatigue_variant_15.txt: 232 chars
chronic_fatigue_variant_2.txt: 273 chars
chronic_fatigue_variant_3.txt: 268 chars
chronic_fatigue_variant_4.txt: 240 chars
chronic_fatigue_variant_5.txt: 247 chars
chronic_fatigue_variant_6.txt: 237 chars
chronic_fatigue_variant_7.txt: 224 chars
chronic_fatigue_variant_8.txt: 228 chars
chronic_fatigue_variant_9.txt: 213 chars
diabetes_type1.txt: 361 chars
fibromyalgia_central.txt: 337 chars
fibromyalgia_pain.txt: 354 chars
hypothyroidism_diagnosis.txt: 347 chars
hypothyroidism_metabolism.txt: 330 chars
inflammatory_bowel_disease.txt: 362 chars
lupus_autoimmune.txt: 362 chars
lupus_symptoms.txt: 276 chars
lyme_disease_stages.txt: 353 chars
lyme_disease_tick.txt: 

In [5]:
# CELL 5: Chunk documents
# Build the chunker from the values chosen in the configuration cell.
chunker = TextChunker(
    chunk_size=config.chunk_size,
    overlap=config.chunk_overlap,
    strategy=config.chunking_strategy,
)
chunks = chunker.chunk_documents(documents)

# Summarise the result: how many chunks there are and their average length.
chunk_stats = chunker.get_statistics(chunks)
print(f"Created {len(chunks)} chunks (avg {chunk_stats['avg_length']:.0f} chars)")

Created 39 chunks (avg 263 chars)


In [6]:
# CELL 6: Visualize chunks (optional - comment out to skip)
# The dashboard call is currently disabled; uncomment the next line to render it.
# create_chunk_statistics_dashboard(chunks).show()
# Text-only summary so the cell still reports the chunk count.
print(f"Chunk statistics: {len(chunks)} chunks created")

Chunk statistics: 39 chunks created


In [7]:
# CELL 7: Generate embeddings
import torch

# Use the GPU when PyTorch reports one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Embed every chunk with the model named in the configuration cell.
embedder = EmbeddingGenerator(model_name=config.embedding_model, device=device)
embedded_chunks = embedder.embed_chunks(
    chunks,
    batch_size=32,
    show_progress=True,
)
print(f"\nGenerated {len(embedded_chunks)} embeddings")

Using device: cuda


Embedding chunks:   0%|          | 0/2 [00:00<?, ?it/s]

Loading embedding model: all-MiniLM-L6-v2...
Model loaded (dim=384)

Generated 39 embeddings


In [8]:
# CELL 8: Store in vector database
# WARNING: reset=True is destructive. In the recorded run this call printed
# "Deleted existing collection: rag_collection", i.e. whatever was previously
# stored under ./chroma_db for this collection is dropped and rebuilt from
# `embedded_chunks` each time the cell runs.
vector_store = VectorStore(collection_name='rag_collection', persist_directory='./chroma_db', reset=True)
# `add` returns a value that is reported below as the number of stored vectors.
count = vector_store.add(embedded_chunks)
print(f"Stored {count} vectors")

Deleted existing collection: rag_collection
Stored 39 vectors


In [None]:
# CELL 9: Visualize embedding space (deprecated)
#all_embeddings, all_metadata = vector_store.get_all_embeddings()

# Automatic subsampling to 2000 points + faster UMAP params
# Should take ~5-10s instead of 40s+
#create_embedding_visualization(all_embeddings, all_metadata, method='UMAP').show()

#print(f"Visualized {len(all_embeddings)} embeddings (subsampled to 2000 for speed)")

---
## Phase B: Query
---

In [10]:
# CELL 10: Initialize retriever and generator
# The retriever combines the embedder from CELL 7 with the store from CELL 8.
retriever = Retriever(embedder, vector_store)

# The generator takes its model name and sampling settings from `config`.
generator = ResponseGenerator(
    model=config.llm_model,
    temperature=config.temperature,
    max_tokens=config.max_tokens,
)
print(f"Ready! Using {config.llm_model}")

Ready! Using gemini-2.5-flash-lite


In [11]:
# CELL 11: ASK A QUESTION - EDIT YOUR QUERY HERE
query = "Patient presents with chronic fatigue, joint pain, and occasional low-grade fever. What conditions should be considered?"

# Retrieve: fetch the configured number of chunks for the query.
results = retriever.retrieve(query=query, k=config.top_k)
print(f"Query: {query}\n")
print("Retrieved chunks:")

# Each chunk is printed as a ruled header (rank, score, source) followed by its text.
chunk_rule = '=' * 80
for hit in results:
    print(f"\n{chunk_rule}")
    print(f"[{hit.rank}] Score: {hit.score:.3f} | Source: {hit.source}")
    print(chunk_rule)
    print(hit.text)

# Generate: answer the query using the retrieved chunks as context.
response = generator.generate(query=query, context_chunks=results)
answer_rule = '=' * 50
print(f"\n{answer_rule}\nANSWER:\n{answer_rule}")
print(response.response)
source_list = ', '.join(response.sources)
print(f"\nSources: {source_list}")

Query: Patient presents with chronic fatigue, joint pain, and occasional low-grade fever. What conditions should be considered?

Retrieved chunks:

[1] Score: 0.512 | Source: chronic_fatigue_variant_7.txt
Chronic fatigue syndrome manifests as severe, persistent exhaustion unrelieved by rest. Patients experience debilitating tiredness, mental cloudiness, and poor sleep. Daily activities become increasingly difficult to manage.

[2] Score: 0.492 | Source: chronic_fatigue_variant_15.txt
Chronic fatigue syndrome presents with severe, unrelenting tiredness that continues despite adequate rest. The exhaustion is all-pervasive, affecting mental clarity and physical ability. Daily tasks become extraordinarily difficult.

[3] Score: 0.487 | Source: lupus_symptoms.txt
Lupus patients experience diverse symptoms including fatigue, joint pain, and fever. The distinctive malar rash resembles a butterfly pattern on the face. Sun exposure often triggers symptom flares. Kidney disease develops in many 

In [13]:
# CELL 12: Visualize query in embedding space
# This cell depends on notebook state left behind by CELL 11:
#   * `results` - the chunks retrieved there. Later cells rebind `results`
#     (for example the cell that runs compare_diverse), so run this cell
#     right after CELL 11.
#   * `retriever.last_query_embedding`
#     NOTE(review): presumably the embedding of the most recent
#     retriever.retrieve() call; if so, calling ask() in between would make it
#     disagree with `results` - confirm.
all_embeddings, all_metadata = vector_store.get_all_embeddings()
query_embedding = retriever.last_query_embedding
# Map the retrieved chunks onto positions within the full metadata list.
retrieved_indices = retriever.get_retrieved_indices(results, all_metadata)
scores = [r.score for r in results]

# Plot all stored embeddings with method='UMAP', passing the query embedding,
# the retrieved indices and their scores so the plot can mark them.
fig = create_embedding_visualization(
    all_embeddings, all_metadata, method='UMAP',
    query_embedding=query_embedding,
    retrieved_indices=retrieved_indices,
    retrieval_scores=scores
)
fig.show()

In [None]:
# CELL 13: Quick ask function - use for more questions
def ask(question, top_k=None):
    """Retrieve context for ``question``, generate an answer, print and return it.

    Uses the notebook-level ``retriever``, ``generator`` and ``config`` objects
    created in earlier cells, so those cells must have been run first.

    Args:
        question: The question text to retrieve for and to answer.
        top_k: Number of chunks to retrieve. ``None`` (the default) uses
            ``config.top_k`` so that editing the configuration cell also
            affects this helper; pass an integer to override it per call.

    Returns:
        The object returned by ``generator.generate``; its ``response`` and
        ``sources`` attributes are what this function prints.
    """
    # Fall back to the notebook configuration instead of a hard-coded value.
    if top_k is None:
        top_k = config.top_k
    results = retriever.retrieve(query=question, k=top_k)
    response = generator.generate(query=question, context_chunks=results)
    print(f"Q: {question}\n\nA: {response.response}\n\nSources: {', '.join(response.sources)}")
    return response

In [None]:
# CELL 14: Ask more questions
# NOTE(review): this question is unrelated to the ./data/medical_diagnosis
# documents loaded in CELL 4 - confirm whether it is an intentional
# out-of-domain check or a leftover from a different corpus.
ask("What is deep learning?")

In [None]:
# NOTE(review): this question is also unrelated to the medical documents loaded
# in CELL 4 - confirm it is meant as an out-of-domain example.
ask("How do I create a virtual environment in Python?")

---
## Method Comparison: Top-K vs MMR vs QUBO-RAG
**All three methods answer the same query but use different retrieval strategies.**

---

In [11]:
# Setup: Import and define test query
# `create_retrieval_strategy` is used by the method cells and by compare_diverse below.
from core.retrieval_strategies import create_retrieval_strategy
# NOTE(review): `np` is not referenced in any cell visible in this notebook - confirm it is still needed.
import numpy as np

# Medical diagnosis query with overlapping symptoms.
# Note: this text is not identical to `query` in CELL 11 (it adds
# "in the differential diagnosis"), so scores are not directly comparable.
test_query = "Patient presents with chronic fatigue, joint pain, and occasional low-grade fever. What conditions should be considered in the differential diagnosis?"
# Number of chunks each method returns; set here independently of config.top_k.
k = 5

print(f"Test Query: '{test_query}'")
print(f"Retrieving top {k} chunks with each method...\n")

Test Query: 'Patient presents with chronic fatigue, joint pain, and occasional low-grade fever. What conditions should be considered in the differential diagnosis?'
Retrieving top 5 chunks with each method...



### Method 1: Top-K (Naive)
Simple relevance-based retrieval - picks chunks with highest similarity scores

In [12]:
# METHOD 1: TOP-K (Naive)
banner = "=" * 70
divider = "-" * 70

print(banner)
print("METHOD 1: TOP-K (NAIVE)")
print(banner)

# Embed the query once and fetch a candidate pool of 3*k chunks. The MMR and
# QUBO cells reuse `query_emb` and `candidates`, so this cell must run first.
query_emb = embedder.embed_query(test_query)
candidates = vector_store.search(query_emb, k=k * 3)
strategy = create_retrieval_strategy('naive')
results_naive, meta = strategy.retrieve(query_emb, candidates, k=k)

# List the selected chunks with an 80-character preview of each.
print(f"\nRetrieved {len(results_naive)} chunks:")
for hit in results_naive:
    print(f"  [{hit.rank}] Score: {hit.score:.3f} | {hit.text[:80]}...")

# Answer the test query using only the chunks this method selected.
response_naive = generator.generate(query=test_query, context_chunks=results_naive)

print(f"\n{divider}")
print("TOP-K ANSWER:")
print(divider)
print(response_naive.response)
print(f"\nSources: {', '.join(response_naive.sources)}")
print(banner)

METHOD 1: TOP-K (NAIVE)

Retrieved 5 chunks:
  [1] Score: 0.630 | Rheumatoid Arthritis (RA) patients commonly present with joint pain, morning sti...
  [2] Score: 0.618 | Lyme Disease patients commonly present with fatigue, fever, and joint pain. Thes...
  [3] Score: 0.600 | Distinguishing features of Fibromyalgia include tender points and no inflammatio...
  [4] Score: 0.585 | Systemic Lupus Erythematosus (SLE) patients commonly present with chronic fatigu...
  [5] Score: 0.557 | Additional diagnostic workup for Chronic Fatigue Syndrome (ME/CFS) involves symp...

----------------------------------------------------------------------
TOP-K ANSWER:
----------------------------------------------------------------------
Rheumatoid Arthritis, Lyme Disease, and Systemic Lupus Erythematosus (SLE) should be considered in the differential diagnosis.

Sources: chronic_fatigue_diagnosis_2.txt, lyme_disease_symptoms_1.txt, fibromyalgia_symptoms_2.txt, lupus_symptoms_1.txt, rheumatoid_arthritis_sy

### Method 2: MMR (Maximal Marginal Relevance)
Balances relevance and diversity - avoids redundant chunks

In [13]:
# METHOD 2: MMR (Maximal Marginal Relevance)
banner = "=" * 70
divider = "-" * 70

print(banner)
print("METHOD 2: MMR (MAXIMAL MARGINAL RELEVANCE)")
print(banner)

# Select from the same candidate pool as Method 1: `query_emb` and
# `candidates` come from that cell. lambda=0.5 balances relevance/diversity.
strategy = create_retrieval_strategy('mmr', lambda_param=0.5)
results_mmr, meta = strategy.retrieve(query_emb, candidates, k=k)

# List the selected chunks with an 80-character preview of each.
print(f"\nRetrieved {len(results_mmr)} chunks:")
for hit in results_mmr:
    print(f"  [{hit.rank}] Score: {hit.score:.3f} | {hit.text[:80]}...")

# Answer the test query using only the chunks this method selected.
response_mmr = generator.generate(query=test_query, context_chunks=results_mmr)

print(f"\n{divider}")
print("MMR ANSWER:")
print(divider)
print(response_mmr.response)
print(f"\nSources: {', '.join(response_mmr.sources)}")
print(banner)

METHOD 2: MMR (MAXIMAL MARGINAL RELEVANCE)

Retrieved 5 chunks:
  [1] Score: 0.630 | Rheumatoid Arthritis (RA) patients commonly present with joint pain, morning sti...
  [2] Score: 0.530 | Additional complications of Chronic Fatigue Syndrome (ME/CFS) include social iso...
  [3] Score: 0.600 | Distinguishing features of Fibromyalgia include tender points and no inflammatio...
  [4] Score: 0.618 | Lyme Disease patients commonly present with fatigue, fever, and joint pain. Thes...
  [5] Score: 0.502 | Hypothyroidism patients commonly present with fatigue, weight gain, and joint pa...

----------------------------------------------------------------------
MMR ANSWER:
----------------------------------------------------------------------
Rheumatoid Arthritis, Lyme Disease, and Hypothyroidism should be considered in the differential diagnosis.

Sources: lyme_disease_symptoms_1.txt, chronic_fatigue_complications_2.txt, fibromyalgia_symptoms_2.txt, hypothyroidism_symptoms_1.txt, rheumatoid_ar

### Method 3: QUBO-RAG (Quantum-Inspired Optimization)
Uses ORBIT simulator to optimize relevance-diversity tradeoff via p-bit computing

In [None]:
# METHOD 3: QUBO-RAG (Quantum-Inspired with ORBIT)
banner = "=" * 70
divider = "-" * 70

print(banner)
print("METHOD 3: QUBO-RAG (QUANTUM-INSPIRED OPTIMIZATION)")
print(banner)

# Retrieve with QUBO strategy - TUNED FOR DIVERSITY
#   alpha=0.35        emphasizes diversity (65% diversity weight)
#   n_replicas=4      for better optimization
#   full_sweeps=10000 for convergence
# `query_emb` and `candidates` are reused from the Method 1 cell.
qubo_solver_params = {'n_replicas': 4, 'full_sweeps': 10000}
strategy = create_retrieval_strategy('qubo', alpha=0.35, solver_params=qubo_solver_params)
results_qubo, meta = strategy.retrieve(query_emb, candidates, k=k)

# Solver diagnostics reported in the metadata returned by the strategy.
print(f"ORBIT simulation time: {meta['execution_time']:.2f}s")
print(f"Alpha: {meta['alpha']} (lower = more diversity emphasis)")
print(f"Constraint satisfied: {meta.get('constraint_satisfied', 'N/A')}")

# List the selected chunks with an 80-character preview of each.
print(f"\nRetrieved {len(results_qubo)} chunks:")
for hit in results_qubo:
    print(f"  [{hit.rank}] Score: {hit.score:.3f} | {hit.text[:80]}...")

# Answer the test query using only the chunks this method selected.
response_qubo = generator.generate(query=test_query, context_chunks=results_qubo)

print(f"\n{divider}")
print("QUBO-RAG ANSWER:")
print(divider)
print(response_qubo.response)
print(f"\nSources: {', '.join(response_qubo.sources)}")
print(banner)

### Diversity Comparison
Compare how diverse the retrieved chunks are for each method

In [16]:
# Compare diversity metrics across all three methods
from core.diversity_metrics import compare_retrieval_methods, print_comparison_table

# Convert results to dict format for metrics.
# The loop variables are deliberately not called `results` / `name`: a
# top-level `for` loop binds its variables in the notebook namespace, and
# rebinding `results` here would silently replace the CELL 11 retrieval that
# the visualization cell reads.
results_dict = {}
for method_label, method_results in [('Top-K', results_naive), ('MMR', results_mmr), ('QUBO-RAG', results_qubo)]:
    results_dict[method_label] = [{
        'id': r.id,
        'score': r.score,
        # Embedding of the first candidate with the same id; None if the
        # chunk is not in the candidate pool.
        'embedding': next((c['embedding'] for c in candidates if c['id'] == r.id), None)
    } for r in method_results]

print("\n" + "="*70)
print("DIVERSITY METRICS COMPARISON")
print("="*70)
comparison = compare_retrieval_methods(results_dict)
print_comparison_table(comparison)

# Pull out the single metric the section is about, one line per method.
print("\nKey Insight:")
print(f"  • Top-K intra-list similarity:   {comparison['Top-K']['intra_list_similarity']:.4f}")
print(f"  • MMR intra-list similarity:     {comparison['MMR']['intra_list_similarity']:.4f}")
print(f"  • QUBO-RAG intra-list similarity: {comparison['QUBO-RAG']['intra_list_similarity']:.4f}")
print("\n  → Lower = more diverse chunks (less redundancy)")
print("="*70)


DIVERSITY METRICS COMPARISON
Metric                              Top-K             MMR        QUBO-RAG
avg_score                          0.5979          0.5757          0.5609
intra_list_similarity              0.5006          0.4552          0.5347
max_score                          0.6295          0.6295          0.6181
min_score                          0.5566          0.5016          0.5222
num_results                             5               5               5
std_score                          0.0256          0.0508          0.0355

Interpretation:
- Intra-list similarity: Lower = more diverse
- Subtopic recall: Higher = better coverage
- Alpha-nDCG: Higher = better relevance + diversity balance

Key Insight:
  • Top-K intra-list similarity:   0.5006
  • MMR intra-list similarity:     0.4552
  • QUBO-RAG intra-list similarity: 0.5347

  → Lower = more diverse chunks (less redundancy)


In [None]:
# Compare Naive vs MMR vs QUBO
def compare_diverse(query, k=5):
    """Run the Naive, MMR and QUBO strategies on one query and compare them.

    The query is embedded once and a pool of ``3 * k`` candidates is fetched
    from the vector store; every strategy then selects ``k`` chunks from that
    same pool. Progress, per-strategy timing and the metrics table are printed.

    Relies on notebook-level names defined in other cells: ``embedder``,
    ``vector_store``, ``create_retrieval_strategy``,
    ``compare_retrieval_methods`` and ``print_comparison_table``.

    Args:
        query: Query text to embed and retrieve for.
        k: Number of chunks each strategy should select.

    Returns:
        A tuple ``(all_results, comparison)``: the selected chunks keyed by
        strategy name, and the value returned by ``compare_retrieval_methods``.
    """
    query_vec = embedder.embed_query(query)
    pool = vector_store.search(query_vec, k=k * 3)

    def embedding_for(chunk_id):
        """Return the embedding of the first pooled candidate with this id, else None."""
        for cand in pool:
            if cand['id'] == chunk_id:
                return cand['embedding']
        return None

    # Strategies, in the order they are run and reported.
    strategies = {
        'Naive': create_retrieval_strategy('naive'),
        'MMR': create_retrieval_strategy('mmr', lambda_param=0.5),
        'QUBO': create_retrieval_strategy(
            'qubo',
            alpha=0.6,
            solver_params={'n_replicas': 2, 'full_sweeps': 5000},
        ),
    }

    all_results = {}
    results_dict = {}

    for name, strategy in strategies.items():
        # Print the name first so the timing lands on the same output line.
        print(f"\n{name}...", end=' ')
        selected, metadata = strategy.retrieve(query_vec, pool, k=k)
        all_results[name] = selected

        # Plain-dict rows in the shape the metrics helpers are given.
        metric_rows = []
        for item in selected:
            metric_rows.append({
                'id': item.id,
                'score': item.score,
                'embedding': embedding_for(item.id),
            })
        results_dict[name] = metric_rows

        elapsed = metadata.get('execution_time', 0)
        print(f"{elapsed:.2f}s")

    # Show metrics
    print("\n" + "=" * 70)
    comparison = compare_retrieval_methods(results_dict)
    print_comparison_table(comparison)

    return all_results, comparison

In [None]:
# Run comparison
# NOTE(review): this query is unrelated to the medical documents loaded in
# CELL 4 - confirm it is intentional. This cell also rebinds the notebook-level
# `query`, `results` (now a dict keyed by method name) and `comparison`; the
# next cell reads `query` and `results`.
query = "Who is Frodo and what is his quest?"
results, comparison = compare_diverse(query, k=5)

# Show results from each method: a ruled header, then the first three chunks.
section_rule = '=' * 70
for method_name in ('Naive', 'MMR', 'QUBO'):
    print(f"\n{section_rule}")
    print(f"{method_name} Results:")
    print(section_rule)
    top_three = results[method_name][:3]
    for hit in top_three:
        print(f"[{hit.rank}] {hit.score:.3f} | {hit.text[:100]}...")

In [None]:
# Generate answer using QUBO results
# `query` and `results` are the ones bound by the comparison cell directly
# above; `results` is a dict keyed by method name at this point.
qubo_chunks = results['QUBO']
response = generator.generate(query=query, context_chunks=qubo_chunks)

header_rule = "=" * 70
print(header_rule)
print("QUBO-RAG ANSWER:")
print(header_rule)
print(response.response)
source_list = ', '.join(response.sources)
print(f"\nSources: {source_list}")