In [None]:
# Import required modules
import copy
import logging
import os
import sys
from typing import List, Dict, Any

# Add src to path
# The location is resolved relative to the kernel's current working directory.
# It is normalised to an absolute path and registered only when missing, so
# re-running this cell does not keep appending duplicate entries to sys.path.
SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

print("Setup complete!")


In [None]:
from config import REALMRAGConfig, DEFAULT_CONFIG
from realm_rag import REALMRAGModel
from data_utils import create_sample_data

# Create configuration
# Work on a private deep copy: assigning DEFAULT_CONFIG directly would only
# alias the module-level object, and the demo overrides below would then be
# written into the shared default itself.
config = copy.deepcopy(DEFAULT_CONFIG)

# Architecture selector passed to REALMRAGModel; also drives the summary line
# so the printed model type cannot drift from the one actually instantiated.
MODEL_TYPE = "rag"

# Modify for demo (smaller models and data)
config.model.num_retrieved_docs = 3
config.training.batch_size = 2
config.training.num_epochs = 1
config.experiment.use_wandb = False

print("Configuration:")
print(f"  Model type: {MODEL_TYPE.upper()}")
print(f"  Retrieved docs: {config.model.num_retrieved_docs}")
print(f"  Batch size: {config.training.batch_size}")
print(f"  Generator model: {config.model.generator_model_name}")

# Create sample data
sample_data = create_sample_data(num_examples=20, num_docs=50)

# The code below reads sample_data['datasets'][split] and
# sample_data['knowledge_base'], both of which must support len().
print("\nSample data created:")
print(f"  Training examples: {len(sample_data['datasets']['train'])}")
print(f"  Test examples: {len(sample_data['datasets']['test'])}")
print(f"  Knowledge base size: {len(sample_data['knowledge_base'])}")

# Initialize model
model = REALMRAGModel(config, model_type=MODEL_TYPE)
print("\nModel initialized successfully!")

# Prepare knowledge base
# NOTE(review): presumably this builds the retrieval index over the documents;
# confirm against REALMRAGModel.prepare_knowledge_base.
model.prepare_knowledge_base(sample_data['knowledge_base'])
print("Knowledge base prepared!")


In [None]:
# Test questions
test_questions = [
    "What is the capital of Country0?",
    "How does Process1 work?",
    "What is Concept2?",
    "Who invented Invention3?"
]

print("Testing inference with sample questions...")
# Take the retrieval depth from the demo configuration instead of repeating
# the literal, so changing config.model.num_retrieved_docs is honoured here.
results = model.retrieve_and_generate(
    test_questions,
    k=config.model.num_retrieved_docs,
    return_retrieved_docs=True
)

# results['answers'] and results['retrieved_docs'] are indexed by question
# position; each retrieved document is read via its 'score' and 'text' keys.
print("\n=== Question Answering Results ===")
for i, question in enumerate(test_questions):
    print(f"\nQuestion {i+1}: {question}")
    print(f"Answer: {results['answers'][i]}")
    retrieved = results['retrieved_docs'][i]
    print(f"Retrieved {len(retrieved)} documents")

    # Show top retrieved document
    # Guard the empty case: indexing [0] on an empty result would raise
    # IndexError and abort the rest of the report.
    if not retrieved:
        print("No documents retrieved for this question.")
        continue
    top_doc = retrieved[0]
    print(f"Top document (score: {top_doc['score']:.4f}):")
    print(f"  {top_doc['text'][:100]}...")

print("\n✅ Question answering test completed!")


In [None]:
from evaluator import REALMRAGEvaluator
from data_utils import REALMRAGDataset

# Evaluator built from the demo model and its configuration
evaluator = REALMRAGEvaluator(model, config)

# Wrap the test split of the sample data in the project's dataset class
test_dataset = REALMRAGDataset(sample_data['datasets']['test'], config.model)

# Run the evaluation on the test split
print("Running evaluation...")
eval_results = evaluator.evaluate_dataset(test_dataset, split="test")

# (display label, key in eval_results) pairs, listed in report order
REPORTED_METRICS = (
    ("Exact Match", 'exact_match'),
    ("F1 Score", 'f1_score'),
    ("Precision", 'precision'),
    ("Recall", 'recall'),
    ("BLEU-4", 'bleu_4'),
    ("ROUGE-L F1", 'rougeL_f1'),
    ("Retrieval F1", 'retrieval_f1'),
    ("Mean Reciprocal Rank", 'mean_reciprocal_rank'),
    ("Hit@1", 'hit_at_1'),
    ("Hit@3", 'hit_at_3'),
)

print("\n=== Evaluation Results ===")
# One line per metric, four decimal places each
for metric_label, metric_key in REPORTED_METRICS:
    print(f"{metric_label}: {eval_results[metric_key]:.4f}")

# Closing banner followed by the feature checklist
CLOSING_LINES = (
    "\n🎉 REALM/RAG Demo completed successfully!",
    "\nKey features demonstrated:",
    "✓ Dense passage retrieval with FAISS",
    "✓ Seq2seq generation with retrieved context",
    "✓ Comprehensive evaluation metrics",
    "✓ Both RAG and REALM architectures support",
    "✓ End-to-end question answering pipeline",
)
for closing_line in CLOSING_LINES:
    print(closing_line)
