In [None]:
# Evaluation of the enhanced RAG pipeline; results are saved in the ../results folder.
# Setup: imports and pipeline initialization
import sys
from pathlib import Path

import pandas as pd
from tqdm import tqdm

sys.path.insert(0, '../src')  # must run before the project-local imports below

from enhanced_rag import EnhancedRAG
from utils import calculate_f1_score, exact_match

# Build the enhanced RAG pipeline: construct it, then load data, embed,
# index, and load the generator, in that order.
# NOTE(review): presumably each step depends on the previous one — confirm in src/enhanced_rag.py
print("Initializing Enhanced RAG...")
enhanced_rag = EnhancedRAG(embedding_model_name="all-mpnet-base-v2")
# load_dataset() returns a pair; qa_pairs is indexed by the evaluation cells below,
# while documents is not referenced again in the visible cells.
documents, qa_pairs = enhanced_rag.load_dataset()
enhanced_rag.create_embeddings(batch_size=32)
enhanced_rag.build_vector_db()
enhanced_rag.load_generator()

print("\nEnhanced RAG ready")
print("Enhancements: Re-ranking + Query Rewriting")

In [None]:
# Evaluate on (at most) the first 50 QA pairs to keep each run quick.
test_size = 50
# Clamp to the dataset size so a dataset with fewer than test_size entries
# yields a shorter test set instead of raising IndexError. Integer indexing
# is kept as in the original, since qa_pairs' container type isn't visible here.
test_questions = [qa_pairs[i] for i in range(min(test_size, len(qa_pairs)))]

# Enhancement combinations to compare: every on/off pairing of re-ranking
# and query rewriting. 'rerank' / 'rewrite' are forwarded to the query call
# in the evaluation cell; 'name' labels the progress bar and the results row.
configs = [
	{'name': 'Baseline (no enhancements)', 'rerank': False, 'rewrite': False},
	{'name': 'Re-ranking only', 'rerank': True, 'rewrite': False},
	{'name': 'Query rewriting only', 'rerank': False, 'rewrite': True},
	{'name': 'Both enhancements', 'rerank': True, 'rewrite': True}
]

# Report the number of questions actually selected, which may be below test_size.
print(f"Testing {len(configs)} configurations on {len(test_questions)} questions")

In [None]:
# Run every configuration over the test questions, average F1 / exact-match
# per configuration, print a summary table, and save it as CSV.
results = []

for config in configs:
	print("\n")
	print(f"Testing: {config['name']}")
	print("\n")

	scores = []
	failures = []  # exceptions raised while answering/scoring, kept for reporting
	for qa in tqdm(test_questions, desc=config['name']):
		try:
			result = enhanced_rag.query(
				qa['question'],
				top_k=3,
				prompt_strategy="basic",
				use_reranking=config['rerank'],
				use_rewriting=config['rewrite']
			)

			f1 = calculate_f1_score(result['answer'], qa['answer'])
			em = exact_match(result['answer'], qa['answer'])
			scores.append({'f1': f1, 'em': em})
		except Exception as exc:
			# Best-effort: a failed query still counts as a miss, but it is
			# recorded so failures are visible. Catching Exception (not a bare
			# except) lets KeyboardInterrupt stop a long run.
			failures.append(exc)
			scores.append({'f1': 0.0, 'em': False})

	if failures:
		print(
			f"Warning: {len(failures)}/{len(scores)} queries failed and were "
			f"scored as 0; first error: {failures[0]!r}"
		)

	# Guard against an empty test set, which would otherwise divide by zero.
	n_scored = len(scores)
	avg_f1 = sum(s['f1'] for s in scores) / n_scored if n_scored else 0.0
	avg_em = sum(s['em'] for s in scores) / n_scored if n_scored else 0.0

	# Metrics are stored pre-formatted as strings, so the CSV holds display values.
	results.append({
		'Configuration': config['name'],
		'F1 Score': f"{avg_f1:.3f}",
		'Exact Match %': f"{avg_em*100:.1f}%",
		'Reranking': config['rerank'],
		'Query Rewriting': config['rewrite']
	})

	print(f"F1: {avg_f1:.3f}, EM: {avg_em*100:.1f}%")

# Display results
df = pd.DataFrame(results)
print("\n")
print("Enhanced RAG Eval Results")
print("\n")
print(df.to_string(index=False))

# Save; create the results folder first so a missing directory does not
# discard the whole evaluation run at the last step.
output_path = Path('../results/05_enhancement_eval.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
# Forward slash on purpose: "..\results" contains the escape "\r" (carriage
# return), which garbled the printed path.
print("Evaluation saved to ../results as 05_enhancement_eval")