In [1]:
#Loading Original Naive RAG from previous work
"""
Parameter Experimentation
Testing different embedding models and retrieval strategies
"""
import sys
sys.path.insert(0, '../src')
from naive_rag import NaiveRAG
from utils import calculate_f1_score, exact_match
import pandas as pd
import time
from tqdm import tqdm

# Banner describing the baseline this notebook compares against.
print("STEP 4: PARAMETER EXPERIMENTS")
print("\nBaseline: Naive RAG from previous implementation")
print("- Embedding: all-MiniLM-L6-v2 (384d)")
print("- Retrieval: top-1")
print("- Results: F1=0.444, EM=39.0%")
# Adjacent string literals are joined by the parser, so the long message can be
# wrapped without a backslash continuation (which previously leaked a literal
# "\" into the printed text).
print(
	"\nTesting variations in embedding dimensions and top_k retrieval and "
	"evaluating performance for comparison to NaiveRAG implementation\n"
)

# Prepare test set (50 questions for efficiency)
test_size = 50

def run_evaluation(rag_system, test_qs, top_k, label):
	"""Evaluate one RAG configuration on a collection of question/answer pairs.

	Args:
		rag_system: Object exposing ``query(question, top_k=..., prompt_strategy=...)``
			whose result can be indexed with ``['answer']``.
		test_qs: Iterable of mappings with ``'question'`` and ``'answer'`` keys.
		top_k: Forwarded unchanged to ``rag_system.query`` as ``top_k``.
		label: Description shown on the progress bar; also tags the failure summary.

	Returns:
		dict with the keys
			``avg_f1``   - mean ``calculate_f1_score`` over all questions,
			``avg_em``   - share of ``exact_match`` hits, expressed as a percentage,
			``time_min`` - wall-clock duration of the loop, in minutes,
			``n_failed`` - number of questions whose query or scoring raised.
		Both averages are 0.0 when ``test_qs`` is empty.
	"""
	results = []
	n_failed = 0
	first_error = None
	start = time.time()

	for qa in tqdm(test_qs, desc=label):
		try:
			result = rag_system.query(qa['question'], top_k=top_k, prompt_strategy="basic")
			f1 = calculate_f1_score(result['answer'], qa['answer'])
			em = exact_match(result['answer'], qa['answer'])
			results.append({'f1': f1, 'em': em})
		except Exception as exc:
			# Only ordinary errors are scored as a miss; KeyboardInterrupt and
			# SystemExit propagate so the run can still be stopped by hand.
			n_failed += 1
			if first_error is None:
				first_error = exc
			results.append({'f1': 0.0, 'em': False})

	elapsed = time.time() - start

	# Report failures once, after the loop, so the progress bar is not broken up
	# and zero scores caused by errors are not mistaken for wrong answers.
	if n_failed:
		print(f"[{label}] {n_failed}/{len(results)} queries failed; first error: {first_error!r}")

	n_results = len(results)
	avg_f1 = sum(r['f1'] for r in results) / n_results if n_results else 0.0
	avg_em = sum(r['em'] for r in results) / n_results if n_results else 0.0

	return {
		'avg_f1': avg_f1,
		'avg_em': avg_em * 100,
		# time.time() differences are in seconds; the key promises minutes.
		'time_min': elapsed / 60,
		'n_failed': n_failed
	}

Utils module loaded successfully!
STEP 4: PARAMETER EXPERIMENTS

Baseline: Naive RAG from previous implementation
- Embedding: all-MiniLM-L6-v2 (384d)
- Retrieval: top-1
- Results: F1=0.444, EM=39.0%

Testing variations in embedding dimensions and top_k retrieval and evaluating performance for \comparision to NaiveRAG implementation



In [3]:
#Experiment Matrix
# Every embedding model is crossed with every top_k value below.
embedding_models = [
	"all-MiniLM-L6-v2",   # Baseline (384d)
	"all-mpnet-base-v2"    # Higher quality (768d)
]

top_k_values = [1, 3, 5]

# One row per (embedding model, top_k) configuration; filled by the experiment loop.
results_matrix = []

# Load test questions once
rag_temp = NaiveRAG()
_, qa_pairs = rag_temp.load_dataset()
# Take the first test_size pairs, clamped to what is available so a smaller
# dataset does not raise IndexError. Items are fetched by integer index rather
# than by slicing, exactly as before.
test_questions = [qa_pairs[i] for i in range(min(test_size, len(qa_pairs)))]
if len(test_questions) < test_size:
	print(f"Warning: only {len(test_questions)} Q&A pairs available (requested {test_size})")

print(f"Testing {len(embedding_models)} embedding models × {len(top_k_values)} retrieval strategies")
print(f"= {len(embedding_models)* len(top_k_values)} configurations")
# Report the number of questions actually selected, not just the requested size.
print(f"The evaluations are run for each on {len(test_questions)} questions\n")

2025-10-02 03:36:17,068 - naive_rag - INFO - Initializing RAG with embedding model: all-MiniLM-L6-v2
2025-10-02 03:36:17,073 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: cpu
2025-10-02 03:36:17,074 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2025-10-02 03:36:18,314 - naive_rag - INFO - RAG system initialized successfully
2025-10-02 03:36:18,316 - naive_rag - INFO - Loading RAG Mini Wikipedia dataset...
2025-10-02 03:36:18,316 - naive_rag - INFO - Loading text corpus...
2025-10-02 03:36:20,480 - naive_rag - INFO - Loading Q&A pairs...
2025-10-02 03:36:21,913 - naive_rag - INFO - Loaded 3200 documents and 918 Q&A pairs


Testing 2 embedding models × 3 retrieval strategies
= 6 configurations
The evaluations are run for each on 50 questions



In [5]:
#Running Experiments
# Start from an empty table so that re-running this cell does not append
# duplicate rows for configurations that were already evaluated.
results_matrix.clear()

for emb_model in embedding_models:
	print("\n")
	print(f"Embedding Model: {emb_model}")
	print("\n")
	
	# Initialize RAG with this embedding model
	rag = NaiveRAG(embedding_model_name=emb_model)
	rag.load_dataset()
	rag.create_embeddings(batch_size=32)
	rag.build_vector_db()
	rag.load_generator()
	
	# Add the ellipsis only when the name really is cut off at 20 characters.
	short_name = emb_model if len(emb_model) <= 20 else f"{emb_model[:20]}..."
	
	for top_k in top_k_values:
		label = f"{short_name} | top-{top_k}"
		# NOTE(review): in the recorded run, all-MiniLM-L6-v2 produced identical
		# F1/EM for top-1, top-3 and top-5 - confirm that NaiveRAG.query
		# actually uses top_k when building the prompt.
		result = run_evaluation(rag, test_questions, top_k, label)
		
		# Values are stored pre-formatted as strings for display.
		results_matrix.append({
			'Embedding Model': emb_model,
			'Top-K': top_k,
			'Avg F1': f"{result['avg_f1']:.3f}",
			'EM %': f"{result['avg_em']:.1f}%",
			'Time (min)': f"{result['time_min']:.1f}"
		})
		
		print(f"\nResults for top-{top_k}:")
		print(f"  F1: {result['avg_f1']:.3f}")
		print(f"  EM: {result['avg_em']:.1f}%")

2025-10-02 03:37:43,937 - naive_rag - INFO - Initializing RAG with embedding model: all-MiniLM-L6-v2
2025-10-02 03:37:43,941 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: cpu
2025-10-02 03:37:43,941 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2




Embedding Model: all-MiniLM-L6-v2




2025-10-02 03:37:44,922 - naive_rag - INFO - RAG system initialized successfully
2025-10-02 03:37:44,922 - naive_rag - INFO - Loading RAG Mini Wikipedia dataset...
2025-10-02 03:37:44,922 - naive_rag - INFO - Loading text corpus...
2025-10-02 03:37:46,723 - naive_rag - INFO - Loading Q&A pairs...
2025-10-02 03:37:47,998 - naive_rag - INFO - Loaded 3200 documents and 918 Q&A pairs
2025-10-02 03:37:47,998 - naive_rag - INFO - Creating embeddings for 3200 documents...


Batches:   0%|          | 0/100 [00:00<?, ?it/s]

2025-10-02 03:38:54,189 - naive_rag - INFO - Created embeddings with shape: (3200, 384)
2025-10-02 03:38:54,189 - naive_rag - INFO - Building FAISS vector database...
2025-10-02 03:38:54,189 - naive_rag - INFO - FAISS index built with 3200 vectors
2025-10-02 03:38:54,201 - naive_rag - INFO - Loading generation model: google/flan-t5-base
2025-10-02 03:38:54,831 - naive_rag - INFO - Generation model loaded successfully
all-MiniLM-L6-v2... | top-1:   0%|                                                              | 0/50 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:   2%|█                                                     | 1/50 [00:00<00:46,  1.05it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:   4%|██▏                                                   | 2/50 [00:01<00:39,  1.21it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:   6%|███▏                                                  | 3/50 [00:02<00:33,  1.42it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:   8%|████▎                                                 | 4/50 [00:03<00:34,  1.35it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  10%|█████▍                                                | 5/50 [00:03<00:31,  1.45it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  12%|██████▍                                               | 6/50 [00:04<00:35,  1.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  14%|███████▌                                              | 7/50 [00:05<00:30,  1.41it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  16%|████████▋                                             | 8/50 [00:06<00:33,  1.24it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  18%|█████████▋                                            | 9/50 [00:06<00:31,  1.32it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  20%|██████████▌                                          | 10/50 [00:07<00:24,  1.62it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  22%|███████████▋                                         | 11/50 [00:07<00:22,  1.74it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  24%|████████████▋                                        | 12/50 [00:08<00:22,  1.70it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  26%|█████████████▊                                       | 13/50 [00:08<00:20,  1.78it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  28%|██████████████▊                                      | 14/50 [00:09<00:17,  2.08it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  30%|███████████████▉                                     | 15/50 [00:09<00:21,  1.65it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  32%|████████████████▉                                    | 16/50 [00:11<00:25,  1.32it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  34%|██████████████████                                   | 17/50 [00:12<00:26,  1.22it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  36%|███████████████████                                  | 18/50 [00:13<00:33,  1.05s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  38%|████████████████████▏                                | 19/50 [00:14<00:26,  1.16it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  40%|█████████████████████▏                               | 20/50 [00:14<00:25,  1.19it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  42%|██████████████████████▎                              | 21/50 [00:15<00:25,  1.13it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  44%|███████████████████████▎                             | 22/50 [00:16<00:19,  1.40it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  46%|████████████████████████▍                            | 23/50 [00:16<00:16,  1.63it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  48%|█████████████████████████▍                           | 24/50 [00:17<00:15,  1.67it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  50%|██████████████████████████▌                          | 25/50 [00:17<00:14,  1.68it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  52%|███████████████████████████▌                         | 26/50 [00:17<00:11,  2.07it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  54%|████████████████████████████▌                        | 27/50 [00:18<00:11,  2.01it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  56%|█████████████████████████████▋                       | 28/50 [00:19<00:15,  1.38it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  58%|██████████████████████████████▋                      | 29/50 [00:20<00:13,  1.58it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  60%|███████████████████████████████▊                     | 30/50 [00:20<00:11,  1.69it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  62%|████████████████████████████████▊                    | 31/50 [00:20<00:10,  1.85it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  64%|█████████████████████████████████▉                   | 32/50 [00:21<00:09,  1.90it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  66%|██████████████████████████████████▉                  | 33/50 [00:22<00:09,  1.86it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  68%|████████████████████████████████████                 | 34/50 [00:22<00:08,  1.97it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  70%|█████████████████████████████████████                | 35/50 [00:23<00:08,  1.82it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  72%|██████████████████████████████████████▏              | 36/50 [00:24<00:10,  1.29it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  74%|███████████████████████████████████████▏             | 37/50 [00:26<00:14,  1.09s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  76%|████████████████████████████████████████▎            | 38/50 [00:27<00:13,  1.13s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  78%|█████████████████████████████████████████▎           | 39/50 [00:28<00:10,  1.00it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  80%|██████████████████████████████████████████▍          | 40/50 [00:29<00:10,  1.03s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  82%|███████████████████████████████████████████▍         | 41/50 [00:29<00:08,  1.11it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  84%|████████████████████████████████████████████▌        | 42/50 [00:30<00:06,  1.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  86%|█████████████████████████████████████████████▌       | 43/50 [00:30<00:04,  1.41it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  88%|██████████████████████████████████████████████▋      | 44/50 [00:31<00:04,  1.40it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  90%|███████████████████████████████████████████████▋     | 45/50 [00:32<00:03,  1.32it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  92%|████████████████████████████████████████████████▊    | 46/50 [00:32<00:02,  1.61it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  94%|█████████████████████████████████████████████████▊   | 47/50 [00:33<00:01,  1.82it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  96%|██████████████████████████████████████████████████▉  | 48/50 [00:33<00:01,  1.95it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1:  98%|███████████████████████████████████████████████████▉ | 49/50 [00:34<00:00,  1.67it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-1: 100%|█████████████████████████████████████████████████████| 50/50 [00:34<00:00,  1.44it/s]



Results for top-1:
  F1: 0.516
  EM: 46.0%


all-MiniLM-L6-v2... | top-3:   0%|                                                              | 0/50 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:   2%|█                                                     | 1/50 [00:00<00:42,  1.15it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:   4%|██▏                                                   | 2/50 [00:01<00:38,  1.25it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:   6%|███▏                                                  | 3/50 [00:02<00:32,  1.44it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:   8%|████▎                                                 | 4/50 [00:02<00:32,  1.43it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  10%|█████▍                                                | 5/50 [00:03<00:31,  1.45it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  12%|██████▍                                               | 6/50 [00:04<00:35,  1.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  14%|███████▌                                              | 7/50 [00:05<00:30,  1.39it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  16%|████████▋                                             | 8/50 [00:06<00:32,  1.27it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  18%|█████████▋                                            | 9/50 [00:06<00:30,  1.34it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  20%|██████████▌                                          | 10/50 [00:07<00:24,  1.64it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  22%|███████████▋                                         | 11/50 [00:07<00:21,  1.85it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  24%|████████████▋                                        | 12/50 [00:07<00:20,  1.85it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  26%|█████████████▊                                       | 13/50 [00:08<00:19,  1.94it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  28%|██████████████▊                                      | 14/50 [00:08<00:16,  2.22it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  30%|███████████████▉                                     | 15/50 [00:09<00:19,  1.83it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  32%|████████████████▉                                    | 16/50 [00:10<00:25,  1.34it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  34%|██████████████████                                   | 17/50 [00:11<00:28,  1.16it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  36%|███████████████████                                  | 18/50 [00:13<00:33,  1.04s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  38%|████████████████████▏                                | 19/50 [00:13<00:26,  1.15it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  40%|█████████████████████▏                               | 20/50 [00:14<00:24,  1.22it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  42%|██████████████████████▎                              | 21/50 [00:15<00:24,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  44%|███████████████████████▎                             | 22/50 [00:15<00:19,  1.46it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  46%|████████████████████████▍                            | 23/50 [00:15<00:15,  1.73it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  48%|█████████████████████████▍                           | 24/50 [00:16<00:15,  1.71it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  50%|██████████████████████████▌                          | 25/50 [00:17<00:14,  1.73it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  52%|███████████████████████████▌                         | 26/50 [00:17<00:11,  2.05it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  54%|████████████████████████████▌                        | 27/50 [00:17<00:10,  2.17it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  56%|█████████████████████████████▋                       | 28/50 [00:19<00:15,  1.47it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  58%|██████████████████████████████▋                      | 29/50 [00:19<00:12,  1.72it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  60%|███████████████████████████████▊                     | 30/50 [00:19<00:11,  1.71it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  62%|████████████████████████████████▊                    | 31/50 [00:20<00:10,  1.75it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  64%|█████████████████████████████████▉                   | 32/50 [00:21<00:14,  1.21it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  66%|██████████████████████████████████▉                  | 33/50 [00:22<00:13,  1.22it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  68%|████████████████████████████████████                 | 34/50 [00:23<00:11,  1.38it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  70%|█████████████████████████████████████                | 35/50 [00:24<00:11,  1.33it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  72%|██████████████████████████████████████▏              | 36/50 [00:25<00:11,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  74%|███████████████████████████████████████▏             | 37/50 [00:26<00:12,  1.06it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  76%|████████████████████████████████████████▎            | 38/50 [00:27<00:12,  1.07s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  78%|█████████████████████████████████████████▎           | 39/50 [00:28<00:11,  1.03s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  80%|██████████████████████████████████████████▍          | 40/50 [00:30<00:11,  1.14s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  82%|███████████████████████████████████████████▍         | 41/50 [00:30<00:08,  1.00it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  84%|████████████████████████████████████████████▌        | 42/50 [00:31<00:07,  1.11it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  86%|█████████████████████████████████████████████▌       | 43/50 [00:31<00:05,  1.29it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  88%|██████████████████████████████████████████████▋      | 44/50 [00:32<00:04,  1.38it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  90%|███████████████████████████████████████████████▋     | 45/50 [00:33<00:03,  1.35it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  92%|████████████████████████████████████████████████▊    | 46/50 [00:33<00:02,  1.62it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  94%|█████████████████████████████████████████████████▊   | 47/50 [00:33<00:01,  1.92it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  96%|██████████████████████████████████████████████████▉  | 48/50 [00:34<00:00,  2.03it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3:  98%|███████████████████████████████████████████████████▉ | 49/50 [00:35<00:00,  1.76it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-3: 100%|█████████████████████████████████████████████████████| 50/50 [00:35<00:00,  1.41it/s]



Results for top-3:
  F1: 0.516
  EM: 46.0%


all-MiniLM-L6-v2... | top-5:   0%|                                                              | 0/50 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:   2%|█                                                     | 1/50 [00:00<00:45,  1.08it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:   4%|██▏                                                   | 2/50 [00:01<00:40,  1.19it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:   6%|███▏                                                  | 3/50 [00:02<00:38,  1.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:   8%|████▎                                                 | 4/50 [00:03<00:41,  1.10it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  10%|█████▍                                                | 5/50 [00:04<00:37,  1.20it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  12%|██████▍                                               | 6/50 [00:05<00:40,  1.09it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  14%|███████▌                                              | 7/50 [00:05<00:33,  1.28it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  16%|████████▋                                             | 8/50 [00:06<00:34,  1.20it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  18%|█████████▋                                            | 9/50 [00:07<00:32,  1.25it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  20%|██████████▌                                          | 10/50 [00:07<00:26,  1.50it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  22%|███████████▋                                         | 11/50 [00:08<00:23,  1.64it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  24%|████████████▋                                        | 12/50 [00:08<00:23,  1.63it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  26%|█████████████▊                                       | 13/50 [00:09<00:20,  1.80it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  28%|██████████████▊                                      | 14/50 [00:09<00:17,  2.09it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  30%|███████████████▉                                     | 15/50 [00:10<00:19,  1.80it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  32%|████████████████▉                                    | 16/50 [00:11<00:24,  1.38it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  34%|██████████████████                                   | 17/50 [00:12<00:25,  1.28it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  36%|███████████████████                                  | 18/50 [00:14<00:33,  1.03s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  38%|████████████████████▏                                | 19/50 [00:14<00:26,  1.17it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  40%|█████████████████████▏                               | 20/50 [00:15<00:23,  1.28it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  42%|██████████████████████▎                              | 21/50 [00:16<00:25,  1.16it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  44%|███████████████████████▎                             | 22/50 [00:16<00:19,  1.42it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  46%|████████████████████████▍                            | 23/50 [00:16<00:16,  1.67it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  48%|█████████████████████████▍                           | 24/50 [00:17<00:15,  1.66it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  50%|██████████████████████████▌                          | 25/50 [00:18<00:14,  1.69it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  52%|███████████████████████████▌                         | 26/50 [00:18<00:11,  2.04it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  54%|████████████████████████████▌                        | 27/50 [00:18<00:10,  2.20it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  56%|█████████████████████████████▋                       | 28/50 [00:20<00:15,  1.38it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  58%|██████████████████████████████▋                      | 29/50 [00:20<00:13,  1.53it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  60%|███████████████████████████████▊                     | 30/50 [00:21<00:12,  1.56it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  62%|████████████████████████████████▊                    | 31/50 [00:21<00:11,  1.67it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  64%|█████████████████████████████████▉                   | 32/50 [00:22<00:10,  1.75it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  66%|██████████████████████████████████▉                  | 33/50 [00:22<00:09,  1.75it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  68%|████████████████████████████████████                 | 34/50 [00:23<00:08,  1.96it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  70%|█████████████████████████████████████                | 35/50 [00:23<00:08,  1.85it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  72%|██████████████████████████████████████▏              | 36/50 [00:24<00:10,  1.38it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  74%|███████████████████████████████████████▏             | 37/50 [00:26<00:11,  1.16it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  76%|████████████████████████████████████████▎            | 38/50 [00:27<00:11,  1.02it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  78%|█████████████████████████████████████████▎           | 39/50 [00:27<00:09,  1.16it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  80%|██████████████████████████████████████████▍          | 40/50 [00:28<00:09,  1.08it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  82%|███████████████████████████████████████████▍         | 41/50 [00:29<00:07,  1.21it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  84%|████████████████████████████████████████████▌        | 42/50 [00:30<00:06,  1.31it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  86%|█████████████████████████████████████████████▌       | 43/50 [00:30<00:04,  1.49it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  88%|██████████████████████████████████████████████▋      | 44/50 [00:31<00:04,  1.37it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  90%|███████████████████████████████████████████████▋     | 45/50 [00:32<00:04,  1.24it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  92%|████████████████████████████████████████████████▊    | 46/50 [00:32<00:02,  1.39it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  94%|█████████████████████████████████████████████████▊   | 47/50 [00:33<00:01,  1.65it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  96%|██████████████████████████████████████████████████▉  | 48/50 [00:33<00:01,  1.78it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5:  98%|███████████████████████████████████████████████████▉ | 49/50 [00:34<00:00,  1.47it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-MiniLM-L6-v2... | top-5: 100%|█████████████████████████████████████████████████████| 50/50 [00:35<00:00,  1.42it/s]
2025-10-02 03:40:40,170 - naive_rag - INFO - Initializing RAG with embedding model: all-mpnet-base-v2
2025-10-02 03:40:40,185 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: cpu
2025-10-02 03:40:40,185 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: all-mpnet-base-v2



Results for top-5:
  F1: 0.516
  EM: 46.0%


Embedding Model: all-mpnet-base-v2




modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

2025-10-02 03:40:49,412 - naive_rag - INFO - RAG system initialized successfully
2025-10-02 03:40:49,554 - naive_rag - INFO - Loading RAG Mini Wikipedia dataset...
2025-10-02 03:40:49,554 - naive_rag - INFO - Loading text corpus...
2025-10-02 03:40:50,958 - naive_rag - INFO - Loading Q&A pairs...
2025-10-02 03:40:52,125 - naive_rag - INFO - Loaded 3200 documents and 918 Q&A pairs
2025-10-02 03:40:52,125 - naive_rag - INFO - Creating embeddings for 3200 documents...


Batches:   0%|          | 0/100 [00:00<?, ?it/s]

2025-10-02 03:49:13,403 - naive_rag - INFO - Created embeddings with shape: (3200, 768)
2025-10-02 03:49:13,403 - naive_rag - INFO - Building FAISS vector database...
2025-10-02 03:49:13,412 - naive_rag - INFO - FAISS index built with 3200 vectors
2025-10-02 03:49:13,413 - naive_rag - INFO - Loading generation model: google/flan-t5-base
2025-10-02 03:49:14,749 - naive_rag - INFO - Generation model loaded successfully
all-mpnet-base-v2... | top-1:   0%|                                                             | 0/50 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:   2%|█                                                    | 1/50 [00:01<01:05,  1.34s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:   4%|██                                                   | 2/50 [00:02<00:50,  1.04s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:   6%|███▏                                                 | 3/50 [00:02<00:36,  1.30it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:   8%|████▏                                                | 4/50 [00:03<00:35,  1.31it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  10%|█████▎                                               | 5/50 [00:03<00:31,  1.41it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  12%|██████▎                                              | 6/50 [00:05<00:35,  1.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  14%|███████▍                                             | 7/50 [00:05<00:31,  1.37it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  16%|████████▍                                            | 8/50 [00:06<00:35,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  18%|█████████▌                                           | 9/50 [00:07<00:34,  1.20it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  20%|██████████▍                                         | 10/50 [00:08<00:40,  1.02s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  22%|███████████▍                                        | 11/50 [00:09<00:33,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  24%|████████████▍                                       | 12/50 [00:10<00:32,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  26%|█████████████▌                                      | 13/50 [00:11<00:31,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  28%|██████████████▌                                     | 14/50 [00:12<00:38,  1.08s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  30%|███████████████▌                                    | 15/50 [00:13<00:34,  1.02it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  32%|████████████████▋                                   | 16/50 [00:14<00:34,  1.02s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  34%|█████████████████▋                                  | 17/50 [00:15<00:30,  1.10it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  36%|██████████████████▋                                 | 18/50 [00:17<00:41,  1.31s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  38%|███████████████████▊                                | 19/50 [00:18<00:39,  1.28s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  40%|████████████████████▊                               | 20/50 [00:19<00:35,  1.17s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  42%|█████████████████████▊                              | 21/50 [00:20<00:33,  1.16s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  44%|██████████████████████▉                             | 22/50 [00:20<00:25,  1.11it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  46%|███████████████████████▉                            | 23/50 [00:21<00:20,  1.31it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  48%|████████████████████████▉                           | 24/50 [00:22<00:20,  1.24it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  50%|██████████████████████████                          | 25/50 [00:22<00:19,  1.31it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  52%|███████████████████████████                         | 26/50 [00:24<00:23,  1.04it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  54%|████████████████████████████                        | 27/50 [00:25<00:24,  1.04s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  56%|█████████████████████████████                       | 28/50 [00:27<00:25,  1.16s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  58%|██████████████████████████████▏                     | 29/50 [00:27<00:19,  1.08it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  60%|███████████████████████████████▏                    | 30/50 [00:27<00:15,  1.25it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  62%|████████████████████████████████▏                   | 31/50 [00:28<00:12,  1.46it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  64%|█████████████████████████████████▎                  | 32/50 [00:28<00:11,  1.54it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  66%|██████████████████████████████████▎                 | 33/50 [00:29<00:11,  1.53it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  68%|███████████████████████████████████▎                | 34/50 [00:30<00:09,  1.68it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  70%|████████████████████████████████████▍               | 35/50 [00:30<00:08,  1.68it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  72%|█████████████████████████████████████▍              | 36/50 [00:31<00:10,  1.32it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  74%|██████████████████████████████████████▍             | 37/50 [00:33<00:11,  1.12it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  76%|███████████████████████████████████████▌            | 38/50 [00:34<00:11,  1.05it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  78%|████████████████████████████████████████▌           | 39/50 [00:34<00:09,  1.22it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  80%|█████████████████████████████████████████▌          | 40/50 [00:35<00:08,  1.16it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  82%|██████████████████████████████████████████▋         | 41/50 [00:36<00:09,  1.02s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  84%|███████████████████████████████████████████▋        | 42/50 [00:37<00:07,  1.03it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  86%|████████████████████████████████████████████▋       | 43/50 [00:38<00:05,  1.17it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  88%|█████████████████████████████████████████████▊      | 44/50 [00:39<00:04,  1.27it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  90%|██████████████████████████████████████████████▊     | 45/50 [00:39<00:03,  1.36it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  92%|███████████████████████████████████████████████▊    | 46/50 [00:40<00:02,  1.59it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  94%|████████████████████████████████████████████████▉   | 47/50 [00:40<00:01,  1.84it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  96%|█████████████████████████████████████████████████▉  | 48/50 [00:40<00:01,  1.91it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1:  98%|██████████████████████████████████████████████████▉ | 49/50 [00:41<00:00,  1.60it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-1: 100%|████████████████████████████████████████████████████| 50/50 [00:42<00:00,  1.18it/s]



Results for top-1:
  F1: 0.632
  EM: 56.0%


all-mpnet-base-v2... | top-3:   0%|                                                             | 0/50 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:   2%|█                                                    | 1/50 [00:01<01:30,  1.84s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:   4%|██                                                   | 2/50 [00:02<01:04,  1.35s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:   6%|███▏                                                 | 3/50 [00:03<00:43,  1.08it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:   8%|████▏                                                | 4/50 [00:04<00:41,  1.11it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  10%|█████▎                                               | 5/50 [00:04<00:36,  1.24it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  12%|██████▎                                              | 6/50 [00:05<00:39,  1.11it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  14%|███████▍                                             | 7/50 [00:06<00:34,  1.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  16%|████████▍                                            | 8/50 [00:07<00:35,  1.19it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  18%|█████████▌                                           | 9/50 [00:08<00:37,  1.10it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  20%|██████████▍                                         | 10/50 [00:09<00:41,  1.05s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  22%|███████████▍                                        | 11/50 [00:10<00:34,  1.13it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  24%|████████████▍                                       | 12/50 [00:11<00:32,  1.17it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  26%|█████████████▌                                      | 13/50 [00:11<00:31,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  28%|██████████████▌                                     | 14/50 [00:13<00:36,  1.02s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  30%|███████████████▌                                    | 15/50 [00:13<00:30,  1.15it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  32%|████████████████▋                                   | 16/50 [00:15<00:33,  1.02it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  34%|█████████████████▋                                  | 17/50 [00:15<00:29,  1.13it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  36%|██████████████████▋                                 | 18/50 [00:17<00:35,  1.11s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  38%|███████████████████▊                                | 19/50 [00:18<00:31,  1.02s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  40%|████████████████████▊                               | 20/50 [00:18<00:28,  1.07it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  42%|█████████████████████▊                              | 21/50 [00:19<00:27,  1.07it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  44%|██████████████████████▉                             | 22/50 [00:20<00:21,  1.33it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  46%|███████████████████████▉                            | 23/50 [00:20<00:17,  1.54it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  48%|████████████████████████▉                           | 24/50 [00:21<00:17,  1.47it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  50%|██████████████████████████                          | 25/50 [00:22<00:16,  1.51it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  52%|███████████████████████████                         | 26/50 [00:23<00:21,  1.14it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  54%|████████████████████████████                        | 27/50 [00:24<00:22,  1.03it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  56%|█████████████████████████████                       | 28/50 [00:25<00:23,  1.08s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  58%|██████████████████████████████▏                     | 29/50 [00:26<00:18,  1.11it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  60%|███████████████████████████████▏                    | 30/50 [00:26<00:14,  1.34it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  62%|████████████████████████████████▏                   | 31/50 [00:27<00:12,  1.55it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  64%|█████████████████████████████████▎                  | 32/50 [00:27<00:11,  1.60it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  66%|██████████████████████████████████▎                 | 33/50 [00:28<00:10,  1.59it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  68%|███████████████████████████████████▎                | 34/50 [00:28<00:08,  1.82it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  70%|████████████████████████████████████▍               | 35/50 [00:29<00:08,  1.70it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  72%|█████████████████████████████████████▍              | 36/50 [00:30<00:10,  1.29it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  74%|██████████████████████████████████████▍             | 37/50 [00:31<00:12,  1.07it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  76%|███████████████████████████████████████▌            | 38/50 [00:33<00:12,  1.05s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  78%|████████████████████████████████████████▌           | 39/50 [00:33<00:09,  1.14it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  80%|█████████████████████████████████████████▌          | 40/50 [00:34<00:08,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  82%|██████████████████████████████████████████▋         | 41/50 [00:35<00:07,  1.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  84%|███████████████████████████████████████████▋        | 42/50 [00:35<00:06,  1.31it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  86%|████████████████████████████████████████████▋       | 43/50 [00:36<00:05,  1.36it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  88%|█████████████████████████████████████████████▊      | 44/50 [00:37<00:04,  1.47it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  90%|██████████████████████████████████████████████▊     | 45/50 [00:37<00:03,  1.57it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  92%|███████████████████████████████████████████████▊    | 46/50 [00:38<00:02,  1.74it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  94%|████████████████████████████████████████████████▉   | 47/50 [00:38<00:01,  1.92it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  96%|█████████████████████████████████████████████████▉  | 48/50 [00:38<00:01,  1.97it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3:  98%|██████████████████████████████████████████████████▉ | 49/50 [00:39<00:00,  1.62it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-3: 100%|████████████████████████████████████████████████████| 50/50 [00:40<00:00,  1.24it/s]



Results for top-3:
  F1: 0.632
  EM: 56.0%


all-mpnet-base-v2... | top-5:   0%|                                                             | 0/50 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:   2%|█                                                    | 1/50 [00:01<01:03,  1.29s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:   4%|██                                                   | 2/50 [00:02<00:48,  1.01s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:   6%|███▏                                                 | 3/50 [00:02<00:35,  1.31it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:   8%|████▏                                                | 4/50 [00:03<00:37,  1.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  10%|█████▎                                               | 5/50 [00:04<00:32,  1.37it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  12%|██████▎                                              | 6/50 [00:05<00:37,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  14%|███████▍                                             | 7/50 [00:05<00:32,  1.34it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  16%|████████▍                                            | 8/50 [00:07<00:39,  1.05it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  18%|█████████▌                                           | 9/50 [00:08<00:42,  1.03s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  20%|██████████▍                                         | 10/50 [00:09<00:49,  1.23s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  22%|███████████▍                                        | 11/50 [00:10<00:41,  1.06s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  24%|████████████▍                                       | 12/50 [00:11<00:40,  1.07s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  26%|█████████████▌                                      | 13/50 [00:12<00:38,  1.04s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  28%|██████████████▌                                     | 14/50 [00:14<00:43,  1.20s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  30%|███████████████▌                                    | 15/50 [00:14<00:35,  1.02s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  32%|████████████████▋                                   | 16/50 [00:16<00:39,  1.16s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  34%|█████████████████▋                                  | 17/50 [00:17<00:35,  1.06s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  36%|██████████████████▋                                 | 18/50 [00:18<00:40,  1.27s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  38%|███████████████████▊                                | 19/50 [00:19<00:34,  1.12s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  40%|████████████████████▊                               | 20/50 [00:20<00:30,  1.03s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  42%|█████████████████████▊                              | 21/50 [00:21<00:29,  1.00s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  44%|██████████████████████▉                             | 22/50 [00:21<00:22,  1.25it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  46%|███████████████████████▉                            | 23/50 [00:22<00:19,  1.37it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  48%|████████████████████████▉                           | 24/50 [00:23<00:19,  1.32it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  50%|██████████████████████████                          | 25/50 [00:23<00:18,  1.38it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  52%|███████████████████████████                         | 26/50 [00:25<00:23,  1.01it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  54%|████████████████████████████                        | 27/50 [00:26<00:24,  1.07s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  56%|█████████████████████████████                       | 28/50 [00:27<00:24,  1.13s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  58%|██████████████████████████████▏                     | 29/50 [00:28<00:19,  1.09it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  60%|███████████████████████████████▏                    | 30/50 [00:28<00:15,  1.32it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  62%|████████████████████████████████▏                   | 31/50 [00:29<00:12,  1.55it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  64%|█████████████████████████████████▎                  | 32/50 [00:29<00:11,  1.63it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  66%|██████████████████████████████████▎                 | 33/50 [00:30<00:10,  1.61it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  68%|███████████████████████████████████▎                | 34/50 [00:30<00:09,  1.72it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  70%|████████████████████████████████████▍               | 35/50 [00:31<00:09,  1.62it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  72%|█████████████████████████████████████▍              | 36/50 [00:32<00:10,  1.36it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  74%|██████████████████████████████████████▍             | 37/50 [00:33<00:11,  1.11it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  76%|███████████████████████████████████████▌            | 38/50 [00:35<00:11,  1.01it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  78%|████████████████████████████████████████▌           | 39/50 [00:35<00:09,  1.18it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  80%|█████████████████████████████████████████▌          | 40/50 [00:36<00:08,  1.17it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  82%|██████████████████████████████████████████▋         | 41/50 [00:37<00:07,  1.24it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  84%|███████████████████████████████████████████▋        | 42/50 [00:37<00:06,  1.29it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  86%|████████████████████████████████████████████▋       | 43/50 [00:38<00:04,  1.44it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  88%|█████████████████████████████████████████████▊      | 44/50 [00:38<00:04,  1.44it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  90%|██████████████████████████████████████████████▊     | 45/50 [00:39<00:03,  1.49it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  92%|███████████████████████████████████████████████▊    | 46/50 [00:40<00:02,  1.61it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  94%|████████████████████████████████████████████████▉   | 47/50 [00:40<00:01,  1.71it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  96%|█████████████████████████████████████████████████▉  | 48/50 [00:41<00:01,  1.67it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5:  98%|██████████████████████████████████████████████████▉ | 49/50 [00:42<00:00,  1.31it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

all-mpnet-base-v2... | top-5: 100%|████████████████████████████████████████████████████| 50/50 [00:43<00:00,  1.16it/s]


Results for top-5:
  F1: 0.632
  EM: 56.0%





In [7]:
#Displaying and saving results
# Turn the accumulated experiment records (built by the earlier experiment
# cell) into a table for display and export.
# NOTE(review): in the recorded run, F1 and EM are identical for every top_k
# within each embedding model -- verify that top_k actually changes what is
# retrieved / passed to the generator before drawing conclusions from it.
df = pd.DataFrame(results_matrix)

print("\n")
print("STEP 4: PARAMETER EXPERIMENT RESULTS\n")
# Plain-text rendering without the positional index column.
print(df.to_string(index=False))

# Save results
# The output path lives in a single variable so the confirmation message
# always names the file that was actually written (the message previously
# reported 'step4_parameter_experiments.csv', which is never created).
results_csv_path = '../results/04_parameter_experiments.csv'
df.to_csv(results_csv_path, index=False)
print(f"\nResults saved to {results_csv_path}")



STEP 4: PARAMETER EXPERIMENT RESULTS

  Embedding Model  Top-K Avg F1  EM % Time (min)
 all-MiniLM-L6-v2      1  0.516 46.0%        0.6
 all-MiniLM-L6-v2      3  0.516 46.0%        0.6
 all-MiniLM-L6-v2      5  0.516 46.0%        0.6
all-mpnet-base-v2      1  0.632 56.0%        0.7
all-mpnet-base-v2      3  0.632 56.0%        0.7
all-mpnet-base-v2      5  0.632 56.0%        0.7

Results saved to ../results/step4_parameter_experiments.csv
