## import packages

In [1]:
import pandas as pd
from tqdm import tqdm
from app.services.evaluator import compute_recall,compute_precision,compute_average_precision,compute_precision_at_k,compute_mrr
from app.services.bm25_service import BM25Service
import os


dataset_name1='nano-beir/arguana'
dataset_name2='beir/webis-touche2020/v2'
dataset_name3='beir/quora/test'
dataset_name4='antique/test'


datasetname=dataset_name4
name=datasetname.replace("/", "-").replace("\\", "_").strip()

## Load queries and qrels files

In [2]:
if not datasetname:
    raise ValueError("datasetname variable is not defined")

qrels_df = pd.read_csv(f"data/{name}/qrels.tsv", sep="\t", names=["query_id", "doc_id", "relevance"])
# print(qrels_df)
queries_df = pd.read_csv(f"data/{name}/queries.tsv", sep="\t", names=["query_id", "text"])


# make instance from VSM

In [3]:
bm25=BM25Service(datasetname)
bm25.load()


⚠️ Loading BM25 model for collection: antique-test


{'collection': 'antique-test',
 'total_documents': 403666,
 'sample_doc_id': '2020338_0',
 'sample_tokens': ['a',
  'small',
  'group',
  'of',
  'politicians',
  'believed',
  'strongly',
  'that',
  'the',
  'fact'],
 'inverted_index_size': 190239}

## Evaluation 

In [4]:



all_avg_precisions = []
all_prec_at_10 = []
all_mrr_ranks = []
all_recall = []

for _, row in tqdm(queries_df.iterrows(), total=len(queries_df)):
    query_id = row["query_id"]
    query_text = row["text"]

    # Use your VSM search function
    search_results = bm25.search(query_text,top_k=1000)
     # Access the list of result dicts
    result_items = search_results["results"]
    retrieved_docs = [str(doc["doc_id"]) for doc in result_items]

    # Step 2: Convert both retrieved and relevant doc_ids to strings
    relevant_docs = set(qrels_df[qrels_df["query_id"] == query_id]["doc_id"].astype(str))


    # retrieved_docs = [doc["doc_id"] for doc in search_results]

    # Compute metrics
    avg_precision = compute_average_precision(relevant_docs, retrieved_docs)
    prec_at_10 = compute_precision_at_k(relevant_docs, retrieved_docs, k=10)
    recall=compute_recall(relevant_docs, retrieved_docs)

    # Compute rank of first relevant doc for MRR
    rank = 0
    for i, doc_id in enumerate(retrieved_docs):
        if doc_id in relevant_docs:
            rank = i + 1
            break

    all_avg_precisions.append(avg_precision)
    all_prec_at_10.append(prec_at_10)
    all_mrr_ranks.append(rank)
    all_recall.append(recall)

# Final scores
map_score = sum(all_avg_precisions) / len(all_avg_precisions)
mean_prec_at_10 = sum(all_prec_at_10) / len(all_prec_at_10)
mrr_score = compute_mrr(all_mrr_ranks)

print(f"MAP: {map_score:.4f}")
print(f"MRR: {mrr_score:.4f}")
print(f"All Recall values: {all_recall}")
print(f"All Precision@10 values: {all_prec_at_10}")


100%|██████████| 200/200 [10:02<00:00,  3.01s/it]

MAP: 0.2915
MRR: 0.8696
All Recall values: [0.9166666666666666, 0.7666666666666667, 0.6521739130434783, 0.5365853658536586, 0.8387096774193549, 0.5277777777777778, 0.8181818181818182, 0.6896551724137931, 0.6774193548387096, 0.05555555555555555, 0.6216216216216216, 0.782608695652174, 0.8, 0.8709677419354839, 0.7777777777777778, 0.45161290322580644, 0.6956521739130435, 0.5454545454545454, 0.5588235294117647, 0.8, 0.3, 0.7, 0.6756756756756757, 0.8846153846153846, 0.875, 0.7058823529411765, 0.5294117647058824, 0.41379310344827586, 0.4411764705882353, 0.725, 0.4772727272727273, 0.7777777777777778, 0.88, 0.41025641025641024, 0.7586206896551724, 0.7857142857142857, 0.6122448979591837, 0.7333333333333333, 0.5714285714285714, 0.896551724137931, 0.5476190476190477, 0.8333333333333334, 0.9565217391304348, 0.6296296296296297, 0.8333333333333334, 0.8235294117647058, 0.8333333333333334, 0.5142857142857142, 0.6923076923076923, 0.9, 0.7, 0.7096774193548387, 0.25, 0.5365853658536586, 0.6571428571428571




## save result

In [7]:
from pathlib import Path

summary = {
    "Mean Average Precision": map_score,
    "Mean Reciprocal Rank": mrr_score
}
import json

output_dir = os.path.join("results", "BM25", name)
os.makedirs(output_dir)
output_path = os.path.join(output_dir,"evaluation_summary.json")

with open(output_path, "w") as f:
    json.dump(summary, f)


# Create DataFrame
df = pd.DataFrame({'All Recall value': all_recall, 'All Precision@10 values': all_prec_at_10})

# Convert string path to Path object
output_dir = Path("results/BM25/")/name

# Create directory if it doesn't exist
output_dir.mkdir(parents=True, exist_ok=True)

# Save as TSV
df.to_csv(output_dir / 'evaluation.tsv', sep='\t', index=False)

    