## Import packages

In [1]:
import pandas as pd
from tqdm import tqdm
from app.services.evaluation.map_evaluator import compute_map,average_precision
from app.services.bm25_service import BM25Service
import os


## Define the dataset name

In [2]:
# Collections this notebook can evaluate; point `datasetname` at one of them.
dataset_name1 = "nano-beir/arguana"
dataset_name2 = "beir/webis-touche2020/v2"
dataset_name3 = "beir/quora/test"

datasetname = dataset_name1

# Flattened label derived from the dataset path: "/" becomes "-" and "\" becomes
# "_". It is used further down to build the results directory and file name.
name = datasetname.translate(str.maketrans({"/": "-", "\\": "_"})).strip()

## Load queries and qrels files

In [3]:
if not datasetname:
    raise ValueError("datasetname variable is not defined")

# Read every identifier column as text. Left to type inference, pandas turns
# numeric-looking IDs into integers (and drops leading zeros), which can never
# equal the str-cast document ids that the scoring loop builds from the search
# results. Both frames use the same dtype so query ids stay comparable.
qrels_df = pd.read_csv(
    f"data/{datasetname}/qrels.tsv",
    sep="\t",
    names=["query_id", "doc_id", "relevance"],
    dtype={"query_id": str, "doc_id": str},
)
queries_df = pd.read_csv(
    f"data/{datasetname}/queries.tsv",
    sep="\t",
    names=["query_id", "text"],
    dtype={"query_id": str},
)


## Create the BM25 service instance

In [4]:
# Per-query results, filled by the scoring loop: score and query id are kept
# in two parallel lists that share the same ordering.
map_scores, query_ids = [], []

# Build the BM25 service for the selected dataset and load it before searching.
bm25 = BM25Service(datasetname)
bm25.load()

⚠️ Loading BM25 model for collection: nano-beir-arguana


## Compute the MAP score for each query

In [8]:

# Retrieval depth requested from BM25 for every query.
TOP_K = 10000

# Start from empty accumulators each time this cell runs. Without the reset,
# re-executing the cell appends a second copy of every query, which duplicates
# rows in the saved CSV and skews the overall mean.
map_scores = []
query_ids = []

for _, row in tqdm(queries_df.iterrows(), total=len(queries_df)):
    query_id = row["query_id"]
    query_text = row["text"]

    # Rank the collection for this query.
    search_results = bm25.search(query_text, top_k=TOP_K)

    # The hits are stored under the "results" key; ids are cast to str before
    # being handed to the evaluator.
    result_items = search_results["results"]
    retrieved_doc_ids = [str(doc["doc_id"]) for doc in result_items]

    # Score this query's ranking against the relevance judgments.
    map_score = compute_map(retrieved_doc_ids, qrels_df, query_id)
    print(f"\n map_score: {map_score}")

    # Keep score and id in step so they can be zipped into a table later.
    map_scores.append(map_score)
    query_ids.append(query_id)


  0%|          | 0/50 [00:00<?, ?it/s]

🔍 Searching BM25 model for collection: nano-beir-arguana


  4%|▍         | 2/50 [00:00<00:09,  5.16it/s]

relevant_docs: {'test-science-wsihwclscaaw-con01b'}

 map_score: 0.07692307692307693
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-economy-eptpghdtre-pro02b'}

 map_score: 0.034482758620689655
🔍 Searching BM25 model for collection: nano-beir-arguana


  6%|▌         | 3/50 [00:00<00:10,  4.48it/s]

relevant_docs: {'test-culture-mthbah-pro02b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-international-amehbuaisji-con04b'}

 map_score: 0.5


  8%|▊         | 4/50 [00:00<00:09,  4.61it/s]

🔍 Searching BM25 model for collection: nano-beir-arguana


 10%|█         | 5/50 [00:01<00:09,  4.50it/s]

relevant_docs: {'test-health-hgwhwbjfs-pro03b'}

 map_score: 0.0034602076124567475
🔍 Searching BM25 model for collection: nano-beir-arguana


 12%|█▏        | 6/50 [00:01<00:10,  4.06it/s]

relevant_docs: {'test-health-dhghwapgd-con01b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana


 14%|█▍        | 7/50 [00:01<00:12,  3.53it/s]

relevant_docs: {'test-education-pstrgsehwt-pro03b'}

 map_score: 1.0
🔍 Searching BM25 model for collection: nano-beir-arguana


 16%|█▌        | 8/50 [00:01<00:10,  3.86it/s]

relevant_docs: {'test-economy-epiasghbf-con01b'}

 map_score: 0.030303030303030304
🔍 Searching BM25 model for collection: nano-beir-arguana


 18%|█▊        | 9/50 [00:02<00:12,  3.36it/s]

relevant_docs: {'test-sport-otshwbe2uuyt-con03b'}

 map_score: 0.25
🔍 Searching BM25 model for collection: nano-beir-arguana


 20%|██        | 10/50 [00:02<00:10,  3.69it/s]

relevant_docs: {'test-politics-gvhbhlsbr-con04b'}

 map_score: 0.3333333333333333
🔍 Searching BM25 model for collection: nano-beir-arguana


 22%|██▏       | 11/50 [00:02<00:11,  3.45it/s]

relevant_docs: {'test-economy-egecegphw-pro02b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana


 24%|██▍       | 12/50 [00:03<00:10,  3.64it/s]

relevant_docs: {'test-politics-ypppgvhwmv-pro03b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana


 26%|██▌       | 13/50 [00:03<00:10,  3.69it/s]

relevant_docs: {'test-law-phwmfri-con03b'}

 map_score: 0.0007479431563201197
🔍 Searching BM25 model for collection: nano-beir-arguana


 28%|██▊       | 14/50 [00:04<00:13,  2.61it/s]

relevant_docs: {'test-culture-tlhrilsfhwr-pro01b'}

 map_score: 0.125
🔍 Searching BM25 model for collection: nano-beir-arguana


 30%|███       | 15/50 [00:04<00:13,  2.67it/s]

relevant_docs: {'test-culture-thbcsbptwhht-pro04b'}

 map_score: 0.2
🔍 Searching BM25 model for collection: nano-beir-arguana


 34%|███▍      | 17/50 [00:04<00:09,  3.41it/s]

relevant_docs: {'test-economy-egppphbcb-pro03b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-health-ahiahbgbsp-pro02b'}

 map_score: 0.029411764705882353
🔍 Searching BM25 model for collection: nano-beir-arguana


 36%|███▌      | 18/50 [00:05<00:08,  3.67it/s]

relevant_docs: {'test-health-dhiacihwph-pro01b'}

 map_score: 0.25
🔍 Searching BM25 model for collection: nano-beir-arguana


 38%|███▊      | 19/50 [00:05<00:08,  3.77it/s]

relevant_docs: {'test-politics-cpegiepgh-con01b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana


 40%|████      | 20/50 [00:06<00:11,  2.51it/s]

relevant_docs: {'test-religion-grcrgshwbr-pro03b'}

 map_score: 0.25
🔍 Searching BM25 model for collection: nano-beir-arguana


 42%|████▏     | 21/50 [00:06<00:11,  2.60it/s]

relevant_docs: {'test-culture-mthbah-pro05b'}

 map_score: 0.004901960784313725
🔍 Searching BM25 model for collection: nano-beir-arguana


 44%|████▍     | 22/50 [00:06<00:09,  2.85it/s]

relevant_docs: {'test-politics-oapdhwinkp-con01b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana


 48%|████▊     | 24/50 [00:07<00:07,  3.33it/s]

relevant_docs: {'test-free-speech-debate-ldhwprhs-pro02b'}

 map_score: 0.02702702702702703
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-society-epiasghbf-con04b'}

 map_score: 0.00034129692832764505
🔍 Searching BM25 model for collection: nano-beir-arguana


 50%|█████     | 25/50 [00:07<00:06,  3.87it/s]

relevant_docs: {'test-law-lgplhbssbco-pro02b'}

 map_score: 1.0
🔍 Searching BM25 model for collection: nano-beir-arguana


 54%|█████▍    | 27/50 [00:07<00:05,  4.32it/s]

relevant_docs: {'test-health-dhghhbampt-con01b'}

 map_score: 0.02040816326530612
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-law-umtlilhotac-con02b'}

 map_score: 0.0625
🔍 Searching BM25 model for collection: nano-beir-arguana


 56%|█████▌    | 28/50 [00:07<00:04,  5.05it/s]

relevant_docs: {'test-politics-ypppgvhwmv-con03b'}

 map_score: 0.0011235955056179776
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-international-appghblsba-con03b'}

 map_score: 0.3333333333333333


 58%|█████▊    | 29/50 [00:08<00:04,  4.98it/s]

🔍 Searching BM25 model for collection: nano-beir-arguana


 60%|██████    | 30/50 [00:08<00:04,  4.54it/s]

relevant_docs: {'test-culture-ascidfakhba-pro04b'}

 map_score: 0.011111111111111112
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-education-usuprmhbu-con02b'}

 map_score: 1.0


 64%|██████▍   | 32/50 [00:08<00:03,  5.33it/s]

🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-law-phwmfri-pro01b'}

 map_score: 1.0
🔍 Searching BM25 model for collection: nano-beir-arguana


 66%|██████▌   | 33/50 [00:08<00:03,  4.63it/s]

relevant_docs: {'test-international-gmehwasr-pro05b'}

 map_score: 0.006493506493506494
🔍 Searching BM25 model for collection: nano-beir-arguana


 68%|██████▊   | 34/50 [00:09<00:03,  4.48it/s]

relevant_docs: {'test-education-ughbuesbf-con03b'}

 map_score: 0.16666666666666666
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-free-speech-debate-yfsdfkhbwu-pro02b'}

 map_score: 0.045454545454545456


 70%|███████   | 35/50 [00:09<00:03,  4.60it/s]

🔍 Searching BM25 model for collection: nano-beir-arguana


 72%|███████▏  | 36/50 [00:09<00:03,  4.18it/s]

relevant_docs: {'test-international-gmehwasr-con03b'}

 map_score: 0.0035971223021582736
🔍 Searching BM25 model for collection: nano-beir-arguana


 76%|███████▌  | 38/50 [00:10<00:02,  4.63it/s]

relevant_docs: {'test-economy-egppphbcb-pro01b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-international-siacphbnt-pro04b'}

 map_score: 0.025
🔍 Searching BM25 model for collection: nano-beir-arguana


 80%|████████  | 40/50 [00:10<00:01,  5.29it/s]

relevant_docs: {'test-economy-epegiahsc-pro02b'}

 map_score: 0.05555555555555555
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-international-ipecfiepg-con02b'}

 map_score: 0.058823529411764705
🔍 Searching BM25 model for collection: nano-beir-arguana


 84%|████████▍ | 42/50 [00:10<00:01,  5.62it/s]

relevant_docs: {'test-international-gpdwhwcusa-pro03b'}

 map_score: 0.3333333333333333
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-religion-msgfhwbamec-con02b'}

 map_score: 0.25
🔍 Searching BM25 model for collection: nano-beir-arguana


 86%|████████▌ | 43/50 [00:10<00:01,  5.39it/s]

relevant_docs: {'test-international-aghbfcpspr-pro03b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana


 88%|████████▊ | 44/50 [00:11<00:01,  4.61it/s]

relevant_docs: {'test-digital-freedoms-dfiphbgs-pro01b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana


 92%|█████████▏| 46/50 [00:11<00:00,  4.76it/s]

relevant_docs: {'test-philosophy-elhbrd-pro03b'}

 map_score: 1.0
🔍 Searching BM25 model for collection: nano-beir-arguana
relevant_docs: {'test-politics-glghssi-pro03b'}

 map_score: 0.2
🔍 Searching BM25 model for collection: nano-beir-arguana


 94%|█████████▍| 47/50 [00:12<00:00,  3.63it/s]

relevant_docs: {'test-education-egscphsrdt-con01b'}

 map_score: 0.07142857142857142
🔍 Searching BM25 model for collection: nano-beir-arguana


 96%|█████████▌| 48/50 [00:12<00:00,  3.42it/s]

relevant_docs: {'test-international-siacphbnt-pro01b'}

 map_score: 0.5
🔍 Searching BM25 model for collection: nano-beir-arguana


 98%|█████████▊| 49/50 [00:12<00:00,  2.84it/s]

relevant_docs: {'test-economy-beplcpdffe-con04b'}

 map_score: 1.0
🔍 Searching BM25 model for collection: nano-beir-arguana


100%|██████████| 50/50 [00:13<00:00,  3.74it/s]

relevant_docs: {'test-politics-lghwdecm-con02b'}

 map_score: 0.02564102564102564





## Save the per-query scores to CSV

In [6]:
# One row per evaluated query: its id next to the score it received.
map_df = pd.DataFrame({"query_id": query_ids, "map_score": map_scores})

# Destination: results/MAP/BM25/<name>/<name>_map_scores.csv
output_dir = os.path.join("results", "MAP", "BM25", name)
output_path = os.path.join(output_dir, f"{name}_map_scores.csv")

# Create the directory tree if it is missing; an existing one is fine.
os.makedirs(output_dir, exist_ok=True)

try:
    map_df.to_csv(output_path, index=False)
except Exception as e:
    # Report which path failed, then let the original error propagate.
    print(f"Error saving file to {output_path}: {str(e)}")
    raise


## Report the overall MAP

In [7]:
# Overall MAP is the unweighted mean of the per-query scores in `map_scores`.
# Fail with an actionable message instead of a bare ZeroDivisionError when no
# query has been scored yet (e.g. the scoring cell was skipped or the queries
# file was empty).
if not map_scores:
    raise ValueError(
        "map_scores is empty: run the scoring cell before computing the overall MAP"
    )

overall_map = sum(map_scores) / len(map_scores)
print(f"\n📈 Overall MAP: {overall_map:.4f}")


📈 Overall MAP: 0.3057
