## import packages

In [None]:
import pandas as pd
from tqdm import tqdm
from app.services.evaluation.map_evaluator import compute_map,average_precision
from app.services.hybrid_service import HybridSearchService
import os
import asyncio

## Define the dataset name

In [None]:
# Dataset identifiers available for evaluation.
dataset_name1 = "nano-beir/arguana"
dataset_name2 = "beir/webis-touche2020/v2"
dataset_name3 = "beir/quora/test"

# Dataset evaluated in this run; point it at one of the identifiers above.
datasetname = dataset_name1

# Label derived from the identifier: "/" becomes "-", "\" becomes "_",
# and surrounding whitespace is trimmed.
name = datasetname.translate(str.maketrans({"/": "-", "\\": "_"})).strip()

## Load queries and qrels files

In [None]:
# Stop early when no dataset has been selected.
if not datasetname:
    raise ValueError("datasetname variable is not defined")

# Relevance judgements: one (query_id, doc_id, relevance) triple per row.
# Column names are supplied explicitly, so the first line of the file is
# treated as data rather than as a header.
# NOTE(review): column dtypes are left to pandas' inference — confirm that the
# inferred id types line up with the string doc ids built when scoring.
qrels_df = pd.read_csv(
    f"data/{datasetname}/qrels.tsv",
    sep="\t",
    names=["query_id", "doc_id", "relevance"],
)

# Queries: one (query_id, text) pair per row, read the same way.
queries_df = pd.read_csv(
    f"data/{datasetname}/queries.tsv",
    sep="\t",
    names=["query_id", "text"],
)


## Create the hybrid search service instance

In [None]:
hybrid=HybridSearchService(collection_name=datasetname)
await hybrid.load_models()
map_scores = []
query_ids = []

## compute MAP

In [None]:

for _, row in tqdm(queries_df.iterrows(), total=len(queries_df)):
    query_id = row["query_id"]
    query_text = row["text"]
    
    # Get top_k docs from your system
    # search_results = hybrid.search(query_text, top_k=10000)
    search_results = await hybrid.search(query_text, top_k=10000)



    # Access the list of result dicts
    result_items = search_results["results"]
    retrieved_doc_ids = [str(doc["doc_id"]) for doc in result_items]

    map_score = compute_map(retrieved_doc_ids, qrels_df, query_id)
    print(f"\n map_score: {map_score}")
    map_scores.append(map_score)
    query_ids.append(query_id)
    # print(f"\n✅ Mean Average Precision (MAP): {map_score:.4f}")


## Save per-query scores to CSV

In [None]:
# Table with one row per evaluated query and its score.
map_df = pd.DataFrame(dict(query_id=query_ids, map_score=map_scores))

# Destination: results/MAP/HYBRID/<name>/<name>_map_scores.csv
output_dir = os.path.join("results", "MAP", "HYBRID", name)
output_path = os.path.join(output_dir, f"{name}_map_scores.csv")

# Create the target directory (and any missing parents) if it is not there yet.
os.makedirs(output_dir, exist_ok=True)

try:
    map_df.to_csv(output_path, index=False)
except Exception as e:
    # Report where the write failed, then let the original error propagate.
    print(f"Error saving file to {output_path}: {str(e)}")
    raise


## get MAP result

In [None]:
# Report the overall MAP: the arithmetic mean of the per-query scores.
# An empty score list (the scoring cell was not run, or there were no queries)
# would otherwise fail with an opaque ZeroDivisionError, so fail with an
# explicit message instead.
if not map_scores:
    raise ValueError(
        "No MAP scores available: run the scoring cell on a non-empty query set first"
    )

overall_map = sum(map_scores) / len(map_scores)
print(f"\n📈 Overall MAP: {overall_map:.4f}")