## Import packages and select the dataset

In [1]:
import pandas as pd
from tqdm import tqdm
from app.services.evaluator import compute_recall,compute_precision,compute_average_precision,compute_precision_at_k,compute_mrr
from app.services.hybrid_service import HybridSearchService
import os
import asyncio


# Candidate dataset identifiers; switch the evaluated one via `datasetname`.
dataset_name1 = "nano-beir/arguana"
dataset_name2 = "beir/webis-touche2020/v2"
dataset_name3 = "beir/quora/test"
dataset_name4 = "antique/test"

datasetname = dataset_name4

# Flattened form of the identifier ("/" -> "-", "\" -> "_", surrounding
# whitespace removed); used below for data paths and the collection name.
name = datasetname.translate(str.maketrans({"/": "-", "\\": "_"})).strip()

  from .autonotebook import tqdm as notebook_tqdm


## Load queries and qrels files

In [2]:
# Refuse to continue when no dataset has been selected.
if not datasetname:
    raise ValueError("datasetname variable is not defined")

# Relevance judgements: one (query_id, doc_id, relevance) triple per line.
# Column names are passed explicitly, so the first line is read as data.
qrels_df = pd.read_csv(
    f"data/{name}/qrels.tsv",
    sep="\t",
    names=["query_id", "doc_id", "relevance"],
)

# Queries: one (query_id, text) pair per line.
queries_df = pd.read_csv(
    f"data/{name}/queries.tsv",
    sep="\t",
    names=["query_id", "text"],
)


## Create the hybrid search service and load its models

In [3]:
hybrid=HybridSearchService(collection_name=name)
await hybrid.load_models()


MPS backend is available.
Loading models for collection: antique-test
⚠️ Loading BM25 model for collection: antique-test
🔍 Loading embeddings documents


## Evaluation 

In [4]:
from tqdm import tqdm
import asyncio

# Number of queries whose searches are awaited together in one asyncio.gather call.
# Tune this for your machine — higher is faster but needs more memory.
BATCH_SIZE = 50

async def evaluate_query(row, top_k=1000):
    """Run one query through the hybrid index and score its ranked results.

    Args:
        row: A row of ``queries_df``; it must provide ``"query_id"`` and
            ``"text"``.
        top_k: Number of results requested from the index. Defaults to 1000,
            the value that used to be hard-coded.

    Returns:
        A tuple ``(average_precision, precision_at_10, recall, rank)`` where
        ``rank`` is the 1-based position of the first relevant document in the
        retrieved list, or ``0`` when no relevant document was retrieved.
    """
    query_id = row["query_id"]
    query_text = row["text"]

    search_results = await hybrid.search_with_Index(query=query_text, top_k=top_k)
    # Document ids are compared as strings on both sides to avoid int/str mismatches.
    retrieved_docs = [str(doc["doc_id"]) for doc in search_results["results"]]

    # NOTE(review): every judged document for the query counts as relevant,
    # regardless of the "relevance" grade. Datasets whose qrels contain
    # grade-0 (non-relevant) rows would need an extra filter here — confirm.
    is_this_query = qrels_df["query_id"] == query_id
    relevant_docs = set(qrels_df.loc[is_this_query, "doc_id"].astype(str))

    avg_precision = compute_average_precision(relevant_docs, retrieved_docs)
    prec_at_10 = compute_precision_at_k(relevant_docs, retrieved_docs, k=10)
    recall = compute_recall(relevant_docs, retrieved_docs)

    # 1-based rank of the first relevant hit; 0 signals "no relevant hit".
    rank = next(
        (
            position
            for position, doc_id in enumerate(retrieved_docs, start=1)
            if doc_id in relevant_docs
        ),
        0,
    )

    return avg_precision, prec_at_10, recall, rank


async def main():
    all_avg_precisions = []
    all_prec_at_10 = []
    all_mrr_ranks = []
    all_recall = []

    rows = list(queries_df.iterrows())

    for i in tqdm(range(0, len(rows), BATCH_SIZE), desc="Evaluating"):
        batch = rows[i:i+BATCH_SIZE]
        tasks = [evaluate_query(row) for _, row in batch]
        results = await asyncio.gather(*tasks)
        total = len(rows)

        for avg_precision, prec_at_10, recall, rank in results:
            all_avg_precisions.append(avg_precision)
            all_prec_at_10.append(prec_at_10)
            all_mrr_ranks.append(rank)
            all_recall.append(recall)

        percent = (i + len(batch)) / total * 100
        print(f"✅ Progress: {percent:.2f}% ({i + len(batch)}/{total})")

    # Final results
    map_score = sum(all_avg_precisions) / len(all_avg_precisions)
    mrr_score = compute_mrr(all_mrr_ranks)

    print(f"\n✅ MAP: {map_score:.4f}")
    print(f"✅ MRR: {mrr_score:.4f}")
    print(f"✅ Mean Recall: {sum(all_recall)/len(all_recall):.4f}")
    print(f"✅ all Precision@10: {all_prec_at_10:.4f}")

# Now run it in Jupyter:
await main()


Evaluating:  25%|██▌       | 1/4 [02:57<08:51, 177.13s/it]

✅ Progress: 25.00% (50/200)


Evaluating:  50%|█████     | 2/4 [05:06<04:58, 149.18s/it]

✅ Progress: 50.00% (100/200)


Evaluating:  75%|███████▌  | 3/4 [06:56<02:11, 131.12s/it]

✅ Progress: 75.00% (150/200)


Evaluating: 100%|██████████| 4/4 [09:17<00:00, 139.48s/it]

✅ Progress: 100.00% (200/200)

✅ MAP: 0.2139
✅ MRR: 0.8752
✅ Mean Recall: 0.6267





TypeError: unsupported format string passed to list.__format__

## Save results

In [None]:
import json

# Aggregate scores written to the JSON summary.
summary = {
    "Mean Average Precision": map_score,
    "Mean Reciprocal Rank": mrr_score
}

# NOTE(review): this cell writes under "results/HYBRID" while the per-query
# TSV cell writes under "results/Hybrid"; on a case-sensitive filesystem
# these are two different directories — confirm which spelling is intended.
output_dir = os.path.join("results", "HYBRID", name)
# exist_ok=True keeps the cell re-runnable once the directory exists.
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "evaluation_summary.json")

with open(output_path, "w") as f:
    json.dump(summary, f)

    

In [None]:
from pathlib import Path

# Two-column table built from the recall and precision@10 lists.
df = pd.DataFrame(
    data={
        'All Recall value': all_recall,
        'All Precision@10 values': all_prec_at_10,
    }
)

# Target directory for this dataset; created (with parents) when missing.
output_dir = Path("results/Hybrid/").joinpath(name)
output_dir.mkdir(parents=True, exist_ok=True)

# Tab-separated export without the DataFrame index column.
tsv_path = output_dir / 'evaluation.tsv'
df.to_csv(tsv_path, sep='\t', index=False)

