# Results

In [4]:
import json
import pandas as pd
import os
import numpy as np

from src.metrics import (
    mrr_score,
    map_score,
    mr_score,
    mf1_score,
    mndcg_score,
)

In [5]:
# Directory scanned for the raw per-experiment JSON result files.
OUTPUT_DIR = "results/raw"

# Arguments handed to MSMarcoDataset: the folder passed to its constructor
# and the file name passed to load_data().
data_folder = "data/subset_msmarco_train_0"
input_file = "subset_msmarco_train_0.01_99.pkl"

## Loading raw results

In [6]:
# Collect every raw experiment result found in OUTPUT_DIR, keyed by the file
# name with its ".json" suffix removed.
results = {}
# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# `results` (and of everything derived from it) reproducible across runs.
for file in sorted(os.listdir(OUTPUT_DIR)):
    if file.endswith('.json'):
        # JSON is UTF-8 by specification, so do not rely on the platform's
        # locale-dependent default encoding when reading it.
        with open(os.path.join(OUTPUT_DIR, file), 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Strip the 5-character ".json" suffix to obtain the experiment name.
        results[file[:-5]] = data
    else:
        print(f"Skipping {file}, not a JSON file.")

## Loading dataset

In [7]:
from src.datasets import MSMarcoDataset, PreProcessor

# Instantiate the project's MSMarcoDataset with the folder configured above.
dataset = MSMarcoDataset(data_folder)
# Load the configured subset file. The progress bars printed by this cell
# report queries, documents and qrels being loaded (presumably emitted by this
# call -- confirm in src.datasets).
# NOTE(review): input_file has a .pkl extension; if load_data() unpickles it,
# only point this at trusted files.
dataset.load_data(input_file)
# NOTE(review): split_data() presumably partitions the loaded data; whether it
# changes `dataset.qrels` (which the metrics cell reads) is not visible from
# this notebook -- confirm in src.datasets.
dataset.split_data()

Loading queries: 100%|██████████| 2771/2771 [00:00<?, ?it/s]
Loading documents: 100%|██████████| 277168/277168 [00:00<00:00, 1393795.09it/s]
Loading qrels: 100%|██████████| 2845/2845 [00:00<00:00, 342404.44it/s]


## Calculate metrics

In [8]:
# Scoring functions from src.metrics, keyed by the column they fill. Dict
# order defines both the evaluation order and the column order of the table.
ranking_metrics = {
    'mrr': mrr_score,
    'map': map_score,
    'mr': mr_score,
    'mf1': mf1_score,
    'mndcg': mndcg_score,
}
# NumPy aggregates applied to the times recorded for each stage.
timing_stats = {
    'avg_time': np.mean,
    'max_time': np.max,
    'min_time': np.min,
    'std_time': np.std,
}

# Build one row per (experiment, stage) pair, then assemble the frame once.
list_metrics = []
for name, result in results.items():
    for stage in result['stages']:
        stage_row = {'experiment_name': name, 'stage_name': stage}

        # Each scoring function receives the stage's scored documents and
        # the dataset's qrels.
        stage_scores = result['score_docs'][stage]
        for column, score_fn in ranking_metrics.items():
            stage_row[column] = score_fn(stage_scores, dataset.qrels)

        stage_times = result['times'][stage]
        for column, stat_fn in timing_stats.items():
            stage_row[column] = stat_fn(stage_times)

        list_metrics.append(stage_row)
metrics = pd.DataFrame(list_metrics)

In [9]:
# Display format per column: every metric and timing column is rendered with
# two decimal places. The key order matches the columns of the metrics table.
default_style = dict.fromkeys(
    (
        'mrr',
        'map',
        'mr',
        'mf1',
        'mndcg',
        'avg_time',
        'max_time',
        'min_time',
        'std_time',
    ),
    '{:.2f}',
)

In [10]:
# Order the rows by ascending 'mrr', then render them with the per-column
# formats from default_style. The final expression is the cell's output.
metrics_by_mrr = metrics.sort_values(by=['mrr'])
metrics_by_mrr.style.format(default_style)

Unnamed: 0,experiment_name,stage_name,mrr,map,mr,mf1,mndcg,avg_time,max_time,min_time,std_time
0,monobert,BM25,0.49,0.07,0.67,0.13,0.52,1.03,2.27,0.41,0.35
1,monobert,MonoBERT,0.69,0.09,0.82,0.15,0.71,9.73,12.94,8.85,0.85
