# Evaluation on Robust04

In [1]:
from glob import glob
import pandas as pd
from trectools import TrecQrel
import json
from tqdm import tqdm

# Directory that is globbed for '*.jsonl' evaluation files further below.
EVAL_DIR = '../resources/eval/trec-system-runs/trec13/'
# Qrels loaded via trectools; load_eval_file reads qrel.qrels_data['query']
# to count the distinct queries used as the averaging denominator.
qrel = TrecQrel('../resources/unprocessed/topics-and-qrels/qrels.robust04.txt')


def load_eval_file(file_name, qrels=None):
    """Load one evaluation file and average every measure per pool.

    The file is parsed as a single JSON object that maps a pool name to a
    list of per-query result dicts. In each result dict the keys 'run_file'
    and 'query' are treated as metadata; every other key is treated as an
    evaluation measure whose value is converted to float. The top-level
    'task' entry and pools without any results are skipped.

    Scores are summed per measure and divided by the number of distinct
    queries in the qrels (not by the number of scored queries), so a query
    without a score effectively contributes 0 to the average.

    Args:
        file_name: Path of the evaluation file to read.
        qrels: Object exposing a ``qrels_data`` DataFrame with a 'query'
            column. Defaults to the module-level ``qrel`` when omitted.

    Returns:
        A DataFrame with one row per non-empty pool, holding the columns
        'run' and 'pooling' plus one column per measure found in that pool.
    """
    if qrels is None:
        # Backward-compatible default: fall back to the notebook-wide qrels.
        qrels = qrel
    num_queries = len(set(qrels.qrels_data['query'].astype(str).unique()))

    # Context manager guarantees the handle is closed even if parsing fails.
    with open(file_name, 'r') as eval_file:
        eval_result = json.load(eval_file)

    non_measure_keys = ('run_file', 'query')
    ret = []
    for pool_name, results in eval_result.items():
        # 'task' is not a pool; an empty pool has no run to report.
        if pool_name == 'task' or not results:
            continue

        run_file = results[0]['run_file']
        scores = {}
        for result in results:
            # All results of one pool must belong to the same run.
            assert run_file == result['run_file']
            for eval_measure, value in result.items():
                if eval_measure in non_measure_keys:
                    continue
                scores.setdefault(eval_measure, []).append(float(value))

        current_entry = {'run': run_file, 'pooling': pool_name}
        current_entry.update(
            {k: sum(v) / num_queries for k, v in scores.items()}
        )
        ret.append(current_entry)

    return pd.DataFrame(ret)

# Collect one DataFrame per evaluation file, then stack them into `df`.
eval_files = glob(EVAL_DIR + '*.jsonl')
frames = []
for eval_file in tqdm(eval_files):
    frames.append(load_eval_file(eval_file))

df = pd.concat(frames)
df

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 880/880 [00:45<00:00, 19.45it/s]


Unnamed: 0,run,pooling,UNJ@20,UNJ@10,NDCG@20,NDCG@10,MIN-NDCG@10,MAX-NDCG@10,MIN-NDCG@20,MAX-NDCG@20
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,0.012851,,,,,,,
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,,0.011245,,,,,,
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,,,,,,,,
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,,,,,,,,
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,0.039558,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,,,,,,,,
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,0.019880,,,,,,,
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,0.194378,,,,,,,
0,src/main/resources/unprocessed/trec-system-run...,complete-pool,,,,,,,,


In [4]:
# Show the first row of the combined frame as a dict (column name -> value).
dict(df.iloc[0])

{'run': 'src/main/resources/unprocessed/trec-system-runs/trec13/robust/input.apl04rsTDNw5.gz',
 'pooling': 'complete-pool',
 'UNJ@20': 0.012851405622489962,
 'UNJ@10': nan,
 'NDCG@20': nan,
 'NDCG@10': nan,
 'MIN-NDCG@10': nan,
 'MAX-NDCG@10': nan,
 'MIN-NDCG@20': nan,
 'MAX-NDCG@20': nan}