In [1]:
import sys
sys.path.append('../')

import pyterrier as pt
import pandas as pd
import os
from src.create_index import load_data, load_folds

In [2]:
sub_collections = [
    "t1",
    "t2",
    "t3",
    "t4",
    "t5",
]

In [3]:
def load_runs_for_subcollection(runs_path, sub_collection, fold_no):
    run_files = os.listdir(runs_path)
    runs = []
    run_names = []
    for run_file in run_files:
        if "_"+sub_collection in run_file and "F"+fold_no in run_file:
            run_name = run_file
            if "extended" in run_name or "long" in run_name:
                continue
        
            run_names.append(run_name)    
            run = pt.io.read_results(os.path.join(runs_path, run_name))
            runs.append(run)
    
    return runs, run_names

In [4]:
def make_table(results, folds=False):
    table = results.copy()
    
    
    # Rename runs
    table['name'] = table['name'].str.split('_').str[0]
    table = table.replace({"BM25": "BM25", "BM25+RM3": "+RM3", "BM25+Bo1": "+Bo1", "BM25+RF":"+RF", "BM25+qrel": "+qrel"})

    table = table.rename(columns={"name": "Run", "sub_collection":"t", "ndcg":"nDCG", "ndcg_cut.10":"nDCG@10", "P.10": "P@10", "recip_rank": "MRR"})
    table = table.replace({"t1": "$t_1$", "t2": "$t_2$", "t3": "$t_3$", "t4": "$t_4$", "t5": "$t_5$"})
    
    custom_order = ['BM25', '+RM3', '+Bo1', '+RF', '+qrel']
    table['Run'] = pd.Categorical(table['Run'], categories=custom_order, ordered=True)
    
    # Average over folds    
    if folds:
        table = table.drop("fold_no", axis=1).groupby(["t", "Run"]).mean()
    else:
        table = table.groupby(["t", "Run"]).mean()
           
    return table

# Natural Evolving

In [5]:
results = pd.DataFrame()

for sub_collection in sub_collections:
    topics, qrels = load_data(sub_collection)
    runs, run_names = load_runs_for_subcollection("../data/results_full", sub_collection, "0")
    
    res = pt.Experiment(
        runs,
        topics,
        qrels,
        names=run_names,
        eval_metrics=["ndcg", "ndcg_cut.10", "bpref", "P.10", "recip_rank"],
        verbose=True
        ) 
        
    res["sub_collection"] = sub_collection
    results = pd.concat([results, res])

Java started (triggered by _read_topics_trec) and loaded: pyterrier.java, pyterrier.terrier.java [version=5.10 (build: craigm 2024-08-22 17:33), helper_version=0.0.8]
pt.Experiment: 100%|██████████| 6/6 [00:10<00:00,  1.79s/system]
pt.Experiment: 100%|██████████| 6/6 [00:11<00:00,  1.88s/system]
pt.Experiment: 100%|██████████| 6/6 [00:07<00:00,  1.21s/system]
pt.Experiment: 100%|██████████| 6/6 [00:04<00:00,  1.22system/s]
pt.Experiment: 100%|██████████| 5/5 [00:15<00:00,  3.04s/system]


In [7]:
table = make_table(results)
table

  table = table.groupby(["t", "Run"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,nDCG,nDCG@10,bpref,P@10,MRR
t,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
$t_1$,BM25,0.287587,0.180019,0.315905,0.094785,0.259452
$t_1$,+RM3,0.284912,0.170582,0.315886,0.09127,0.240171
$t_1$,+Bo1,0.289766,0.177348,0.321397,0.094785,0.246114
$t_1$,+RF,0.333804,0.228114,0.367402,0.10907,0.34364
$t_1$,+qrel,0.322639,0.223734,0.333968,0.109977,0.322048
$t_2$,BM25,0.298741,0.182955,0.317212,0.103684,0.264393
$t_2$,+RM3,0.29811,0.183684,0.314662,0.106501,0.256755
$t_2$,+Bo1,0.307314,0.190286,0.323448,0.108126,0.270709
$t_2$,+RF,0.358324,0.243108,0.371986,0.126219,0.375145
$t_2$,+qrel,0.339727,0.233624,0.339201,0.124865,0.347611


In [8]:
table.to_latex("../paper/table-results.tex", index=False, float_format="%.3f")

# Cross Validation

In [9]:
results = pd.DataFrame()

folds = load_folds()

for sub_collection in sub_collections:    
    topics, qrels = load_data(sub_collection)
    
    for fold_no in range(0, len(folds[sub_collection])):
        train_docids = folds[sub_collection][str(fold_no)]["train"]
        test_docids = folds[sub_collection][str(fold_no)]["test"]
        
        runs, run_names = load_runs_for_subcollection("../data/results", sub_collection, str(fold_no))
    
        qrels = qrels[~qrels["docno"].isin(train_docids)]
        
        res = pt.Experiment(
        runs,
        topics,
        qrels,
        names=run_names,
        eval_metrics=["ndcg", "ndcg_cut.10", "bpref", "P.10", "recip_rank"],
        verbose=True
        ) 
        
        res["sub_collection"] = sub_collection
        res["fold_no"] = fold_no
        results = pd.concat([results, res])
    

pt.Experiment: 100%|██████████| 5/5 [00:08<00:00,  1.73s/system]
pt.Experiment: 100%|██████████| 5/5 [00:08<00:00,  1.72s/system]
pt.Experiment: 100%|██████████| 5/5 [00:08<00:00,  1.73s/system]
pt.Experiment: 100%|██████████| 4/4 [00:07<00:00,  1.83s/system]
pt.Experiment: 100%|██████████| 5/5 [00:09<00:00,  1.81s/system]
pt.Experiment: 100%|██████████| 5/5 [00:09<00:00,  1.82s/system]
pt.Experiment: 100%|██████████| 4/4 [00:04<00:00,  1.17s/system]
pt.Experiment: 100%|██████████| 5/5 [00:05<00:00,  1.18s/system]
pt.Experiment: 100%|██████████| 5/5 [00:05<00:00,  1.16s/system]
pt.Experiment: 100%|██████████| 4/4 [00:03<00:00,  1.27system/s]
pt.Experiment: 100%|██████████| 5/5 [00:03<00:00,  1.27system/s]
pt.Experiment: 100%|██████████| 5/5 [00:03<00:00,  1.27system/s]
pt.Experiment: 100%|██████████| 4/4 [00:11<00:00,  2.97s/system]
pt.Experiment: 100%|██████████| 5/5 [00:14<00:00,  2.96s/system]
pt.Experiment: 100%|██████████| 5/5 [00:14<00:00,  2.94s/system]


In [10]:
make_table(results, folds=True)

  table = table.drop("fold_no", axis=1).groupby(["t", "Run"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,nDCG,nDCG@10,bpref,P@10,MRR
t,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
$t_1$,BM25,0.201073,0.130325,0.221094,0.063303,0.169373
$t_1$,+RM3,0.201639,0.124286,0.219087,0.060544,0.160161
$t_1$,+Bo1,0.204611,0.127288,0.224692,0.062094,0.162361
$t_1$,+RF,0.200338,0.129347,0.220243,0.063076,0.168661
$t_1$,+qrel,0.201073,0.130325,0.221094,0.063303,0.169373
$t_2$,BM25,0.2378,0.147111,0.248171,0.073701,0.200901
$t_2$,+RM3,0.240369,0.150867,0.252741,0.078149,0.194281
$t_2$,+Bo1,0.246815,0.154427,0.256868,0.077028,0.207875
$t_2$,+RF,0.235419,0.143647,0.249341,0.072761,0.196226
$t_2$,+qrel,0.217929,0.135181,0.225487,0.069327,0.186593


In [15]:
table.to_latex("../paper/table-results-fold.tex", index=False, float_format="%.3f")
table