In [1]:
import sys
sys.path.append('../')

import pyterrier as pt
import pandas as pd
import os
from src.create_index import load_data, load_folds

In [2]:
sub_collections = [
    "t1",
    "t2",
    "t3",
    "t4",
    "t5",
]

In [3]:
def load_runs_for_subcollection(runs_path, sub_collection, fold_no):
    run_files = os.listdir(runs_path)
    runs = []
    run_names = []
    for run_file in run_files:
        if "_"+sub_collection in run_file and "F"+fold_no in run_file:
            run_name = run_file
            if "extended" in run_name or "long" in run_name:
                continue
        
            run_names.append(run_name)    
            run = pt.io.read_results(os.path.join(runs_path, run_name))
            runs.append(run)
    
    return runs, run_names

In [4]:
def filter_known_topics(df, sub_collection):
    sub_collections = ["t0", "t1", "t2", "t3", "t4", "t5"]
    query_map = pd.read_csv("../data/query_id_map.csv.gz", compression="gzip")
    query_map = query_map.dropna(subset=[sub_collection]).set_index(sub_collection)
    history = sub_collections[: sub_collections.index(sub_collection)]
    qids = query_map[history].dropna(how="all")
    qids = set(qids.index.tolist())
    return df[df["qid"].isin(qids)]

In [5]:
def make_table(results, folds=False):
    table = results.copy()
    
    
    # Rename runs
    table['name'] = table['name'].str.split('_').str[0]
    table = table.replace({"BM25": "BM25", "BM25+RM3": "+RM3", "BM25+Bo1": "+Bo1", "BM25+RF":"+RF", "BM25+qrel": "+qrel"})

    table = table.rename(columns={"name": "Run", "sub_collection":"t", "ndcg":"nDCG", "ndcg_cut.10":"nDCG@10", "P.10": "P@10", "recip_rank": "MRR"})
    table = table.replace({"t1": "$t_1$", "t2": "$t_2$", "t3": "$t_3$", "t4": "$t_4$", "t5": "$t_5$"})
    
    custom_order = ['BM25', '+RM3', '+Bo1', '+RF', '+qrel']
    table['Run'] = pd.Categorical(table['Run'], categories=custom_order, ordered=True)
    
    # Average over folds    
    if folds:
        table = table.drop("fold_no", axis=1).groupby(["t", "Run"]).mean()
    else:
        table = table.groupby(["t", "Run"]).mean()
           
    return table

# Natural Evolving

In [6]:
results = pd.DataFrame()

for sub_collection in sub_collections:
    topics, qrels = load_data(sub_collection)
    runs, run_names = load_runs_for_subcollection("../data/results_full", sub_collection, "0")
    topics = filter_known_topics(topics, sub_collection)

    res = pt.Experiment(
        runs,
        topics,
        qrels,
        names=run_names,
        eval_metrics=["ndcg", "ndcg_cut.10", "bpref", "P.10", "recip_rank"],
        verbose=True
        )

    res["sub_collection"] = sub_collection
    results = pd.concat([results, res])

Java started (triggered by _read_topics_trec) and loaded: pyterrier.java, pyterrier.terrier.java [version=5.10 (build: craigm 2024-08-22 17:33), helper_version=0.0.8]
pt.Experiment: 100%|██████████| 6/6 [00:09<00:00,  1.57s/system]
pt.Experiment: 100%|██████████| 6/6 [00:09<00:00,  1.66s/system]
pt.Experiment: 100%|██████████| 6/6 [00:06<00:00,  1.07s/system]
pt.Experiment: 100%|██████████| 6/6 [00:04<00:00,  1.36system/s]
pt.Experiment: 100%|██████████| 5/5 [00:13<00:00,  2.75s/system]


In [7]:
table = make_table(results)
table

  table = table.groupby(["t", "Run"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,nDCG,nDCG@10,bpref,P@10,MRR
t,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
$t_1$,BM25,0.29002,0.16736,0.322343,0.097083,0.267963
$t_1$,+RM3,0.28916,0.160526,0.331976,0.097917,0.24829
$t_1$,+Bo1,0.298634,0.172203,0.338289,0.100833,0.266395
$t_1$,+RF,0.459869,0.344109,0.511598,0.149583,0.577352
$t_1$,+qrel,0.418836,0.328013,0.388725,0.152917,0.498002
$t_2$,BM25,0.323675,0.198455,0.344076,0.127331,0.2882
$t_2$,+RM3,0.323631,0.193317,0.342006,0.12701,0.28236
$t_2$,+Bo1,0.33187,0.199886,0.34859,0.13119,0.287696
$t_2$,+RF,0.500509,0.376979,0.506636,0.194212,0.616894
$t_2$,+qrel,0.445316,0.348833,0.409334,0.190193,0.535177


In [8]:
table.reset_index().to_latex("../paper/table-results.tex", index=False, float_format="%.3f")

# Cross Validation

In [13]:
results = pd.DataFrame()

folds = load_folds()

for sub_collection in sub_collections:    
    topics, qrels = load_data(sub_collection)
    topics = filter_known_topics(topics, sub_collection)
        
    for fold_no in range(0, len(folds[sub_collection])):
        train_docids = folds[sub_collection][str(fold_no)]["train"]
        test_docids = folds[sub_collection][str(fold_no)]["test"]
                    
        runs, run_names = load_runs_for_subcollection("../data/results", sub_collection, str(fold_no))
    
        qrels_fold = qrels.copy()
        qrels_fold = qrels_fold[~qrels_fold["docno"].isin(train_docids)]
        
        res = pt.Experiment(
        runs,
        topics,
        qrels,
        names=run_names,
        eval_metrics=["ndcg", "ndcg_cut.10", "bpref", "P.10", "recip_rank"],
        verbose=True
        )
        
        res["sub_collection"] = sub_collection
        res["fold_no"] = fold_no
        results = pd.concat([results, res])

pt.Experiment: 100%|██████████| 5/5 [00:07<00:00,  1.58s/system]
pt.Experiment: 100%|██████████| 5/5 [00:07<00:00,  1.56s/system]
pt.Experiment: 100%|██████████| 5/5 [00:07<00:00,  1.58s/system]
pt.Experiment: 100%|██████████| 5/5 [00:08<00:00,  1.64s/system]
pt.Experiment: 100%|██████████| 5/5 [00:08<00:00,  1.65s/system]
pt.Experiment: 100%|██████████| 5/5 [00:08<00:00,  1.64s/system]
pt.Experiment: 100%|██████████| 5/5 [00:05<00:00,  1.06s/system]
pt.Experiment: 100%|██████████| 5/5 [00:05<00:00,  1.06s/system]
pt.Experiment: 100%|██████████| 5/5 [00:05<00:00,  1.07s/system]
pt.Experiment: 100%|██████████| 5/5 [00:03<00:00,  1.35system/s]
pt.Experiment: 100%|██████████| 5/5 [00:03<00:00,  1.34system/s]
pt.Experiment: 100%|██████████| 5/5 [00:03<00:00,  1.36system/s]
pt.Experiment: 100%|██████████| 5/5 [00:13<00:00,  2.71s/system]
pt.Experiment: 100%|██████████| 5/5 [00:13<00:00,  2.71s/system]
pt.Experiment: 100%|██████████| 5/5 [00:13<00:00,  2.68s/system]


In [14]:
table = make_table(results, folds=True)
table 

  table = table.drop("fold_no", axis=1).groupby(["t", "Run"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,nDCG,nDCG@10,bpref,P@10,MRR
t,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
$t_1$,BM25,0.210387,0.12498,0.228038,0.069028,0.217848
$t_1$,+RM3,0.209302,0.120403,0.233044,0.070139,0.206548
$t_1$,+Bo1,0.217031,0.130342,0.238136,0.072639,0.219205
$t_1$,+RF,0.328842,0.251247,0.346069,0.114306,0.516901
$t_1$,+qrel,0.365628,0.311513,0.314509,0.130972,0.598246
$t_2$,BM25,0.257668,0.160785,0.268939,0.101072,0.253991
$t_2$,+RM3,0.255681,0.15639,0.266143,0.100857,0.245758
$t_2$,+Bo1,0.2619,0.159913,0.271265,0.102465,0.252486
$t_2$,+RF,0.358357,0.262092,0.351168,0.140514,0.511538
$t_2$,+qrel,0.381505,0.308861,0.342533,0.157878,0.557248


In [15]:
# table.to_latex("../paper/table-results-fold.tex", float_format="%.3f")
table.reset_index().to_latex("../paper/table-results-fold.tex", index=False, float_format="%.3f")