In [2]:
from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run
from tira.rest_api_client import Client
import pyterrier as pt
import pandas as pd
from tqdm import tqdm
from statistics import mean
from ranx import fuse, Run, Qrels, optimize_fusion


In [3]:
# Initialise PyTerrier before any `pt.*` call below (the recorded output of
# this cell shows PyTerrier loading Terrier).
ensure_pyterrier_is_loaded()
# TIRA REST client; used further down through `tira.pd` and `tira.pt` to fetch
# submitted runs.
tira = Client()

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [4]:
# PyTerrier handle for the training dataset. It is passed to the TIRA loaders
# and supplies the topics and qrels for the pt.Experiment cell.
pt_dataset = pt.get_dataset('irds:ir-lab-sose-2024/ir-acl-anthology-20240504-training')

In [8]:
def to_ranx_dict(run, qrels = None):
    """Convert a run DataFrame into a ranx ``Run``.

    Args:
        run: DataFrame with the columns ``qid``, ``docno`` and ``score``.
            If the same (qid, docno) pair occurs more than once, the last
            row wins.
        qrels: Optional mapping-like object exposing ``keys()`` (e.g. a ranx
            ``Qrels``). Every key that has no entry in ``run`` is added with
            an empty result list.

    Returns:
        ``Run`` built from a ``{qid: {docno: score}}`` dictionary.

    Notes:
        Query and document ids are converted with ``str()``. The columns are
        read directly instead of via ``DataFrame.iterrows()``, which builds
        one Series per row and upcasts all-numeric rows to float (an integer
        qid ``1`` would become ``1.0``).
    """
    ret = {}
    for qid, docno, score in zip(run['qid'], run['docno'], run['score']):
        ret.setdefault(str(qid), {})[str(docno)] = score

    if qrels is not None:
        # Pad with empty result lists; existing entries are left untouched.
        for key in qrels.keys():
            ret.setdefault(str(key), {})

    return Run(ret)

def from_ranx(run):
    """Flatten a ``{qid: {docno: score}}`` mapping into a DataFrame.

    Args:
        run: Mapping-like object (e.g. the result of ranx ``fuse``) that
            exposes ``keys()`` and ``run[qid]``, where ``run[qid]`` is itself
            a mapping from docno to score.

    Returns:
        DataFrame with the columns ``qid``, ``docno`` and ``score``, one row
        per (query, document) pair in iteration order. An empty ``run``
        yields an empty frame that still carries these three columns.
    """
    ret = []

    for qid in tqdm(run.keys()):
        # Resolve the per-query mapping once; indexing `run[qid]` again for
        # every document repeats the outer lookup needlessly.
        docs = run[qid]
        for docno, score in docs.items():
            ret.append({
                'qid': qid,
                'docno': docno,
                'score': score
            })
    return pd.DataFrame(ret, columns=['qid', 'docno', 'score'])

In [9]:
# Relevance judgments as a ranx Qrels object, loaded for the same dataset id
# that `pt_dataset` uses (here without the 'irds:' prefix).
qrels = Qrels.from_ir_datasets("ir-lab-sose-2024/ir-acl-anthology-20240504-training")

In [10]:
# Fetch the two submitted runs from TIRA for `pt_dataset`. Presumably both are
# DataFrames; to_ranx_dict reads their qid/docno/score columns.
bm25 = tira.pd.from_retriever_submission('ir-lab-sose-2024/tira-ir-starter/BM25 (tira-ir-starter-pyterrier)', pt_dataset)
tinybert = tira.pd.from_retriever_submission('reneuir-2024/tinyfsu/overcast-elk', pt_dataset)

In [11]:
# Convert both runs to ranx Run objects. `qrels` is not passed, so no empty
# entries are added for judged queries that a run does not cover.
run1 = to_ranx_dict(bm25)       # BM25
run3 = to_ranx_dict(tinybert)   # tinybert

In [12]:
# Search the weighted-sum fusion weights for the two runs, scored by nDCG@10.
# NOTE(review): the weights are tuned on `qrels`, which is loaded for the same
# dataset id that the pt.Experiment cell evaluates on — the fused score
# reported there is presumably optimistic; confirm on held-out topics.
best_params = optimize_fusion(
    qrels=qrels,
    runs=[run1, run3],
    norm="min-max",
    method="wsum",
    metric="ndcg@10", 
    show_progress = True,
    step=0.01,  # the recorded run of this cell printed {'weights': (0.62, 0.38)}
)
print(best_params)

Output()

{'weights': (0.62, 0.38)}


In [31]:
# Fuse the two runs with the tuned weights. `norm` and `method` are kept
# identical to the optimize_fusion call that produced `best_params`.
fused_run = fuse(runs=[run1, run3], norm="min-max", method="wsum", params=best_params)
# Flatten to a qid/docno/score DataFrame. The ranx result keeps its own name
# so that `df` is only ever bound to a DataFrame.
df = from_ranx(fused_run)

# Persist the fused run; the recorded output reports the file as "./run.txt".
persist_and_normalize_run(df, 'fusion', default_output='.')


100%|██████████| 68/68 [03:37<00:00,  3.20s/it]


The run file is normalized outside the TIRA sandbox, I will store it at ".".
Done. run file is stored under "./run.txt".


In [15]:
# Compare BM25, the fused run and the tinybert run on nDCG@10.
# The fused system is read back from './run.txt', so the cell that calls
# persist_and_normalize_run must have been executed first.
# NOTE(review): BM25 is addressed as 'ir-benchmarks/...' here but as
# 'ir-lab-sose-2024/...' where the runs are loaded for fusion — confirm both
# ids refer to the same run.
pt.Experiment(
    [tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 (tira-ir-starter-pyterrier)', pt_dataset), 
     pt.transformer.get_transformer(pt.io.read_results('./run.txt')),
     tira.pt.from_submission('reneuir-2024/tinyfsu/overcast-elk', pt_dataset),
     ],
    pt_dataset.get_topics('title'),
    pt_dataset.get_qrels(),
    eval_metrics=['ndcg_cut_10'],
    # Explicit labels: without them two of the three rows show up as the
    # indistinguishable repr 'Transformer()'.
    names=['BM25', 'Fusion (wsum)', 'tinybert (overcast-elk)'],
)

Unnamed: 0,name,ndcg_cut_10
0,<tira.pyterrier_util.TiraRerankingTransformer ...,0.374041
1,Transformer(),0.405539
2,Transformer(),0.282283
