In [15]:
from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run
from tira.rest_api_client import Client
import pyterrier as pt
import pandas as pd
from tqdm import tqdm
from ranx import fuse, Run


In [16]:
# Make sure PyTerrier is ready before any `pt.*` call further down.
ensure_pyterrier_is_loaded()
# TIRA client; used below (`tira.pt.from_submission`) to obtain the runs of existing submissions.
tira = Client()

In [17]:
# Dataset handle; it supplies the topics and is passed to `tira.pt.from_submission` below.
pt_dataset = pt.get_dataset('irds:ir-lab-sose-2024/ir-acl-anthology-20240504-training')

In [18]:
def to_ranx_dict(run, qrels = None):
    """Convert a run DataFrame into a ranx ``Run``.

    Args:
        run: DataFrame with the columns ``qid``, ``docno`` and ``score``
            (one row per retrieved document).
        qrels: Optional mapping keyed by query id. Every key that does not
            occur in ``run`` is added with an empty ``{}`` result so that
            the returned run covers all of these queries.

    Returns:
        A ``Run`` built from a nested ``{qid: {docno: score}}`` dictionary.
        If a (qid, docno) pair occurs more than once, the last score wins.
    """
    ret = {}

    # Walk the three columns in lockstep instead of using ``iterrows()``:
    # ``iterrows`` materialises a Series for every row, which is slow and
    # does not preserve the column dtypes across a row.
    for qid, docno, score in zip(run['qid'], run['docno'], run['score']):
        ret.setdefault(qid, {})[docno] = score

    if qrels is not None:
        for key in qrels.keys():
            # Queries without any retrieved document still get an entry.
            ret.setdefault(key, {})

    return Run(ret)

def from_ranx(run):
    """Flatten a ranx run into a DataFrame with one row per (query, document).

    Args:
        run: Mapping-like run that supports ``keys()`` and ``run[qid]``,
            where ``run[qid]`` in turn maps document ids to scores.

    Returns:
        A ``pandas.DataFrame`` with the columns ``qid``, ``docno`` and
        ``score``. The columns are present even when ``run`` is empty.
    """
    ret = []

    for qid in tqdm(run.keys()):
        # Resolve the per-query results once; the lookup does not change
        # inside the inner loop, so repeating it per document is wasted work.
        scores = run[qid]
        for docno in scores.keys():
            ret.append({
                'qid': qid,
                'docno': docno,
                'score': scores[docno]
            })

    # Fixing the columns keeps the schema stable for an empty run.
    return pd.DataFrame(ret, columns=['qid', 'docno', 'score'])

In [22]:
# Callables obtained from existing TIRA submissions; each is applied to the
# topics below. They get their own names so that `bm25` / `tinybert` only
# ever refer to ranx runs.
bm25_pipeline = tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 (tira-ir-starter-pyterrier)', pt_dataset)
tinybert_pipeline = tira.pt.from_submission('reneuir-2024/tinyfsu/overcast-elk', pt_dataset)

# Load the topics once and feed the same frame to both pipelines.
topics = pt_dataset.get_topics('title')

bm25 = to_ranx_dict(bm25_pipeline(topics))
tinybert = to_ranx_dict(tinybert_pipeline(topics))

# Fusion weights, in the order the runs are passed to `fuse` (bm25, tinybert).
# NOTE(review): presumably tuned beforehand — TODO document where these values come from.
best_params = {'weights': (0.62, 0.38)}

In [13]:
# Fuse both runs (min-max normalised, weighted sum) and flatten the result
# into a DataFrame in a single step, so `df` is only ever a DataFrame.
df = from_ranx(
    fuse(
        runs=[bm25, tinybert],
        norm="min-max",
        method="wsum",
        params=best_params,
    )
)

# Write the fused run under the system name 'fusion' into the current directory.
persist_and_normalize_run(df, 'fusion', default_output='.')


100%|██████████| 68/68 [03:54<00:00,  3.45s/it]


The run file is normalized outside the TIRA sandbox, I will store it at ".".
Done. run file is stored under "./run.txt".
