In [14]:
import pandas as pd
from tira.rest_api_client import Client
from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run, ir_datasets
from tqdm import tqdm

# Ensure that PyTerrier is loaded so that this notebook also works in the TIRA sandbox.
# NOTE(review): the call is kept ahead of `import pyterrier`; presumably the helper has to
# run before the import — confirm before moving the import to the top of the cell.
ensure_pyterrier_is_loaded()
import pyterrier as pt


# Client used by the cells below to fetch run outputs and submissions from TIRA.
tira = Client()
# Dataset identifier; passed to the ir_datasets lookup below and to every TIRA request.
dataset_id = 'longeval-2023-01-20240423-training'
# Interpolated into the TIRA approach name 'time-keyquery-offset-{offset}' by keyquery_run.
offset = '2022'
# PyTerrier dataset object; the evaluation cells read topics and qrels from it.
pt_dataset = pt.get_dataset(f'irds:ir-benchmarks/{dataset_id}')


In [10]:
def keyquery_run(offset, expansion_method, retrieval_model, fb_terms, fb_docs, dataset_id, return_transformer=True):
    """Load one key-query run from the TIRA output of 'time-keyquery-offset-{offset}'.

    The run file inside the output directory is selected by name:
    '{expansion_method}_{retrieval_model}_{fb_terms}_{fb_docs}.jsonl.gz'.

    Returns the result of pt.transformer.get_transformer on the run when
    return_transformer is truthy, otherwise the run as a pandas DataFrame.
    """
    run_dir = tira.get_run_output(f'ir-benchmarks/ows/time-keyquery-offset-{offset}', dataset_id)
    run_file = f'{run_dir}/{expansion_method}_{retrieval_model}_{fb_terms}_{fb_docs}.jsonl.gz'
    run = pd.read_json(run_file, lines=True)

    if not return_transformer:
        return run
    return pt.transformer.get_transformer(run)


In [27]:
# Every combination of retrieval model, expansion method, feedback terms and feedback
# documents; the right-most parameter varies fastest.
keyquery_grid = [
    (wm, expansion, fb_terms, fb_docs)
    for wm in ['BM25', 'DirichletLM']
    for expansion in ['bo1', 'kl', 'rm3']
    for fb_terms in [10, 20, 30]
    for fb_docs in [5, 10]
]

# Parallel lists: keyquery_names[i] is the label of keyquery_pipelines[i].
keyquery_names = []
keyquery_pipelines = []
for wm, expansion, fb_terms, fb_docs in keyquery_grid:
    keyquery_names.append(f'{wm}_{expansion}_{fb_terms}_{fb_docs}')
    keyquery_pipelines.append(keyquery_run(offset, expansion, wm, fb_terms, fb_docs, dataset_id))

In [20]:
# Restrict the evaluation to topics whose qid occurs in a key-query run. One run is loaded
# as a plain DataFrame (return_transformer=False) only to read its query ids.
reference_run = keyquery_run(offset, 'rm3', 'BM25', 10, 10, dataset_id, False)
overlapping_queries = set(reference_run['qid'].unique())

print('Select overlapping topics...')
# Keep the unfiltered topics under their own name so `topics` is only ever the filtered frame.
all_topics = pt_dataset.get_topics('text')
topics = all_topics[all_topics['qid'].isin(overlapping_queries)]

print(f'Done. Found {len(topics)} overlapping topics.')

Select overlapping topics...
Done. Found 195 overlapping topics.


In [32]:
# Systems under comparison, keyed by the label shown in the results table. Insertion order
# is the row order of the resulting frame: three TIRA submissions first, then one key-query
# configuration.
compared_systems = {
    'BM25': tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 (tira-ir-starter-pyterrier)', dataset_id),
    'Sparse Cross Encoder': tira.pt.from_submission('ir-benchmarks/fschlatt/sparse-cross-encoder-4-512', dataset_id),
    'RankZephyr': tira.pt.from_submission('workshop-on-open-web-search/fschlatt/rank-zephyr', dataset_id),
    'Keyquery (tuned)': keyquery_run(offset, 'bo1', 'BM25', 30, 5, dataset_id, return_transformer=True),
}

# Evaluate all systems on the selected topics against the dataset's qrels.
df = pt.Experiment(
    list(compared_systems.values()),
    topics,
    pt_dataset.get_qrels(),
    ["ndcg", "ndcg_cut.10", "recip_rank", "recall_100"],
    names=list(compared_systems.keys()),
)

df

{'status': 0, 'context': {'include_navigation': False, 'user_id': None, 'role': 'guest', 'organizer_teams': '[]', 'submission': {'docker_software_id': 244, 'display_name': 'BM25 (tira-ir-starter-pyterrier)', 'user_image_name': 'registry.webis.de/code-research/tira/tira-user-tira-ir-starter/pyterrier:0.0.1', 'command': '/workspace/pyterrier_cli.py --input $inputDataset --output $outputDir --index_directory $inputRun --params wmodel=BM25 --retrieval_pipeline default_pipelines.wmodel_batch_retrieve', 'tira_image_name': 'registry.webis.de/code-research/tira/tira-user-tira-ir-starter/pyterrier:0.0.1-tira-docker-software-id-sienna-bug', 'task_id': 'ir-benchmarks', 'vm_id': 'tira-ir-starter', 'description': '', 'paper_link': '', 'input_docker_software': 'Index (tira-ir-starter-pyterrier)', 'input_docker_software_id': 243, 'input_upload_id': None, 'ir_re_ranker': False, 'public_image_name': 'docker.io/webis/ir-benchmarks-submissions:tira-ir-starter-pyterrier-0-0-1-tira-docker-software-id-sienn

Unnamed: 0,name,ndcg,ndcg_cut.10,recip_rank,recall_100
0,BM25,0.350489,0.171283,0.336867,0.547271
1,Sparse Cross Encoder,0.327323,0.202812,0.376275,0.547271
2,RankZephyr,0.342603,0.223018,0.435556,0.547271
3,Keyquery (tuned),0.447931,0.25642,0.51609,0.672551


In [30]:
# Rank every key-query configuration by nDCG@10 and show the ten best.
#
# The grid results are computed here from keyquery_pipelines / keyquery_names instead of
# sorting `df`: the comparison cell defines `df` with only four systems, so on a fresh
# Restart & Run All sorting `df` would not list the grid configurations that appear in the
# recorded output (which stems from an earlier, larger `df`).
#
# NOTE(review): in the rows of the recorded output, each BM25_* configuration has exactly the
# same scores as its DirichletLM_* counterpart — confirm that the retrieval model is actually
# reflected in the underlying run files.
keyquery_grid_df = pt.Experiment(
    keyquery_pipelines,
    topics,
    pt_dataset.get_qrels(),
    ["ndcg", "ndcg_cut.10", "recip_rank", "recall_100"],
    names=keyquery_names,
)

keyquery_grid_df.sort_values(by='ndcg_cut.10', ascending=False).head(10)

Unnamed: 0,name,ndcg,ndcg_cut.10,recip_rank,recall_100
7,BM25_bo1_30_5,0.447931,0.25642,0.51609,0.672551
25,DirichletLM_bo1_30_5,0.447931,0.25642,0.51609,0.672551
31,DirichletLM_kl_30_5,0.446503,0.255012,0.510258,0.675961
13,BM25_kl_30_5,0.446503,0.255012,0.510258,0.675961
23,DirichletLM_bo1_20_5,0.446218,0.254156,0.509127,0.671152
5,BM25_bo1_20_5,0.446218,0.254156,0.509127,0.671152
8,BM25_bo1_30_10,0.445263,0.249164,0.514283,0.67191
26,DirichletLM_bo1_30_10,0.445263,0.249164,0.514283,0.67191
14,BM25_kl_30_10,0.44462,0.248809,0.505839,0.676771
32,DirichletLM_kl_30_10,0.44462,0.248809,0.505839,0.676771
