In [20]:
# Start PyTerrier once per kernel: `init` is only called when `started()` does not
# already report a running instance, so re-running this cell never initialises twice.
# NOTE(review): tqdm="notebook" presumably selects notebook-style progress bars — confirm.
from pyterrier import started, init

if not started():
    init(tqdm="notebook")

In [21]:
import os
from pathlib import Path

# Root directory of the archived TREC system runs. The default is the shared
# cluster mount; set the TREC_SYSTEM_RUNS_DIR environment variable to point the
# notebook at a copy elsewhere. An unset or empty variable falls back to the default.
base_results_dir = Path(
    os.environ.get("TREC_SYSTEM_RUNS_DIR")
    or "/mnt/ceph/storage/data-in-progress/data-research/web-search/web-search-trec/trec-system-runs"
)

In [22]:
from pyterrier.io import read_results
from pyterrier.transformer import Transformer
from pyterrier.datasets import Dataset, get_dataset
from typing import List, Dict
from dataclasses import dataclass


@dataclass(frozen=True)
class Track:
    """One TREC track edition and the dataset its submitted runs are evaluated on.

    Run files are looked up below ``base_results_dir / "trec<edition>" / <track>``.
    """

    # TREC edition number; selects the ``trec<edition>`` sub-directory.
    edition: int
    # Name of the track's directory below the edition directory.
    track: str
    # Dataset identifier without the ``irds:`` prefix (added in ``dataset``).
    dataset_name: str

    @property
    def dataset(self) -> "Dataset":
        """Look up the PyTerrier dataset registered as ``irds:<dataset_name>``."""
        return get_dataset(f"irds:{self.dataset_name}")

    @property
    def result_dir(self) -> Path:
        """Directory that holds the run files submitted to this track."""
        return base_results_dir / f"trec{self.edition}" / self.track

    @staticmethod
    def _run_name(path: Path) -> str:
        """Derive the run name from a run file name.

        Strips one trailing compression suffix and then a leading ``input.``,
        so ``input.myrun.gz`` and ``input.myrun`` both map to ``myrun``. Other
        occurrences of ``input.`` inside the name are left untouched.
        """
        name = path.name
        # NOTE(review): .gz/.bz2 are the compressed formats the run reader is
        # expected to open transparently — confirm against the PyTerrier version in use.
        for suffix in (".gz", ".bz2"):
            if name.endswith(suffix):
                name = name[: -len(suffix)]
                break
        prefix = "input."
        if name.startswith(prefix):
            name = name[len(prefix):]
        return name

    @property
    def results(self) -> Dict[str, "Transformer"]:
        """Load every run file of this track as a transformer, keyed by run name.

        Files are processed in sorted path order so the resulting mapping (and
        anything built from it, such as an experiment table) has a reproducible
        order. Sub-directories inside the result directory are skipped.

        Every access re-reads all run files from disk; keep the returned
        mapping around instead of calling this repeatedly.
        """
        dataset = self.dataset  # resolve once, shared by all runs
        run_files = sorted(
            path for path in self.result_dir.iterdir() if path.is_file()
        )
        return {
            self._run_name(path): Transformer.from_df(
                read_results(str(path.absolute()), dataset=dataset)
            )
            for path in run_files
        }

In [23]:
# Track editions to evaluate, each paired with the dataset ID used for its
# topics and relevance judgments.
tracks = [
    Track(
        edition=28,
        track="deep",
        dataset_name="msmarco-passage/trec-dl-2019",
    ),
    Track(
        edition=29,
        track="deep",
        dataset_name="msmarco-passage/trec-dl-2020",
    ),
]

In [24]:
from typing import Tuple

# For every configured track, pair its dataset with the mapping of
# run name -> transformer loaded from the track's run files.
results: List[Tuple[Dataset, Dict[str, Transformer]]] = list(
    (trec_track.dataset, trec_track.results) for trec_track in tracks
)

[INFO] [starting] https://msmarco.blob.core.windows.net/msmarcoranking/msmarco-test2020-queries.tsv.gz
[INFO] [finished] https://msmarco.blob.core.windows.net/msmarcoranking/msmarco-test2020-queries.tsv.gz: [00:00] [4.13kB] [2.21MB/s]
                                                                                                                

In [25]:
# Work with the first configured track: its dataset and its loaded runs.
dataset = results[0][0]
track_results = results[0][1]

In [27]:
# Split the run-name -> transformer mapping into two parallel lists; both
# follow the mapping's iteration order, so index i refers to the same run.
pipeline_names = list(track_results.keys())
pipelines = list(track_results.values())

In [28]:
from pyterrier.pipelines import Experiment
from ir_measures import nDCG

# Score every loaded run of the selected track with nDCG@10, using the
# track dataset's topics and relevance judgments; one row per run name.
# noinspection PyTypeChecker
experiment = Experiment(
    pipelines,
    dataset.get_topics(),
    dataset.get_qrels(),
    eval_metrics=[nDCG@10],
    names=pipeline_names,
)
experiment

Unnamed: 0,name,nDCG@10
0,TUW19-p1-f,0.6756
1,runid4,0.702778
2,bm25tuned_p,0.497332
3,TUW19-p2-re,0.661479
4,ICT-CKNRM_B,0.648106
5,idst_bert_pr1,0.737759
6,ms_duet_passage,0.61374
7,UNH_exDL_bm25,0.081719
8,idst_bert_p1,0.764475
9,p_exp_rm3_bert,0.742242
