In [1]:
# Start/initialize PyTerrier.
from pyterrier import started, init

# Guard the call with `started()` so that re-running this cell does not call
# `init` a second time on an already started PyTerrier.
if not started():
    init(tqdm="auto")

PyTerrier 0.8.0 has loaded Terrier 5.6 (built by craigmacdonald on 2021-09-17 13:27)

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
# Notebook configuration; the cells below read these names.
# `edition` and `track` select the run directory (".../trec{edition}/{track}").
edition = 29
track = "deep.documents"
# ir_datasets identifier; its first path component also names the index directory.
dataset_name = "msmarco-document/trec-dl-2020/judged"
# Document field that is indexed and handed to the experiment as contents accessor.
contents_field = "body"
# Passed to the experiment as `depth`.
depth = 20

In [3]:
from pyterrier.datasets import get_dataset
from ir_datasets import load

# The same dataset is loaded twice: once through PyTerrier's "irds:" wrapper
# (used below for the corpus iterator, topics and qrels) and once directly
# through ir_datasets (handed to the experiment as `dataset`).
irds_id = f"irds:{dataset_name}"
dataset = get_dataset(irds_id)
ir_dataset = load(dataset_name)

In [4]:
from tempfile import TemporaryDirectory
from pathlib import Path

# Scratch directory used as the experiment's cache. Keep `tmp_dir` referenced:
# the directory is removed once the TemporaryDirectory object is cleaned up.
tmp_dir = TemporaryDirectory()
cache_dir = Path(tmp_dir.name)

# The index location depends only on the first component of the dataset name
# (here "msmarco-document"), not on the full dataset identifier.
index_dir = Path("cache") / "indices" / dataset_name.split("/")[0]

# Directory holding the submitted run files of the selected edition and track.
result_dir = Path(
    "/mnt/ceph/storage/data-in-progress/data-research/"
    "web-search/web-search-trec/trec-system-runs"
) / f"trec{edition}" / track

# `Path.iterdir()` yields entries in arbitrary order, so sort them to make the
# order of the loaded runs (and of their names) reproducible across executions.
# Sub-directories are skipped because only files can be parsed as runs.
result_files = sorted(
    entry for entry in result_dir.iterdir() if entry.is_file()
)

In [5]:
from pyterrier.index import IterDictIndexer

# Build the index only when the index directory does not exist yet; an
# existing directory is taken as-is and the corpus is not read again.
if not index_dir.exists():
    indexer = IterDictIndexer(f"{index_dir.absolute()}")
    corpus_iter = dataset.get_corpus_iter()
    # Only the configured contents field is passed as an indexed field.
    indexer.index(corpus_iter, fields=[contents_field])

In [6]:
from pyterrier.io import read_results
from pyterrier import Transformer
from tqdm.auto import tqdm

# Wrap every run file in a transformer built from its parsed results.
results = []
for run_path in tqdm(result_files, desc="Load results"):
    run_frame = read_results(run_path)
    results.append(Transformer.from_df(run_frame))

# Run names come from the file names: the last suffix is dropped by `.stem`
# and every "input." occurrence is removed.
results_names = [
    run_path.stem.replace("input.", "")
    for run_path in result_files
]

Load results:   0%|          | 0/64 [00:00<?, ?it/s]

In [7]:
from ir_axioms.axiom import ArgUC, STMC1_f

# Axioms evaluated in the experiment; each one has `~` applied to it.
axioms = [
    # ~ArgUC(),  # Disabled: very slow due to network access.
    ~STMC1_f(),  # Rather slow due to many similarity calculations.
]
# The display name is read from the `.axiom` attribute of each `~`-wrapped
# object (presumably the original, un-negated axiom; confirm in ir_axioms).
axiom_names = [negated.axiom.name for negated in axioms]

In [8]:
from ir_axioms.backend.pyterrier.experiment import AxiomaticExperiment

# Collect the evaluation inputs first, in the same order as they are used.
topics = dataset.get_topics()
qrels = dataset.get_qrels()

# Set up the axiomatic experiment over all loaded runs.
experiment = AxiomaticExperiment(
    retrieval_systems=results,
    topics=topics,
    qrels=qrels,
    index=index_dir,
    dataset=ir_dataset,
    contents_accessor=contents_field,
    axioms=axioms,
    axiom_names=axiom_names,
    depth=depth,
    filter_by_qrels=False,
    filter_by_topics=False,
    verbose=True,
    cache_dir=cache_dir,
)

# Reading `preferences` starts the computation (progress bars are shown because
# `verbose=True`); as the last expression, its value is the cell output.
experiment.preferences

Computing system axiomatic preferences:   0%|          | 0/64 [00:00<?, ?system/s]

Computing query axiom preferences:   0%|          | 0/45 [00:00<?, ?query/s]

Computing query axiom preferences:   0%|          | 0/45 [00:00<?, ?query/s]

Computing query axiom preferences:   0%|          | 0/45 [00:00<?, ?query/s]

Computing query axiom preferences:   0%|          | 0/45 [00:00<?, ?query/s]

KeyboardInterrupt: 