In [4]:
from sys import modules

IN_COLAB = 'google.colab' in modules
if IN_COLAB:
    !pip install -q ir_axioms[examples] python-terrier

In [5]:
# Initialise PyTerrier, but only if it has not been started yet.
from pyterrier import started, init

pyterrier_running = started()
if not pyterrier_running:
    init(tqdm="auto")

In [6]:
from pyterrier.datasets import get_dataset, Dataset

# The same dataset is requested twice through the "irds:" prefix:
# once by its base name and once with the "/test" suffix.
dataset_name = "antique"
dataset: Dataset = get_dataset("irds:" + dataset_name)
dataset_test: Dataset = get_dataset("irds:" + dataset_name + "/test")

In [7]:
from pathlib import Path

# Everything generated by this notebook lives below cache/.
cache_dir = Path("cache/")
# The index folder is named after the part of the dataset name
# that precedes the first "/".
index_dir = cache_dir.joinpath("indices", dataset_name.partition("/")[0])

In [8]:
from pyterrier.index import IterDictIndexer

# Indexing is skipped whenever the index directory is already present.
if not index_dir.exists():
    indexer = IterDictIndexer(str(index_dir.absolute()))
    corpus = dataset.get_corpus_iter()
    # Only the "text" field of each document is indexed.
    indexer.index(corpus, fields=["text"])

In [9]:
from pyterrier.batchretrieve import BatchRetrieve

# Baseline retriever: BM25 weighting model over the on-disk index.
index_location = str(index_dir.absolute())
bm25 = BatchRetrieve(index_location, wmodel="BM25")

In [10]:
from ir_axioms.axiom import ASPECT_REG_fastText, PROX1, PROX2, PROX4, STMC2, ORIG

# Five axioms are combined with `%`; the result is joined with ORIG via `|`,
# and `~` is applied to the whole expression.
# NOTE(review): in ir_axioms these operators presumably mean majority vote,
# fallback and caching respectively; confirm against the library docs.
combined_axioms = ASPECT_REG_fastText() % PROX1() % PROX2() % PROX4() % STMC2()
axiom = ~(combined_axioms | ORIG())

In [11]:
from ir_axioms.backend.pyterrier.transformers import AxiomaticReranker

# Stage 1: BM25 with the `% 10` operator applied (PyTerrier rank cutoff).
bm25_top10 = bm25 % 10
# Stage 2: axiomatic re-ranking of that stage's output.
axiomatic_reranker = AxiomaticReranker(
    axiom=axiom,
    index=index_dir,
    dataset=dataset_name,
    cache_dir=cache_dir,
    verbose=True,
)
# Stage 3: `^ bm25` combines the re-ranked results with the plain BM25 run
# (PyTerrier's concatenate operator). The parentheses only spell out the
# precedence Python already applies: `%` binds before `>>`, which binds before `^`.
kwiksort = (bm25_top10 >> axiomatic_reranker) ^ bm25

In [12]:
from pyterrier.pipelines import Experiment
from ir_measures import nDCG, MAP

# Evaluate the BM25 baseline and the KwikSort pipeline on the test topics and
# qrels. The result table is sorted by nDCG@10 (best system first) in a single
# chained expression, so `experiment` is bound once and never mutated in place.
experiment = Experiment(
    [bm25, kwiksort],
    dataset_test.get_topics(),
    dataset_test.get_qrels(),
    [nDCG @ 5, nDCG @ 10, nDCG @ 20, MAP],
    ["BM25", "KwikSort"],
    verbose=True,
).sort_values(by="nDCG@10", ascending=False)

pt.Experiment:   0%|          | 0/2 [00:00<?, ?system/s]

Reranking query axiomatically:   0%|          | 0/198 [00:00<?, ?query/s]

In [13]:
# Show the evaluation table (one row per system, one column per measure).
experiment

Unnamed: 0,name,nDCG@5,nDCG@10,nDCG@20,AP
0,BM25,0.529428,0.510402,0.478976,0.451778
1,KwikSort,0.529118,0.510093,0.479093,0.451052
