In [19]:
from sys import modules

IN_COLAB = 'google.colab' in modules
if IN_COLAB:
    !pip install -q ir_axioms[examples] python-terrier

In [20]:
# Start/initialize PyTerrier.
from pyterrier import started, init

# Only initialize when PyTerrier is not running yet, so that re-running this
# cell in the same kernel does not call `init` a second time.
if not started():
    init(tqdm="auto")

In [21]:
from pathlib import Path

# Relative directory named `cache`; it is passed on to `TrecTrack` in the next cell.
cache_dir = Path("./cache/")

In [22]:
from trec_util import TrecTrack

# Track definition for the "msmarco-passage/trec-dl-2019" dataset.
# NOTE(review): 28 is presumably the TREC edition number — confirm in trec_util.
# The final argument is an absolute, machine-specific path (by its name, the
# directory of submitted system runs); adjust it when running elsewhere.
track = TrecTrack(
    28,
    "deep",
    "msmarco-passage/trec-dl-2019",
    cache_dir,
    Path(
        "/mnt/ceph/storage/data-in-progress/data-research/"
        "web-search/web-search-trec/trec-system-runs"
    ),
)
# Materialize all results of the track into a list.
retrieval_systems = list(track.results)

Read results:   0%|          | 0/37 [00:00<?, ?run/s]

In [23]:
# Names of the track's results; used later as system labels in the experiment.
# NOTE(review): presumably in the same order as `track.results` — confirm in trec_util.
retrieval_system_names = track.result_names

## Axiom definition
Now let's define our axiom for reranking.
In the `ir_axioms` package, we already include many great retrieval axioms that have been found useful in the literature.
But a single axiom alone will likely not affect our ranking much.
Therefore, it is essential to combine multiple axioms to form new axioms that can be used for reranking.
We'll now combine a few axioms with a logical and (`&`) to only give a score if all axioms agree.
Then we add the `OriginalAxiom` as a fallback. That way, if our axioms did not conclude a preference, we use the original ranking order as preference.

In [24]:
from ir_axioms.axiom import (
    ArgUC, QTArg, QTPArg, aSL, PROX1, PROX2, PROX3, PROX4, PROX5, TFC1, TFC3, RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    LNC1, TF_LNC, LB1, STMC1, STMC1_f, STMC2, STMC2_f, AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV, M_TDC, LEN_M_TDC,
    OriginalAxiom, AndAxiom
)

# Axiom used by the reranker and for the permutations.
# Each listed axiom class is instantiated with its default arguments and
# wrapped with `~`; the conjunction of all of them is then combined via `|`
# with `OriginalAxiom` as the fallback.
axiom = AndAxiom([
    ~axiom_class()
    for axiom_class in (
        ArgUC, QTArg, QTPArg, aSL,
        LNC1, TF_LNC, LB1,
        PROX1, PROX2, PROX3, PROX4, PROX5,
        # REG and ANTI_REG are omitted here; the original note on them reads "Tie".
        AND, LEN_AND, M_AND, LEN_M_AND,
        DIV, LEN_DIV,
        RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
        TFC1, TFC3,
        M_TDC, LEN_M_TDC,
        STMC1, STMC1_f, STMC2, STMC2_f,
    )
]) | OriginalAxiom()

In [25]:
from ir_axioms.backend.pyterrier.transformers import AxiomaticReranker

# One reranking pipeline per retrieval system. The parentheses spell out how
# Python groups the operators: `%` binds first, then `>>`, then `^`, and `~`
# is applied to the complete pipeline.
# NOTE(review): in PyTerrier's operator notation these presumably mean rank
# cutoff (`% 10`), composition (`>>`), concatenation with the original
# ranking (`^`) and caching (`~`) — confirm that the runs are PyTerrier
# transformers.
reranked_retrieval_systems = [
    ~(
        (
            (system % 10)
            >> AxiomaticReranker(
                axiom=axiom,
                index=track.index,
                dataset=track.dataset_name,
                verbose=True,
            )
        )
        ^ system
    )
    for system in retrieval_systems
]

In [26]:
# Label each reranked system with its original name plus a " reranked" suffix.
reranked_retrieval_system_names = [f"{name} reranked" for name in retrieval_system_names]

In [27]:
from pyterrier.pipelines import Experiment
from ir_measures import nDCG, MAP

# Evaluate the reranked systems and the original systems side by side on the
# track's topics and qrels. The list of names is given in the same order as
# the list of systems (reranked first, then original).
# The result is sorted by nDCG@10 (best first) as part of the same expression
# instead of mutating the frame in place afterwards, so `experiment` is only
# ever bound to the sorted table.
# noinspection PyTypeChecker
experiment = Experiment(
    [*reranked_retrieval_systems, *retrieval_systems],
    track.dataset.get_topics(),
    track.dataset.get_qrels(),
    [nDCG @ 5, nDCG @ 10, nDCG @ 20, MAP],
    [*reranked_retrieval_system_names, *retrieval_system_names],
    verbose=True,
).sort_values(by="nDCG@10", ascending=False)

pt.Experiment:   0%|          | 0/74 [00:00<?, ?system/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

Reranking axiomatically:   0%|          | 0/200 [00:00<?, ?query/s]

In [28]:
# Display the evaluation results table.
experiment

Unnamed: 0,name,nDCG@5,nDCG@10,nDCG@20
45,Trec(idst_bert_p1),0.778978,0.764475,0.733733
67,Trec(idst_bert_p2),0.775044,0.763157,0.737235
8,Trec(idst_bert_p1) reranked,0.769555,0.759605,0.729307
68,Trec(idst_bert_p3),0.780332,0.759367,0.736413
30,Trec(idst_bert_p2) reranked,0.764531,0.758502,0.733909
...,...,...,...,...
2,Trec(bm25tuned_p) reranked,0.509691,0.496206,0.481194
72,Trec(UNH_bm25),0.446508,0.449468,0.448966
35,Trec(UNH_bm25) reranked,0.444285,0.449245,0.448754
7,Trec(UNH_exDL_bm25) reranked,0.090907,0.082574,0.083515
