In [43]:
from sys import modules

IN_COLAB = 'google.colab' in modules
if IN_COLAB:
    !pip install -q ir_axioms[examples] python-terrier

In [44]:
# Start/initialize PyTerrier.
# `init` is only called when `started()` reports that PyTerrier is not running
# yet, so re-executing this cell in a live kernel does not initialize it twice.
# `tqdm="auto"` is forwarded to PyTerrier to select its progress bar flavour.
from pyterrier import started, init

if not started():
    init(tqdm="auto")

In [45]:
from pyterrier.datasets import get_dataset, Dataset

# Resolve the ir_datasets-backed collections through PyTerrier.
dataset_name = "msmarco-passage"

# Full collection; used further down as the corpus source for indexing.
dataset: Dataset = get_dataset(
    f"irds:{dataset_name}"
)

# Judged training queries: used to fit the learning-to-rank model.
dataset_train: Dataset = get_dataset(
    f"irds:{dataset_name}/train/judged"
)

# TREC DL 2019 judged queries: used as the validation split.
dataset_dev: Dataset = get_dataset(
    f"irds:{dataset_name}/trec-dl-2019/judged"
)

# TREC DL 2020 judged queries: used as the held-out test split.
dataset_test: Dataset = get_dataset(
    f"irds:{dataset_name}/trec-dl-2020/judged"
)

In [46]:
from pathlib import Path

# All generated artefacts live below a local, relative cache directory.
cache_dir = Path("cache/")

# One index directory per base collection: only the part of the dataset name
# before the first "/" is used, so sub-splits would share the same index.
index_dir = cache_dir.joinpath("indices", dataset_name.partition("/")[0])

In [47]:
from pyterrier.index import IterDictIndexer

# Build the Terrier index once and reuse it on later runs.
#
# The guard looks for `data.properties` inside the index directory instead of
# only checking that the directory exists: a directory left behind by an
# interrupted indexing run would otherwise be mistaken for a finished index
# and retrieval would later fail on it. `data.properties` is the marker file
# commonly used with PyTerrier to detect an existing index.
if not (index_dir / "data.properties").exists():
    indexer = IterDictIndexer(str(index_dir.absolute()))
    # Index the "text" field of every document yielded by the corpus iterator.
    indexer.index(
        dataset.get_corpus_iter(),
        fields=["text"]
    )

In [48]:
from pyterrier.batchretrieve import BatchRetrieve

# Baseline retriever: BM25 over the index built above, returning up to
# 20 results per query and showing a progress bar while retrieving.
bm25 = BatchRetrieve(
    str(index_dir.absolute()),
    wmodel="BM25",
    num_results=20,
    verbose=True,
)

In [49]:
from ir_axioms.axiom import (
    ArgUC, QTArg, QTPArg, aSL, PROX1, PROX2, PROX3, PROX4, PROX5, TFC1, TFC3, RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV, M_TDC, LEN_M_TDC, STMC1, STMC1_f, STMC2, STMC2_f, LNC1, TF_LNC, LB1,
    REG, ANTI_REG, REG_f, ANTI_REG_f, ASPECT_REG, ASPECT_REG_f, ORIG
)

# Axioms used as feature sources, in a fixed order (the order determines the
# layout of the resulting feature vector, so it must not be shuffled).
#
# Every axiom except ORIG is instantiated with its defaults and wrapped with
# the `~` operator.
# NOTE(review): `~` presumably selects ir_axioms' cached variant of an axiom;
# confirm against the ir_axioms operator documentation.
axioms = [
    *(
        ~axiom_class()
        for axiom_class in (
            ArgUC, QTArg, QTPArg, aSL,
            LNC1, TF_LNC, LB1,
            PROX1, PROX2, PROX3, PROX4, PROX5,
            REG, REG_f, ANTI_REG, ANTI_REG_f, ASPECT_REG, ASPECT_REG_f,
            AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV,
            RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
            TFC1, TFC3, M_TDC, LEN_M_TDC,
            STMC1, STMC1_f, STMC2, STMC2_f,
        )
    ),
    # ORIG is the only axiom that is used without the `~` wrapper.
    ORIG(),
]

In [50]:
from pyterrier.transformer import IdentityTransformer
from statistics import mean
from ir_axioms.backend.pyterrier.transformers import AggregatedAxiomaticPreference

# Feature pipeline: cut the BM25 ranking to its top 10 results per query
# (`% 10`) and feed them (`>>`) into the axiomatic preference transformer,
# which aggregates the axioms' preferences with mean, min and max.
#
# The original BM25 score is not part of the feature vector here. A variant
# that joins it in via a feature union (`**`) with `IdentityTransformer()`
# was left disabled by the author.
features = (bm25 % 10) >> AggregatedAxiomaticPreference(
    axioms=axioms,
    index=index_dir,
    dataset=dataset_name,
    aggregations=[mean, min, max],
    verbose=True,
)

In [51]:
# Smoke test: run the feature pipeline for the first test topic only, keep the
# single top-ranked result, and display that result's feature vector.
(features % 1).transform(
    dataset_test.get_topics().head(1)
)["features"].iloc[0]

BR(BM25):   0%|          | 0/1 [00:00<?, ?q/s]

Aggregating query axiom preferences:   0%|          | 0/1 [00:00<?, ?query/s]

array([ 0, -1,  0,  0, -1,  0,  0, -1,  1,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0, -1,  1,  0, -1,  1,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0, -1,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0, -1,
        1,  0,  0,  1,  0, -1,  0,  0, -1,  0,  0, -1,  0,  0, -1,  0,  0,
       -1,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  1,
        0, -1,  1,  0,  0,  0,  0,  0,  1,  0, -1,  0])

In [52]:
from lightgbm import LGBMRanker
from pyterrier.ltr import apply_learned_model

# LambdaMART ranker (LightGBM) trained on the axiomatic preference features.
lambda_mart = LGBMRanker(
    task="train",
    # Ranking objective; nDCG at three cut-offs is reported on validation data.
    objective="lambdarank",
    metric="ndcg",
    ndcg_eval_at=[5, 10, 20],
    # Small, shallow ensemble: 10 boosting rounds of trees with 7 leaves.
    num_iterations=10,
    num_leaves=7,
    learning_rate=.1,
    max_bin=255,
    # Leaf constraints.
    # NOTE(review): the recorded training log shows the same validation nDCG
    # for all 10 rounds, which may mean no split satisfies these constraints
    # on the small training sample. Re-check `min_sum_hessian_in_leaf`.
    min_data_in_leaf=1,
    min_sum_hessian_in_leaf=100,
    importance_type="gain",
)

# Full re-ranking pipeline: compute the features, then score with the ranker.
ltr = features >> apply_learned_model(
    lambda_mart,
    form="ltr",
)

In [53]:
# Fit the pipeline on the first 250 judged training topics and validate on
# the TREC DL 2019 topics; qrels are passed unfiltered for both splits.
ltr.fit(
    dataset_train.get_topics().iloc[:250],
    dataset_train.get_qrels(),
    dataset_dev.get_topics(),
    dataset_dev.get_qrels(),
)

BR(BM25):   0%|          | 0/250 [00:00<?, ?q/s]

Aggregating query axiom preferences:   0%|          | 0/250 [00:00<?, ?query/s]

BR(BM25):   0%|          | 0/43 [00:00<?, ?q/s]

Aggregating query axiom preferences:   0%|          | 0/43 [00:00<?, ?query/s]

[1]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[2]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[3]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[4]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[5]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[6]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[7]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[8]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[9]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362
[10]	valid_0's ndcg@5: 0.632138	valid_0's ndcg@10: 0.771362	valid_0's ndcg@20: 0.771362




In [54]:
from pyterrier.pipelines import Experiment
from ir_measures import nDCG, MAP

# Compare plain BM25 against the axiomatic LTR pipeline on the test topics.
# The LTR system is combined with BM25 through `^`.
# NOTE(review): `^` presumably appends BM25 results the re-ranker did not
# return (it only sees the top 10) — confirm against the PyTerrier operator docs.
# The resulting table is ordered by nDCG@10, best system first.
experiment = Experiment(
    [bm25, ltr ^ bm25],
    dataset_test.get_topics(),
    dataset_test.get_qrels(),
    [nDCG @ 5, nDCG @ 10, nDCG @ 20, MAP],
    names=["BM25", "Axiomatic LTR"],
    verbose=True,
).sort_values(by="nDCG@10", ascending=False)

pt.Experiment:   0%|          | 0/2 [00:00<?, ?system/s]

BR(BM25):   0%|          | 0/54 [00:00<?, ?q/s]

BR(BM25):   0%|          | 0/54 [00:00<?, ?q/s]

Aggregating query axiom preferences:   0%|          | 0/54 [00:00<?, ?query/s]

BR(BM25):   0%|          | 0/54 [00:00<?, ?q/s]

In [55]:
# Display the evaluation table as the cell's rich output.
experiment

Unnamed: 0,name,nDCG@5,nDCG@10,nDCG@20,AP
0,BM25,0.496557,0.493627,0.479548,0.202382
1,Axiomatic LTR,0.465529,0.471068,0.464554,0.20139
