In [32]:
from sys import modules

IN_COLAB = 'google.colab' in modules
if IN_COLAB:
    !pip install -q ir_axioms[examples] python-terrier

In [33]:
# Start/initialize PyTerrier.
from pyterrier import started, init

# Initialize PyTerrier only when it has not been started yet, so this cell
# skips the initialization when it is executed again.
pyterrier_running = started()
if not pyterrier_running:
    init(tqdm="auto")

In [34]:
from pyterrier.datasets import get_dataset, Dataset

# Name of the corpus; later cells reuse it for the index path and the
# feature extraction.
dataset_name = "msmarco-passage"

# Dataset identifiers: the corpus itself and two judged topic sets.
corpus_id = f"irds:{dataset_name}"
train_id = f"irds:{dataset_name}/trec-dl-2019/judged"
test_id = f"irds:{dataset_name}/trec-dl-2020/judged"

# Resolve the identifiers to PyTerrier dataset objects.
dataset: Dataset = get_dataset(corpus_id)
dataset_train: Dataset = get_dataset(train_id)
dataset_test: Dataset = get_dataset(test_id)

In [35]:
from pathlib import Path

# Relative cache folder; the index is stored below it.
cache_dir = Path("cache/")
# Index location: cache/indices/<first "/"-separated segment of the dataset name>.
corpus_key = dataset_name.split("/")[0]
index_dir = cache_dir.joinpath("indices", corpus_key)

In [36]:
from pyterrier.index import IterDictIndexer

# Build the index only when a finished one is missing. A completed Terrier
# index contains a "data.properties" file, so test for that file instead of
# the folder: a folder left behind by an interrupted indexing run would
# otherwise be mistaken for a usable index and indexing would be skipped on
# every later run.
index_properties = index_dir / "data.properties"
if not index_properties.exists():
    indexer = IterDictIndexer(str(index_dir.absolute()))
    # Index the "text" field of every document yielded by the corpus iterator.
    indexer.index(
        dataset.get_corpus_iter(),
        fields=["text"]
    )

In [37]:
from pyterrier.batchretrieve import BatchRetrieve

# Baseline retriever: BM25 on the index directory, limited to 20 results per
# query, with verbose output enabled.
bm25_index_path = str(index_dir.absolute())
bm25 = BatchRetrieve(
    bm25_index_path,
    wmodel="BM25",
    num_results=20,
    verbose=True,
)

In [38]:
from ir_axioms.axiom import (
    ArgUC, QTArg, QTPArg, aSL, PROX1, PROX2, PROX3, PROX4, PROX5, TFC1, TFC3, RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV, M_TDC, LEN_M_TDC, STMC1, STMC1_f, STMC2, STMC2_f, LNC1, TF_LNC, LB1,
    REG, ANTI_REG, REG_f, ANTI_REG_f, ORIG
)

# Axiom classes that are instantiated and then wrapped with the `~` operator.
# NOTE(review): `~` is an operator overload provided by ir_axioms; its exact
# effect is not visible in this notebook -- confirm against the library docs.
tilde_axiom_classes = (
    # Argumentativeness axioms.
    ArgUC, QTArg, QTPArg, aSL,
    # Length normalization and lower-bounding axioms.
    LNC1, TF_LNC, LB1,
    # Proximity axioms.
    PROX1, PROX2, PROX3, PROX4, PROX5,
    # Regularization axioms.
    REG, REG_f, ANTI_REG, ANTI_REG_f,
    # Query-aspect axioms.
    AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV,
    # Retrieval-score axioms.
    RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    # Term-frequency axioms.
    TFC1, TFC3, M_TDC, LEN_M_TDC,
    # Semantic term-matching axioms.
    STMC1, STMC1_f, STMC2, STMC2_f,
)
axioms = [~axiom_class() for axiom_class in tilde_axiom_classes]
# ORIG is the only axiom used without the `~` wrapper and comes last.
axioms.append(ORIG())

In [39]:
from statistics import mean, variance
from ir_axioms.backend.pyterrier.transformers import AggregatedAxiomaticPreference

def _share_non_negative(preferences):
    """Return the fraction of the given preference values that are >= 0."""
    return mean(float(value >= 0) for value in preferences)


def _share_non_positive(preferences):
    """Return the fraction of the given preference values that are <= 0."""
    return mean(float(value <= 0) for value in preferences)


# Functions that each reduce a collection of preference values to one number.
# The list order matters: later cells rely on len(aggregations) to reshape the
# feature importances.
aggregations = [
    sum,
    variance,
    mean,
    max,
    min,
    _share_non_negative,
    _share_non_positive,
]
# Transformer that turns axiom preferences into aggregated feature values,
# configured with the axioms, index, aggregation functions and dataset name
# defined in the cells above.
axiom_preference_features = AggregatedAxiomaticPreference(
    axioms=axioms,
    index=index_dir,
    aggregations=aggregations,
    dataset=dataset_name,
    verbose=True,
)
# `%` binds tighter than `>>`, so the BM25 ranking is cut to 20 results
# before it is passed on to the feature transformer.
features = (bm25 % 20) >> axiom_preference_features

In [40]:
# Smoke test: run the feature pipeline on the first test topic only and
# display the resulting feature vectors.
first_test_topic = dataset_test.get_topics()[:1]
features.transform(first_test_topic)["features"]

BR(BM25):   0%|          | 0/1 [00:00<?, ?q/s]

Aggregating query axiom preferences:   0%|          | 0/1 [00:00<?, ?query/s]

0     [-5.0, 0.19736842105263158, -0.25, 0.0, -1.0, ...
1     [-3.0, 0.23947368421052628, -0.15, 1.0, -1.0, ...
2     [-4.0, 0.16842105263157897, -0.2, 0.0, -1.0, 0...
3     [-2.0, 0.2, -0.1, 1.0, -1.0, 0.85, 0.95, -1.0,...
4     [6.0, 0.22105263157894733, 0.3, 1.0, 0.0, 1.0,...
5     [4.0, 0.16842105263157897, 0.2, 1.0, 0.0, 1.0,...
6     [2.0, 0.2, 0.1, 1.0, -1.0, 0.95, 0.85, 5.0, 0....
7     [4.0, 0.16842105263157897, 0.2, 1.0, 0.0, 1.0,...
8     [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, ...
9     [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, ...
10    [-1.0, 0.049999999999999996, -0.05, 0.0, -1.0,...
11    [1.0, 0.049999999999999996, 0.05, 1.0, 0.0, 1....
12    [2.0, 0.2, 0.1, 1.0, -1.0, 0.95, 0.85, -6.0, 0...
13    [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, ...
14    [-3.0, 0.13421052631578945, -0.15, 0.0, -1.0, ...
15    [-1.0, 0.049999999999999996, -0.05, 0.0, -1.0,...
16    [2.0, 0.09473684210526316, 0.1, 1.0, 0.0, 1.0,...
17    [1.0, 0.26052631578947366, 0.05, 1.0, -1.0

In [41]:
from lightgbm import LGBMRanker
from pyterrier.ltr import apply_learned_model

# Hyperparameters of the LightGBM ranker: LambdaRank objective, evaluated
# with nDCG at cutoffs 5 and 10, gain-based feature importances.
lambda_mart_params = dict(
    task="train",
    num_leaves=64,
    objective="lambdarank",
    metric="ndcg",
    ndcg_eval_at=[5, 10],
    learning_rate=0.1,
    num_iterations=500,
    importance_type="gain",
)
lambda_mart = LGBMRanker(**lambda_mart_params)

# Append the learned model to the feature pipeline to obtain the re-ranker.
learned_model = apply_learned_model(lambda_mart, form="ltr")
ltr = features >> learned_model

In [42]:
# Training data: topics and relevance judgments of the training dataset.
train_topics = dataset_train.get_topics()
train_qrels = dataset_train.get_qrels()
# Validation data: only the first 10 test topics; the evaluation cell below
# uses the remaining test topics ([10:]).
validation_topics = dataset_test.get_topics()[:10]
validation_qrels = dataset_test.get_qrels()

ltr.fit(train_topics, train_qrels, validation_topics, validation_qrels)

BR(BM25):   0%|          | 0/43 [00:00<?, ?q/s]

Aggregating query axiom preferences:   0%|          | 0/43 [00:00<?, ?query/s]

BR(BM25):   0%|          | 0/10 [00:00<?, ?q/s]

Aggregating query axiom preferences:   0%|          | 0/10 [00:00<?, ?query/s]



[1]	valid_0's ndcg@5: 0.414169	valid_0's ndcg@10: 0.539215
[2]	valid_0's ndcg@5: 0.452077	valid_0's ndcg@10: 0.56795
[3]	valid_0's ndcg@5: 0.466076	valid_0's ndcg@10: 0.560126
[4]	valid_0's ndcg@5: 0.461305	valid_0's ndcg@10: 0.537124
[5]	valid_0's ndcg@5: 0.531345	valid_0's ndcg@10: 0.583721
[6]	valid_0's ndcg@5: 0.523462	valid_0's ndcg@10: 0.588838
[7]	valid_0's ndcg@5: 0.55886	valid_0's ndcg@10: 0.609431
[8]	valid_0's ndcg@5: 0.569162	valid_0's ndcg@10: 0.619859
[9]	valid_0's ndcg@5: 0.543874	valid_0's ndcg@10: 0.648808
[10]	valid_0's ndcg@5: 0.543237	valid_0's ndcg@10: 0.636326
[11]	valid_0's ndcg@5: 0.544582	valid_0's ndcg@10: 0.654756
[12]	valid_0's ndcg@5: 0.581585	valid_0's ndcg@10: 0.645021
[13]	valid_0's ndcg@5: 0.578295	valid_0's ndcg@10: 0.646972
[14]	valid_0's ndcg@5: 0.559168	valid_0's ndcg@10: 0.636265
[15]	valid_0's ndcg@5: 0.521877	valid_0's ndcg@10: 0.613303
[16]	valid_0's ndcg@5: 0.567678	valid_0's ndcg@10: 0.636185
[17]	valid_0's ndcg@5: 0.572691	valid_0's ndcg@10: 

In [43]:
from pyterrier.pipelines import Experiment
from ir_measures import nDCG, MAP, RR

# Compare the BM25 baseline with the axiomatic LTR re-ranker on the test
# topics that were not used for validation during training ([10:]).
# `ltr ^ bm25` concatenates the BM25 ranking after the re-ranked results.
# The result table is sorted by nDCG@10 (best system first) with a chained,
# non-mutating sort_values call instead of `inplace=True`, so the frame
# returned by Experiment is never modified in place.
experiment = Experiment(
    [bm25, ltr ^ bm25],
    dataset_test.get_topics()[10:],
    dataset_test.get_qrels(),
    eval_metrics=[nDCG@10, RR, MAP],
    names=["BM25", "Axiomatic LTR"],
    verbose=True,
).sort_values(by="nDCG@10", ascending=False)

pt.Experiment:   0%|          | 0/2 [00:00<?, ?system/s]

BR(BM25):   0%|          | 0/44 [00:00<?, ?q/s]

BR(BM25):   0%|          | 0/44 [00:00<?, ?q/s]

Aggregating query axiom preferences:   0%|          | 0/44 [00:00<?, ?query/s]

BR(BM25):   0%|          | 0/44 [00:00<?, ?q/s]

In [44]:
# Display the evaluation table (sorted by nDCG@10 in the previous cell).
experiment

Unnamed: 0,name,nDCG@10,RR,AP
1,Axiomatic LTR,0.475901,0.796266,0.190383
0,BM25,0.470551,0.799107,0.183848


In [47]:
from numpy import ndarray

# Arrange the flat importance vector of the trained ranker as a matrix with
# one column per aggregation function.
# NOTE(review): presumably each row then corresponds to one axiom, in the
# order of the `axioms` list -- confirm against the feature layout.
n_aggregations = len(aggregations)
flat_importance = lambda_mart.feature_importances_
feature_importance: ndarray = flat_importance.reshape(-1, n_aggregations)
feature_importance

array([[3.53962691e+00, 4.88271986e+00, 8.98717887e-02, 1.10044021e-01,
        6.77355734e-01, 4.22180108e+00, 6.16534892e-02],
       [1.12641352e+00, 1.84297464e+00, 1.70581290e+00, 3.44499658e-02,
        2.28179793e-01, 2.43759907e+00, 2.03200565e-01],
       [2.50685029e+01, 5.13284056e+00, 1.36954241e+00, 9.60822386e-02,
        3.24100329e-02, 1.80064516e+01, 1.05106501e+01],
       [6.07094014e+00, 1.03302524e+00, 1.60142772e+00, 0.00000000e+00,
        3.17919813e-01, 1.32853560e+00, 5.45561012e+00],
       [1.02719601e-01, 4.15137305e-02, 9.20499231e-01, 4.20133556e+00,
        1.88329137e-01, 0.00000000e+00, 1.98332000e+00],
       [3.11966548e+00, 3.90741867e+00, 4.36768006e-01, 1.35187585e+00,
        3.93872125e-02, 2.47504320e-02, 1.47237484e+00],
       [2.01742371e+01, 3.81565872e+00, 5.55264138e+00, 1.02132477e+00,
        1.04590540e-01, 5.35229373e-01, 5.71365235e-01],
       [4.84482560e+00, 3.87233031e+00, 3.53681798e+00, 1.06177442e-01,
        4.47618893e-02, 8

In [49]:
# Total importance per row, i.e. summed over all aggregation columns.
feature_importance.sum(axis=1)

array([1.35830729e+01, 7.57863046e+00, 6.02164799e+01, 1.58074586e+01,
       7.43771726e+00, 1.03522405e+01, 3.17750471e+01, 2.56085082e+01,
       1.19903177e+02, 0.00000000e+00, 2.10065668e+01, 1.03599341e+01,
       4.14689536e+01, 5.23711533e+01, 1.86204315e+01, 6.86199632e+01,
       6.40380468e+00, 6.42081310e+00, 6.98094808e+01, 2.39755042e+00,
       7.23188739e+01, 1.69824129e+01, 5.86539854e+01, 5.82766846e+01,
       1.93849151e+01, 3.47392678e+01, 6.91398733e+01, 3.10737406e+01,
       0.00000000e+00, 3.72549753e-03, 0.00000000e+00, 6.70312873e+01,
       5.65790709e+01, 2.04846076e+01, 1.93611262e+01, 2.46109326e+00])