In [1]:
from sys import modules

IN_COLAB = 'google.colab' in modules
if IN_COLAB:
    !pip install -q ir_axioms[examples] python-terrier

In [2]:
# Start/initialize PyTerrier, but only if it has not been started yet.
from pyterrier import started, init

# The guard keeps `init` from being called again when this cell is re-run.
if not started():
    # NOTE(review): tqdm="auto" presumably lets PyTerrier pick a progress-bar
    # flavour that fits the frontend -- confirm in the PyTerrier docs.
    init(tqdm="auto")

PyTerrier 0.8.0 has loaded Terrier 5.6 (built by craigmacdonald on 2021-09-17 13:27)

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [3]:
from pyterrier.datasets import get_dataset, Dataset

# Load the corpus plus the judged TREC DL 2019 / 2020 topic sets, which the
# rest of the notebook uses as training and test data respectively.
# All three are addressed through the same `irds:`-prefixed identifier.
dataset_name = "msmarco-passage"
irds_id = f"irds:{dataset_name}"
dataset: Dataset = get_dataset(irds_id)
dataset_train: Dataset = get_dataset(f"{irds_id}/trec-dl-2019/judged")
dataset_test: Dataset = get_dataset(f"{irds_id}/trec-dl-2020/judged")

In [4]:
from pathlib import Path

# On-disk locations: `cache/` is the cache root, and the index lives in a
# sub-directory named after the first path component of the dataset name.
cache_dir = Path("cache/")
index_dir = cache_dir.joinpath("indices", dataset_name.partition("/")[0])

In [5]:
from pyterrier.index import IterDictIndexer

# Build the index only when its directory does not exist yet; an existing
# directory is taken to mean that indexing has already been done.
index_missing = not index_dir.exists()
if index_missing:
    indexer = IterDictIndexer(str(index_dir.absolute()))
    corpus_iter = dataset.get_corpus_iter()
    # Only the "text" field of each document is passed on for indexing.
    indexer.index(corpus_iter, fields=["text"])

In [6]:
from pyterrier.batchretrieve import BatchRetrieve

# Baseline retriever: BM25 weighting model over the index in `index_dir`.
index_path = str(index_dir.absolute())
bm25 = BatchRetrieve(index_path, wmodel="BM25")

In [7]:
from ir_axioms.axiom import (
    ArgUC, QTArg, QTPArg, aSL, PROX1, PROX2, PROX3, PROX4, PROX5, TFC1, TFC3, RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV, M_TDC, LEN_M_TDC, STMC1, STMC1_f, STMC2, STMC2_f, LNC1, TF_LNC, LB1,
    REG, ANTI_REG, REG_f, ANTI_REG_f, ASPECT_REG, ASPECT_REG_f, ORIG
)

# Axioms that get wrapped with the `~` operator before use.
# NOTE(review): in ir_axioms, `~axiom` presumably turns on caching of the
# axiom's preferences -- confirm against the ir_axioms documentation.
_wrapped_axioms = (
    ArgUC(), QTArg(), QTPArg(), aSL(),
    LNC1(), TF_LNC(), LB1(),
    PROX1(), PROX2(), PROX3(), PROX4(), PROX5(),
    REG(), REG_f(), ANTI_REG(), ANTI_REG_f(), ASPECT_REG(), ASPECT_REG_f(),
    AND(), LEN_AND(), M_AND(), LEN_M_AND(), DIV(), LEN_DIV(),
    RS_TF(), RS_TF_IDF(), RS_BM25(), RS_PL2(), RS_QL(),
    TFC1(), TFC3(), M_TDC(), LEN_M_TDC(),
    STMC1(), STMC1_f(), STMC2(), STMC2_f(),
)

# Keep this order stable: the feature-importance cells further down are
# presumably read row-by-row in the same axiom order.
axioms = [~axiom for axiom in _wrapped_axioms]
axioms.append(ORIG())  # ORIG is the only axiom used without `~`.

In [8]:
from ir_axioms.backend.pyterrier.transformers import AggregatedAxiomaticPreferences

def _share_of(predicate):
    """Return an aggregation giving the fraction of preferences matching ``predicate``."""
    return lambda prefs: sum(predicate(pref) for pref in prefs) / len(prefs)


# Three aggregations over a sequence of preference values, in this order:
# share of non-negative, share of zero, and share of non-positive values.
aggregations = [
    _share_of(lambda pref: pref >= 0),
    _share_of(lambda pref: pref == 0),
    _share_of(lambda pref: pref <= 0),
]
# Feature pipeline: take the BM25 ranking cut off with `% 20`, then feed it
# into the aggregated axiomatic preferences transformer.
top_ranked = bm25 % 20
axiomatic_preferences = AggregatedAxiomaticPreferences(
    axioms=axioms,
    index=index_dir,
    aggregations=aggregations,
    dataset=dataset_name,
    cache_dir=cache_dir,
    verbose=True,
)
features = top_ranked >> axiomatic_preferences

In [9]:
# Sanity check: run the feature pipeline on the first training topic only and
# show the resulting "features" column.
first_topic = dataset_train.get_topics()[:1]
features.transform(first_topic)["features"]

Aggregating query axiom preferences:   0%|          | 0/1 [00:00<?, ?query/s]

0     [0.95, 0.8, 0.85, 0.95, 0.95, 1.0, 0.85, 0.85,...
1     [0.95, 0.95, 1.0, 0.95, 0.9, 0.95, 0.95, 0.9, ...
2     [1.0, 0.85, 0.85, 0.95, 0.9, 0.95, 0.8, 0.8, 1...
3     [1.0, 0.85, 0.85, 0.95, 0.9, 0.95, 1.0, 0.75, ...
4     [0.8, 0.75, 0.95, 0.95, 0.9, 0.95, 0.85, 0.65,...
5     [0.9, 0.9, 1.0, 0.95, 0.95, 1.0, 0.9, 0.8, 0.9...
6     [0.9, 0.9, 1.0, 1.0, 1.0, 1.0, 0.9, 0.9, 1.0, ...
7     [1.0, 1.0, 1.0, 0.95, 0.95, 1.0, 0.95, 0.95, 1...
8     [1.0, 0.9, 0.9, 1.0, 0.9, 0.9, 0.95, 0.9, 0.95...
9     [1.0, 0.85, 0.85, 0.85, 0.8, 0.95, 0.85, 0.8, ...
10    [0.85, 0.8, 0.95, 1.0, 0.75, 0.75, 0.75, 0.75,...
11    [0.75, 0.75, 1.0, 0.75, 0.75, 1.0, 0.95, 0.8, ...
12    [0.75, 0.75, 1.0, 0.8, 0.8, 1.0, 1.0, 0.7, 0.7...
13    [1.0, 0.85, 0.85, 0.95, 0.95, 1.0, 0.95, 0.85,...
14    [1.0, 0.85, 0.85, 0.95, 0.8, 0.85, 0.85, 0.8, ...
15    [0.9, 0.85, 0.95, 0.95, 0.95, 1.0, 1.0, 0.85, ...
16    [0.95, 0.8, 0.85, 1.0, 0.7, 0.7, 0.75, 0.75, 1...
17    [0.85, 0.8, 0.95, 0.95, 0.9, 0.95, 0.95, 0

In [10]:
from lightgbm import LGBMRanker
from pyterrier.ltr import apply_learned_model

# Hyper-parameters of the LightGBM ranker, collected in one place.
ranker_params = {
    "num_iterations": 1000,
    "metric": "ndcg",
    "eval_at": [10],
    # "gain" importances are inspected in the feature-importance cells below.
    "importance_type": "gain",
}
lambda_mart = LGBMRanker(**ranker_params)

# Learning-to-rank pipeline: axiomatic features followed by the learned model.
ltr = features >> apply_learned_model(lambda_mart, form="ltr")

In [11]:
# Split the training topics: all but the last 5 are used for fitting, the
# last 5 are passed as the second (validation) topic set. Both splits are
# paired with the full training qrels.
train_topics = dataset_train.get_topics()[:-5]
validation_topics = dataset_train.get_topics()[-5:]
ltr.fit(
    train_topics,
    dataset_train.get_qrels(),
    validation_topics,
    dataset_train.get_qrels(),
)

Aggregating query axiom preferences:   0%|          | 0/38 [00:00<?, ?query/s]

Aggregating query axiom preferences:   0%|          | 0/5 [00:00<?, ?query/s]



[1]	valid_0's ndcg@10: 0.499041
[2]	valid_0's ndcg@10: 0.540851
[3]	valid_0's ndcg@10: 0.56665
[4]	valid_0's ndcg@10: 0.54274
[5]	valid_0's ndcg@10: 0.554685
[6]	valid_0's ndcg@10: 0.597619
[7]	valid_0's ndcg@10: 0.567879
[8]	valid_0's ndcg@10: 0.5714
[9]	valid_0's ndcg@10: 0.562249
[10]	valid_0's ndcg@10: 0.570762
[11]	valid_0's ndcg@10: 0.589228
[12]	valid_0's ndcg@10: 0.59746
[13]	valid_0's ndcg@10: 0.633497
[14]	valid_0's ndcg@10: 0.605597
[15]	valid_0's ndcg@10: 0.601942
[16]	valid_0's ndcg@10: 0.609102
[17]	valid_0's ndcg@10: 0.608157
[18]	valid_0's ndcg@10: 0.605237
[19]	valid_0's ndcg@10: 0.616162
[20]	valid_0's ndcg@10: 0.608126
[21]	valid_0's ndcg@10: 0.581933
[22]	valid_0's ndcg@10: 0.556621
[23]	valid_0's ndcg@10: 0.547529
[24]	valid_0's ndcg@10: 0.568303
[25]	valid_0's ndcg@10: 0.566846
[26]	valid_0's ndcg@10: 0.574184
[27]	valid_0's ndcg@10: 0.589674
[28]	valid_0's ndcg@10: 0.59666
[29]	valid_0's ndcg@10: 0.609654
[30]	valid_0's ndcg@10: 0.569011
[31]	valid_0's ndcg@10: 0

In [12]:
from pyterrier.pipelines import Experiment
from ir_measures import nDCG, MAP, RR

# Evaluate plain BM25 against the axiomatic LTR pipeline on the test topics.
# NOTE(review): `ltr ^ bm25` presumably appends the BM25 results after the
# re-ranked ones -- confirm against the PyTerrier operator documentation.
systems = [bm25, ltr ^ bm25]
system_names = ["BM25", "Axiomatic LTR"]
measures = [nDCG @ 10, RR, MAP]

# The result table is ordered best-first by nDCG@10.
experiment = Experiment(
    systems,
    dataset_test.get_topics(),
    dataset_test.get_qrels(),
    measures,
    system_names,
    verbose=True,
).sort_values(by="nDCG@10", ascending=False)

pt.Experiment:   0%|          | 0/2 [00:00<?, ?system/s]

Aggregating query axiom preferences:   0%|          | 0/54 [00:00<?, ?query/s]

In [13]:
# Display the evaluation results table.
experiment

Unnamed: 0,name,nDCG@10,RR,AP
1,Axiomatic LTR,0.498449,0.831643,0.363394
0,BM25,0.493627,0.802359,0.358724


In [14]:
from numpy import ndarray

# View the flat importance vector as a 2-D array with `len(aggregations)`
# columns.
# NOTE(review): this assumes the features are laid out axiom by axiom, so
# that each row holds the aggregations of one axiom -- confirm.
n_aggregations = len(aggregations)
feature_importance: ndarray = lambda_mart.feature_importances_.reshape((-1, n_aggregations))
feature_importance

array([[5.86175474e+00, 1.21515853e+01, 6.04408537e-01],
       [3.17786428e+00, 2.09675333e+00, 8.62585699e-01],
       [3.06402294e+01, 6.16518896e+00, 1.23945687e+01],
       [3.82426612e+00, 1.13830602e+01, 3.62478426e+00],
       [2.15677102e+00, 8.66048304e+00, 7.62573378e+00],
       [4.77641736e-01, 1.16048395e+01, 1.29105230e+00],
       [4.66689994e+00, 1.63565951e+01, 1.52429873e+01],
       [1.91325840e+01, 5.96456131e+00, 1.72812464e+01],
       [2.21793423e+01, 8.65126559e+00, 5.35151484e+01],
       [0.00000000e+00, 2.11847127e-01, 1.66543802e-02],
       [1.33425855e+01, 2.61285216e+00, 1.31349912e+01],
       [4.67854904e+00, 3.53968077e+00, 4.14729606e+00],
       [1.27602109e+01, 1.64312607e+00, 1.96652536e+01],
       [2.06499505e+01, 1.47197453e-01, 2.21904958e+01],
       [1.19799162e+01, 5.36651533e+00, 1.41521068e+01],
       [1.96584544e+01, 1.30074217e+01, 1.97514260e+01],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000

In [15]:
# Column sums: total importance per column of the reshaped array
# (presumably one column per aggregation).
feature_importance.sum(axis=0)

array([374.34106577, 175.00640692, 415.60567636])

In [16]:
# Row sums: total importance per row of the reshaped array
# (presumably one row per axiom, in the order of the `axioms` list).
feature_importance.sum(axis=1)

array([18.61774858,  6.1372033 , 49.19998711, 18.83211055, 18.44298784,
       13.37353352, 36.26648232, 42.37839165, 84.34575624,  0.22850151,
       29.09042892, 12.36552587, 34.06859055, 42.98764371, 31.4985383 ,
       52.41730219,  0.        ,  0.        , 16.59287924,  0.85480563,
       43.50205468, 12.15295285, 64.90429097, 12.11750185, 39.98043344,
       51.93847598, 18.89247075, 16.09435895, 32.12913233, 16.2578497 ,
        0.        ,  0.        ,  0.        , 49.40362889, 57.74065702,
       10.60284219, 27.06759163,  4.47049077])