In [1]:
# Start/initialize PyTerrier.
from pyterrier import started, init

if not started():
    init(tqdm="auto")

PyTerrier 0.8.0 has loaded Terrier 5.6 (built by craigmacdonald on 2021-09-17 13:27)

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
from pyterrier.datasets import get_dataset, Dataset

# Load dataset.
dataset_name = "msmarco-passage"
dataset: Dataset = get_dataset(f"irds:{dataset_name}")
dataset_train: Dataset = get_dataset(f"irds:{dataset_name}/trec-dl-2019/judged")
dataset_test: Dataset = get_dataset(f"irds:{dataset_name}/trec-dl-2020/judged")

In [3]:
from pathlib import Path

cache_dir = Path("cache/")
index_dir = cache_dir / "indices" / dataset_name.split("/")[0]

In [4]:
from pyterrier.index import IterDictIndexer

if not index_dir.exists():
    indexer = IterDictIndexer(str(index_dir.absolute()))
    indexer.index(
        dataset.get_corpus_iter(),
        fields=["text"]
    )

In [5]:
from pyterrier.batchretrieve import BatchRetrieve

# BM25 baseline retrieval.
bm25 = BatchRetrieve(str(index_dir.absolute()), wmodel="BM25")

In [6]:
from ir_axioms.axiom import (
    ArgUC, QTArg, QTPArg, aSL, PROX1, PROX2, PROX3, PROX4, PROX5, TFC1, TFC3, RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV, M_TDC, LEN_M_TDC, STMC1, STMC1_f, STMC2, STMC2_f, LNC1, TF_LNC, LB1,
    REG, ANTI_REG, REG_f, ANTI_REG_f, ASPECT_REG, ASPECT_REG_f, ORIG
)

axioms = [
    ~ArgUC(), ~QTArg(), ~QTPArg(), ~aSL(),
    ~LNC1(), ~TF_LNC(), ~LB1(),
    ~PROX1(), ~PROX2(), ~PROX3(), ~PROX4(), ~PROX5(),
    ~REG(), ~REG_f(), ~ANTI_REG(), ~ANTI_REG_f(), ~ASPECT_REG(), ~ASPECT_REG_f(),
    ~AND(), ~LEN_AND(), ~M_AND(), ~LEN_M_AND(), ~DIV(), ~LEN_DIV(),
    ~RS_TF(), ~RS_TF_IDF(), ~RS_BM25(), ~RS_PL2(), ~RS_QL(),
    ~TFC1(), ~TFC3(), ~M_TDC(), ~LEN_M_TDC(),
    ~STMC1(), ~STMC1_f(), ~STMC2(), ~STMC2_f(),
]
axiom_names = [axiom.axiom.name for axiom in axioms]

In [7]:
from ir_axioms.backend.pyterrier.experiment import AxiomaticExperiment

experiment = AxiomaticExperiment(
    retrieval_systems=[bm25],
    topics=dataset_test.get_topics(),
    qrels=dataset_test.get_qrels(),
    index=index_dir,
    dataset=dataset_name,
    axioms=axioms,
    axiom_names=axiom_names,
    depth=10,
    filter_by_qrels=False,
    filter_by_topics=False,
    verbose=True,
    cache_dir=cache_dir,
)

In [8]:
experiment.preferences

Computing system axiomatic preferences:   0%|          | 0/1 [00:00<?, ?system/s]

Computing query axiom preferences:   0%|          | 0/54 [00:00<?, ?query/s]

Unnamed: 0,qid,docid_a,docno_a,rank_a,score_a,query,label_a,iteration_a,docid_b,docno_b,...,RS-PL2_preference,RS-QL_preference,TFC1_preference,TFC3_preference,M-TDC_preference,LEN-M-TDC_preference,STMC1_preference,STMC1-fastText_preference,STMC2_preference,STMC2-fastText_preference
0,1030303,8726436,8726436,0,54.354218,who is aziz hashim,3.0,0,8726436,8726436,...,0,0,0,0,0,0,0,0,0,0
1,1030303,8726436,8726436,0,54.354218,who is aziz hashim,3.0,0,8726433,8726433,...,1,1,0,0,0,0,1,1,0,0
2,1030303,8726436,8726436,0,54.354218,who is aziz hashim,3.0,0,8726435,8726435,...,1,1,0,0,0,0,1,1,0,0
3,1030303,8726436,8726436,0,54.354218,who is aziz hashim,3.0,0,8726429,8726429,...,1,1,-1,0,0,0,1,1,0,0
4,1030303,8726436,8726436,0,54.354218,who is aziz hashim,3.0,0,8726437,8726437,...,1,1,0,0,0,0,-1,1,0,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5395,997622,7965342,7965342,9,30.023091,where is the show shameless filmed,0.0,0,4643397,4643397,...,1,-1,0,0,0,0,-1,1,0,-1
5396,997622,7965342,7965342,9,30.023091,where is the show shameless filmed,0.0,0,4518222,4518222,...,1,-1,0,0,0,0,1,1,0,0
5397,997622,7965342,7965342,9,30.023091,where is the show shameless filmed,0.0,0,4810071,4810071,...,-1,-1,0,0,0,0,-1,1,0,0
5398,997622,7965342,7965342,9,30.023091,where is the show shameless filmed,0.0,0,4558331,4558331,...,1,-1,0,0,0,0,1,1,0,0


In [12]:
experiment.inconsistent_pairs.mean()

  experiment.inconsistent_pairs.mean()


qid                                        inf
docid_a                           4.859517e+06
docno_a                                    inf
rank_a                            6.326316e+00
score_a                           3.505220e+01
label_a                           2.223158e+00
iteration_a                       0.000000e+00
docid_b                           4.742632e+06
docno_b                                    inf
rank_b                            2.810526e+00
score_b                           3.751082e+01
label_b                           4.757895e-01
iteration_b                       0.000000e+00
ORIG_preference                  -1.000000e+00
ORACLE_preference                 1.000000e+00
ArgUC_preference                 -2.105263e-02
QTArg_preference                 -4.842105e-02
QTPArg_preference                 2.105263e-02
aSL_preference                    2.105263e-03
LNC1_preference                  -2.105263e-03
TF-LNC_preference                 1.263158e-02
LB1_preferenc

In [10]:
experiment.preference_consistency

Unnamed: 0,axiom,ORIG_consistency,ORACLE_consistency
0,ArgUC,0.494475,0.494505
1,QTArg,0.662539,0.538922
2,QTPArg,0.591623,0.673367
3,aSL,0.462185,0.531746
4,LNC1,0.578947,0.5625
5,TF-LNC,0.58042,0.613333
6,LB1,0.664957,0.625352
7,PROX1,0.568596,0.605611
8,PROX2,0.592907,0.63474
9,PROX3,0.666667,0.482759


In [11]:
experiment.preference_distribution

Unnamed: 0,axiom,axiom == 0,axiom == ORIG,axiom != ORIG
0,ArgUC,2068,179,183
1,QTArg,2107,214,109
2,QTPArg,2048,226,156
3,aSL,2192,110,128
4,LNC1,2373,33,24
5,TF-LNC,2287,83,60
6,LB1,1845,389,196
7,PROX1,1169,717,544
8,PROX2,1133,769,528
9,PROX3,2340,60,30
