In [8]:
from sys import modules

IN_COLAB = 'google.colab' in modules
if IN_COLAB:
    !pip install -q ir_axioms[examples] python-terrier

In [9]:
# Start/initialize PyTerrier.
from pyterrier import started, init

# Start the PyTerrier JVM only if it is not running yet, so that re-running
# this cell does not call `init()` a second time.
if not started():
    from os import environ

    # The recorded run of this notebook failed with
    # `java.lang.OutOfMemoryError: Java heap space` while loading the index.
    # Set the environment variable PYTERRIER_MEM_MB (megabytes) to raise the
    # JVM heap limit without editing the notebook. When it is unset,
    # `mem=None` keeps PyTerrier's default heap size.
    jvm_mem_mb = environ.get("PYTERRIER_MEM_MB")
    init(
        tqdm="auto",
        mem=int(jvm_mem_mb) if jvm_mem_mb else None,
    )

In [None]:
# TREC edition number and track name; both are used further down to build
# the directory path of the submitted system runs.
edition, track = 28, "deep"

In [None]:
from pyterrier.datasets import get_dataset
from ir_datasets import load

# Identifier of the test collection; the same string is used for both handles.
dataset_name = "msmarco-passage/trec-dl-2019"
# Handle obtained directly from ir_datasets.
ir_dataset = load(dataset_name)
# Handle obtained from PyTerrier, addressed with the "irds:" prefix.
dataset = get_dataset("irds:" + dataset_name)

In [10]:
from pathlib import Path

# Local cache root. The index directory is a per-dataset sub-directory whose
# name is the dataset identifier with "/" replaced by "-".
cache_dir = Path("cache/")
index_dir = cache_dir / "indices" / dataset_name.replace("/", "-")

# Directory holding the submitted system runs for the chosen edition and track.
result_dir = Path(
    "/mnt/ceph/storage/data-in-progress/data-research/"
    "web-search/web-search-trec/trec-system-runs"
) / f"trec{edition}" / track

# `Path.iterdir()` yields entries in arbitrary order. Sort them so the order
# of the runs (and of everything derived from it) is the same on every
# execution, and skip sub-directories, which are not run files.
result_files = sorted(
    result_file
    for result_file in result_dir.iterdir()
    if result_file.is_file()
)

In [None]:
from pyterrier.io import read_results
from pyterrier import Transformer

# For every run file, parse it with `read_results` and wrap the parsed
# results in a transformer; in parallel, derive a run name from the file
# stem with the "input." marker removed.
results = []
results_names = []
for result_file in result_files:
    run_frame = read_results(result_file)
    results.append(Transformer.from_df(run_frame))
    results_names.append(result_file.stem.replace("input.", ""))

In [None]:
from pyterrier.index import IterDictIndexer

# Build the index only when the index directory does not exist yet; an
# existing directory is left untouched.
index_exists = index_dir.exists()
if not index_exists:
    index_path = str(index_dir.absolute())
    indexer = IterDictIndexer(index_path)
    # Index the "text" field of the documents yielded by the corpus iterator.
    corpus_iter = dataset.get_corpus_iter()
    indexer.index(corpus_iter, fields=["text"])

In [1]:
from ir_axioms.axiom import (
    ArgUC, QTArg, QTPArg, aSL, PROX1, PROX2, PROX3, PROX4, PROX5, TFC1, TFC3, RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV, M_TDC, LEN_M_TDC, STMC1, STMC1_f, STMC2, STMC2_f, LNC1, TF_LNC, LB1,
    REG, ANTI_REG
)

# Axiom classes to evaluate, in the order in which they end up in `axioms`.
axiom_classes = [
    ArgUC,
    QTArg,
    QTPArg,
    aSL,
    LNC1,
    TF_LNC,
    LB1,
    PROX1,
    PROX2,
    PROX3,
    PROX4,
    PROX5,
    # REG,  # Tie
    # ANTI_REG,  # Tie.
    AND,
    LEN_AND,
    M_AND,
    LEN_M_AND,
    DIV,
    LEN_DIV,
    RS_TF,
    RS_TF_IDF,
    RS_BM25,
    RS_PL2,
    RS_QL,
    TFC1,
    TFC3,
    M_TDC,
    LEN_M_TDC,
    STMC1,
    STMC1_f,
    STMC2,
    STMC2_f,
]

# Instantiate each axiom without arguments and apply the unary `~` operator
# to the instance (presumably ir_axioms' caching wrapper -- confirm against
# the library documentation).
axioms = [~axiom_class() for axiom_class in axiom_classes]

NameError: name 'REG' is not defined

In [13]:
from ir_axioms.backend.pyterrier.experiment import AxiomaticExperiment

# Apply `% 10` to every run (PyTerrier's rank-cutoff operator) before handing
# the runs to the experiment.
top_ranked_systems = [result % 10 for result in results]
experiment_topics = dataset.get_topics()
experiment_qrels = dataset.get_qrels()

# Bundle the runs, topics, judgments, index location, and axioms into one
# experiment object.
axiomatic_experiment = AxiomaticExperiment(
    retrieval_systems=top_ranked_systems,
    topics=experiment_topics,
    qrels=experiment_qrels,
    index=index_dir,
    dataset=ir_dataset,
    axioms=axioms,
    filter_by_qrels=True,
    filter_by_topics=True,
    verbose=True,
    cache_dir=cache_dir,
)

In [14]:
# Read the preferences from the experiment. The progress bars recorded in this
# cell's output ("Computing system axiomatic preferences", ...) indicate that
# this attribute access is where the computation runs.
preferences = axiomatic_experiment.preferences
# Bare last expression: display the result as the cell output.
preferences

Computing system axiomatic preferences:   0%|          | 0/37 [00:00<?, ?system/s]

Computing query axiom preferences:   0%|          | 0/43 [00:00<?, ?query/s]

Computing axiom preferences:   0%|          | 0/7 [00:00<?, ?axiom/s]

Computing axiom preferences:   0%|          | 0/7 [00:00<?, ?axiom/s]

Computing axiom preferences:   0%|          | 0/7 [00:00<?, ?axiom/s]

Computing axiom preferences:   0%|          | 0/7 [00:00<?, ?axiom/s]

Computing axiom preferences:   0%|          | 0/7 [00:00<?, ?axiom/s]

Computing axiom preferences:   0%|          | 0/7 [00:00<?, ?axiom/s]

Computing axiom preferences:   0%|          | 0/7 [00:00<?, ?axiom/s]

04:14:14.741 [main] WARN org.terrier.structures.BaseCompressingMetaIndex - OutOfMemoryError: Structure meta reading lookup file directly from disk
04:14:14.795 [main] WARN org.terrier.structures.BaseCompressingMetaIndex - OutOfMemoryError: Structure meta reading data file directly from disk


Computing axiom preferences:   0%|          | 0/7 [00:00<?, ?axiom/s]

04:14:18.081 [main] ERROR org.terrier.structures.Index - Couldn't load an index structure called document
java.lang.OutOfMemoryError: Java heap space
04:14:18.209 [main] WARN org.terrier.structures.BaseCompressingMetaIndex - OutOfMemoryError: Structure meta reading lookup file directly from disk
04:14:18.246 [main] WARN org.terrier.structures.BaseCompressingMetaIndex - OutOfMemoryError: Structure meta reading data file directly from disk
04:14:18.284 [main] ERROR org.terrier.structures.Index - Couldn't load an index structure called document
java.lang.OutOfMemoryError: Java heap space
04:14:18.321 [main] ERROR org.terrier.structures.Index - Couldn't load an index structure called document
java.lang.OutOfMemoryError: Java heap space


JavaException: JVM exception occurred: Could not load an index for ref /home/heinrich/Development/webis/ir_axioms/examples/cache/indices/msmarco-passage/trec-dl-2019, even though IndexLoader org.terrier.structures.IndexOnDisk$DiskIndexLoader could support that type of index. It may be your ref had a wrong location; Terrier logs may have more information. java.lang.IllegalArgumentException

In [None]:
# Keep the rows whose original preference differs from the oracle preference
# by at least 2 in absolute value.
preference_gap = (
    preferences["oracle_preference"] - preferences["original_preference"]
).abs()
wrong_preferences = preferences[preference_gap >= 2]
wrong_preferences

In [None]:
# One "<axiom name>_preference" column per configured axiom.
axiom_columns = [f"{axiom.name}_preference" for axiom in axioms]

# Select the ROWS in which at least one axiom column is non-zero. The mask must
# be reduced to one boolean per row with `.any(axis=1)`: indexing a DataFrame
# with a boolean DataFrame does not drop rows, it only replaces the cells where
# the mask is not True with NaN.
has_axiom_hint = (wrong_preferences[axiom_columns] != 0).any(axis=1)
wrong_preferences_with_axiom_hints = wrong_preferences[has_axiom_hint]
wrong_preferences_with_axiom_hints

In [None]:
# Per query id: count the non-null `original_preference` entries among the
# wrong preferences, then order the queries from most to fewest.
wrong_preferences_per_query = (
    wrong_preferences
    .groupby(["qid"])
    .agg(wrong_preferences=("original_preference", "count"))
    .sort_values(by="wrong_preferences", ascending=False)
)
wrong_preferences_per_query

In [None]:
# Per run name: count the non-null `original_preference` entries among the
# wrong preferences, then order the runs from most to fewest.
wrong_preferences_per_run = (
    wrong_preferences
    .groupby(["name"])
    .agg(wrong_preferences=("original_preference", "count"))
    .sort_values(by="wrong_preferences", ascending=False)
)
wrong_preferences_per_run