In [115]:
from itertools import groupby
from math import inf
from pathlib import Path
from typing import NamedTuple

from ir_datasets import load
from ir_measures import Measure, nDCG, Bpref, define_byquery, P
from pandas import read_csv, DataFrame
from tqdm.auto import tqdm

In [23]:
# Directory that holds the saved run CSV files referenced by `run_confs`.
runs_path: Path = Path("../data/runs_saved")

In [85]:
# Description of one saved run that appears as a row in the results table.
RunConf = NamedTuple(
    "RunConf",
    [
        ("year", int),  # selects the qrels the run is evaluated against
        ("model", str),  # printed as the first table column
        ("transformation", str),  # printed as the second table column (LaTeX markup)
        ("path", Path),  # CSV file that is loaded with `read_run`
    ],
)


# Runs shown in the results table, ordered by year (the table cell groups
# consecutive entries by `year`). The `transformation` labels are LaTeX
# snippets; "doc.\ " uses a control space so that the period of the
# abbreviation is not typeset with end-of-sentence spacing.
run_confs = (
    RunConf(2020, r"BM25", r"With pools + T \& C", runs_path / "topics_conclusion_tuned_bm25.csv"),
    RunConf(2020, r"BM25 + RM3", r"With pools + T \& C", runs_path / "topics_conclusion_tuned_bm_rm3.csv"),
    RunConf(2020, r"DLM", r"With pools + T \& C", runs_path / "topics_conclusion_tuned_dlm.csv"),
    RunConf(2020, r"DLM + RM3", r"All doc.\ + C", runs_path / "conclusions_leakage_tuned_dlm_rm3.csv"),
    RunConf(2021, r"BM25", r"Without pools + T \& C", runs_path / "pool_topics_conclusions_tuned_bm25.csv"),
    RunConf(2021, r"BM25 + RM3", r"With pools + T \& C", runs_path / "topics_conclusion_tuned_bm_rm3.csv"),
    # Fixed: this label was missing the control space used by the other "All doc.\ " labels.
    RunConf(2021, r"DLM", r"All doc.\ + T \& C", runs_path / "topics_conclusion_leakage_tuned_dlm.csv"),
    RunConf(2021, r"DLM + RM3", r"All doc.\ + T \& C", runs_path / "topics_conclusion_leakage_tuned_dlm_rm3.csv"),
)

In [129]:
def read_run(path: Path) -> DataFrame:
    """Load a saved run from a CSV file.

    The ``qid`` and ``docno`` columns are copied, as strings, into new
    ``query_id`` and ``doc_id`` columns. Rows of query ``"25"`` (the unjudged
    topic from 2020) are dropped.

    :param path: CSV file with at least the columns ``qid`` and ``docno``.
    :return: The filtered run with the additional string-typed ID columns.
    """
    ranking = read_csv(path)
    ranking = ranking.assign(
        query_id=ranking["qid"].astype(str),
        doc_id=ranking["docno"].astype(str),
    )
    is_judged_topic = ranking["query_id"] != "25"  # Unjudged topic from 2020.
    return ranking.loc[is_judged_topic]

In [130]:
# Load every configured run once; the run configuration itself is the key.
runs = dict(
    (run_conf, read_run(run_conf.path))
    for run_conf in run_confs
)

In [131]:
# ir_datasets handle per year, keyed by the same year values used in `run_confs`.
datasets = dict(
    (year, load(dataset_id))
    for year, dataset_id in (
        (2020, "argsme/2020-04-01/touche-2020-task-1"),
        (2021, "argsme/2020-04-01/touche-2021-task-1"),
    )
)

In [132]:
# Relevance judgments per year, as returned by each dataset's `qrels_dict()`.
qrels = {
    year: datasets[year].qrels_dict()
    for year in datasets
}

/home/heinrich/.ir_datasets/touche/2020/task-1/qrels.qrels


In [133]:
# One evaluation measure together with how it is labelled and compared.
MeasureConf = NamedTuple(
    "MeasureConf",
    [
        ("unjudged_removed", bool),  # selects the column group the measure is printed in
        ("name", str),  # column label in the printed table
        ("measure", Measure),  # ir_measures measure used for `calc_aggregate`
        ("maximize", bool),  # True if larger values are better, False if smaller are
    ],
)


def _hole_impl(qrels: DataFrame, run: DataFrame):
    query_document_ids = set((qrels["query_id"] + qrels["doc_id"]).unique())
    return len(run[~(run["query_id"] + run["doc_id"]).isin(query_document_ids)]) / len(run)


# Register `_hole_impl` as a per-query ir_measures measure that supports
# cutoffs (used as `Hole @ 5` below).
Hole = define_byquery(
    impl=_hole_impl,
    name="Hole",
    support_cutoff=True,
)

# All measures that are evaluated. `unjudged_removed` decides in which column
# group of the table a measure is printed; `maximize` is False only for
# Hole@5, where smaller values are treated as better.
measure_confs = (
    MeasureConf(unjudged_removed=False, name="nDCG@5", measure=nDCG @ 5, maximize=True),
    MeasureConf(unjudged_removed=False, name="P@3", measure=P @ 3, maximize=True),
    MeasureConf(unjudged_removed=False, name="P@5", measure=P @ 5, maximize=True),
    MeasureConf(unjudged_removed=False, name="Hole@5", measure=Hole @ 5, maximize=False),
    MeasureConf(unjudged_removed=True, name="nDCG@5", measure=nDCG(judged_only=True) @ 5, maximize=True),
    MeasureConf(unjudged_removed=True, name="P@3", measure=P(judged_only=True) @ 3, maximize=True),
    MeasureConf(unjudged_removed=True, name="P@5", measure=P(judged_only=True) @ 5, maximize=True),
    MeasureConf(unjudged_removed=True, name="Bpref", measure=Bpref, maximize=True),
)

In [135]:
# Print the rows of a LaTeX table with one row per configured run.

# Column order: first all measures with `unjudged_removed=True`, then the
# remaining ones. Computed once so that header and body cannot get out of sync.
ordered_measure_confs = [
    measure_conf
    for should_unjudged_removed in (True, False)
    for measure_conf in measure_confs
    if measure_conf.unjudged_removed == should_unjudged_removed
]

# Header row: two empty cells (model, transformation), then the measure names.
print(
    r"  & "
    + "".join(r" & " + measure_conf.name for measure_conf in ordered_measure_confs)
    + r" \\"
)

# `groupby` only merges consecutive items, so this relies on `run_confs`
# being ordered by year.
for year, run_conf_group in groupby(run_confs, lambda run_conf: run_conf.year):
    print(r"  \midrule")
    print(r"  \multicolumn{5}{c}{" + f"{year}" + r"} \\")
    print(r"  \midrule")
    qrel = qrels[year]
    for run_conf in run_conf_group:
        run = runs[run_conf]
        cols = [
            run_conf.model,
            run_conf.transformation,
        ]
        # One aggregated score per measure, formatted with two decimals.
        cols.extend(
            f"{measure_conf.measure.calc_aggregate(qrel, run):.2f}"
            for measure_conf in ordered_measure_confs
        )
        print(r"  " + r" & ".join(cols) + r" \\")

  &  & nDCG@5 & P@3 & P@5 & Bpref & nDCG@5 & P@3 & P@5 & Hole@5 \\
  \midrule
  \multicolumn{5}{c}{2020} \\
  \midrule
  BM25 & With pools + T \& C & 0.84 & 0.92 & 0.89 & 0.71 & 0.42 & 0.47 & 0.43 & 0.77 \\
  BM25 + RM3 & With pools + T \& C & 0.87 & 0.95 & 0.93 & 0.77 & 0.40 & 0.43 & 0.40 & 0.77 \\
  DLM & With pools + T \& C & 0.82 & 0.90 & 0.87 & 0.68 & 0.45 & 0.51 & 0.44 & 0.77 \\
  DLM + RM3 & All doc.\ + C & 0.88 & 0.95 & 0.94 & 0.71 & 0.42 & 0.49 & 0.42 & 0.78 \\
  \midrule
  \multicolumn{5}{c}{2021} \\
  \midrule
  BM25 & Without pools + T \& C & 0.74 & 0.83 & 0.83 & 0.74 & 0.61 & 0.71 & 0.64 & 0.60 \\
  BM25 + RM3 & With pools + T \& C & 0.74 & 0.82 & 0.84 & 0.74 & 0.53 & 0.62 & 0.58 & 0.64 \\
  DLM & All doc. + T \& C & 0.74 & 0.85 & 0.83 & 0.72 & 0.61 & 0.69 & 0.68 & 0.60 \\
  DLM + RM3 & All doc.\ + T \& C & 0.70 & 0.82 & 0.80 & 0.73 & 0.51 & 0.62 & 0.59 & 0.64 \\


In [121]:
# (year, directory) pairs; each directory is searched recursively for run files.
_touche_2020_runs = Path("../data/touche2020-task1-runs-args-me-corpus-version-2020-04-01")
_touche_2021_runs = Path("../data/touche2021-task1-runs")
touche_runs_paths = (
    (2020, _touche_2020_runs),
    (2021, _touche_2021_runs),
)

In [127]:
def read_touche_run(path: Path) -> DataFrame:
    """Load a submitted Touché run from a space-separated, header-less file.

    The six columns are named ``qid``, ``Q0``, ``docno``, ``rank``, ``score``
    and ``run_id``. The ``qid`` and ``docno`` columns are copied, as strings,
    into new ``query_id`` and ``doc_id`` columns. Rows of query ``"25"`` (the
    unjudged topic from 2020) are dropped.

    :param path: Run file to read.
    :return: The filtered run with the additional string-typed ID columns.
    """
    column_names = ["qid", "Q0", "docno", "rank", "score", "run_id"]
    ranking = read_csv(path, sep=" ", header=None, names=column_names)
    ranking = ranking.assign(
        query_id=ranking["qid"].astype(str),
        doc_id=ranking["docno"].astype(str),
    )
    is_judged_topic = ranking["query_id"] != "25"  # Unjudged topic from 2020.
    return ranking.loc[is_judged_topic]

In [138]:
# For every year and every measure, find the best-scoring submitted Touché run.
#
# The loop variable is deliberately NOT called `runs_path`: that name is the
# notebook-level directory of the saved runs used to build `run_confs`, and
# overwriting it here would break re-running the earlier cells.
for year, touche_runs_dir in touche_runs_paths:
    print(year)
    qrel = qrels[year]
    # Start from the worst possible value in the direction of each measure.
    best_metrics = {
        measure_conf: -inf if measure_conf.maximize else inf
        for measure_conf in measure_confs
    }
    best_metric_system = {}
    run_paths = list(touche_runs_dir.rglob("run*.txt"))
    for run_path in tqdm(run_paths):
        run = read_touche_run(run_path)
        for measure_conf in measure_confs:
            aggregated = measure_conf.measure.calc_aggregate(qrel, run)
            best_so_far = best_metrics[measure_conf]
            if measure_conf.maximize:
                is_better = aggregated > best_so_far
            else:
                is_better = aggregated < best_so_far
            if is_better:
                best_metrics[measure_conf] = aggregated
                # System label: name of the parent directory followed by the run's own ID.
                best_metric_system[measure_conf] = run_path.parent.name + run["run_id"].iloc[0]

    for measure_conf, metric in best_metrics.items():
        # `.get` prints None instead of raising KeyError when no run ever
        # improved on the initial value (e.g. no run files were found).
        print(round(metric, 2), best_metric_system.get(measure_conf), measure_conf)

2020


  0%|          | 0/12 [00:00<?, ?it/s]

0.83 Swordsmandirichlet_new_dataset MeasureConf(unjudged_removed=False, name='nDCG@5', measure=nDCG@5, maximize=True)
0.88 Swordsmandirichlet_new_dataset MeasureConf(unjudged_removed=False, name='P@3', measure=P@3, maximize=True)
0.88 Swordsmandirichlet_new_dataset MeasureConf(unjudged_removed=False, name='P@5', measure=P@5, maximize=True)
0.0 AragornAragorncedr_knrm MeasureConf(unjudged_removed=False, name='Hole@5', measure=Hole@5, maximize=False)
0.83 Swordsmandirichlet_new_dataset MeasureConf(unjudged_removed=True, name='nDCG@5', measure=nDCG(judged_only=True)@5, maximize=True)
0.88 Swordsmandirichlet_new_dataset MeasureConf(unjudged_removed=True, name='P@3', measure=P(judged_only=True)@3, maximize=True)
0.89 Swordsmandirichlet_new_dataset MeasureConf(unjudged_removed=True, name='P@5', measure=P(judged_only=True)@5, maximize=True)
0.7 Swordsmandirichlet_new_dataset MeasureConf(unjudged_removed=True, name='Bpref', measure=Bpref, maximize=True)
2021


  0%|          | 0/71 [00:00<?, ?it/s]

0.72 ElrondElrondKRun MeasureConf(unjudged_removed=False, name='nDCG@5', measure=nDCG@5, maximize=True)
0.83 Pippin-Tookseupd2021-rck-stop-kstem-doShingle-false-shingle-size-0-Dirichlet-mu-2000.0-topics-2021 MeasureConf(unjudged_removed=False, name='P@3', measure=P@3, maximize=True)
0.8 Pippin-Tookseupd2021-rck-stop-kstem-doShingle-false-shingle-size-0-Dirichlet-mu-2000.0-topics-2021 MeasureConf(unjudged_removed=False, name='P@5', measure=P@5, maximize=True)
0.0 Heimdallargrank_r1_c10.0_q5.0 MeasureConf(unjudged_removed=False, name='Hole@5', measure=Hole@5, maximize=False)
0.74 ElrondElrondSimpleRun MeasureConf(unjudged_removed=True, name='nDCG@5', measure=nDCG(judged_only=True)@5, maximize=True)
0.86 Robin-Hoodrobinhood_use MeasureConf(unjudged_removed=True, name='P@3', measure=P(judged_only=True)@3, maximize=True)
0.83 Dread-Pirate-Robertsdreadpirateroberts_universal-sentence-encoder-qa MeasureConf(unjudged_removed=True, name='P@5', measure=P(judged_only=True)@5, maximize=True)
0.73 

In [5]:
from numpy import arange

# Number of BM25 settings: product of the lengths of its three grid axes.
bm25_axes = (
    arange(0.15, 0.75 + 0.001, 0.2),
    arange(0.6, 4.4 + 0.01, 0.6),
    [2, 5, 8, 10],
)
params_bm25 = 1
for axis_values in bm25_axes:
    params_bm25 *= len(axis_values)
print(params_bm25)

# Number of DLM settings: a single grid axis.
dlm_axis = arange(0, 10000 + 1, 250)
params_dlm = len(dlm_axis)
print(params_dlm)

# Number of RM3 settings: product of the lengths of its three grid axes.
rm3_axes = (
    arange(4, 16 + 1, 2),
    arange(4, 10 + 1, 2),
    arange(0.2, 1 + 0.1, 0.2),
)
params_rm3 = 1
for axis_values in rm3_axes:
    params_rm3 *= len(axis_values)
print(params_rm3)

# Sizes of the combined grids when RM3 is added to either retrieval model.
bm25_rm3_total = params_bm25 * params_rm3
dlm_rm3_total = params_dlm * params_rm3
print(bm25_rm3_total)
print(dlm_rm3_total)
print(1 - 1 / params_bm25)

112
41
140
15680
5740
0.9910714285714286
