In [96]:
from itertools import groupby
from math import inf
from pathlib import Path
from typing import NamedTuple
from tira.rest_api_client import Client

from ir_datasets import load
from ir_measures import Measure, nDCG, Bpref, define_byquery, P, Judged, Recall
from pandas import read_csv, DataFrame
from tqdm.auto import tqdm

In [97]:
class RunConf(NamedTuple):
    """Describes one retrieval run that gets its own row in the results table."""
    # Touché edition of the run; used below to pick the qrels and to group table rows.
    year: int
    # Retrieval model label, printed verbatim as the first table column.
    model: str
    # Transformation label (LaTeX source), printed verbatim as the second table column.
    transformation: str
    # Location of the run file; `read_run` picks the parser from its suffix (.csv / .txt).
    path: Path

In [98]:
# Shared TIRA REST client; its `get_run_output` is used below to locate baseline run files.
tira_client = Client()

In [99]:
# Directory with the locally saved CSV run files, given relative to the working directory.
runs_path = Path("../data/runs_saved")

In [110]:
def _tira_run_path(approach: str, year: int) -> Path:
    """Return the ``run.txt`` path of a TIRA approach on the Touché task-1 dataset of ``year``.

    Args:
        approach: Approach identifier passed unchanged to ``tira_client.get_run_output``.
        year: Touché edition; inserted into the TIRA dataset identifier.

    Returns:
        Path to ``run.txt`` below whatever location ``get_run_output`` returns.
    """
    return Path(tira_client.get_run_output(
        approach=approach,
        dataset=f"argsme-touche-{year}-task-1-20230209-training",
    )) / "run.txt"


# All runs shown in the results table, in row order. The table cell groups rows
# with `itertools.groupby`, so the entries of one year must stay contiguous.
# `transformation` strings are LaTeX source (hence the raw strings); "doc.\ "
# forces a normal inter-word space after the abbreviation.
run_confs = (
    # ---- Touché 2020 ----
    RunConf(
        year=2020,
        model=r"BM25",
        transformation=r"n/a",
        path=_tira_run_path("ir-benchmarks/tira-ir-starter/BM25 (tira-ir-starter-pyterrier)", 2020),
    ),
    RunConf(
        year=2020,
        model=r"BM25",
        transformation=r"With pools + T \& C",
        path=runs_path / "topics_conclusion_tuned_bm25.csv",
    ),
    RunConf(
        year=2020,
        model=r"BM25 + RM3",
        transformation=r"With pools + T \& C",
        path=runs_path / "topics_conclusion_tuned_bm_rm3.csv",
    ),
    RunConf(
        year=2020,
        model=r"DLM",
        transformation=r"n/a",
        path=_tira_run_path("ir-benchmarks/tira-ir-starter/DirichletLM (tira-ir-starter-pyterrier)", 2020),
    ),
    RunConf(
        year=2020,
        model=r"DLM",
        transformation=r"With pools + T \& C",
        path=runs_path / "topics_conclusion_tuned_dlm.csv",
    ),
    RunConf(
        year=2020,
        model=r"DLM + RM3",
        transformation=r"All doc.\ + C",
        path=runs_path / "conclusions_leakage_tuned_dlm_rm3.csv",
    ),
    RunConf(
        year=2020,
        model=r"BM25 + monoBERT",
        transformation=r"n/a",
        path=_tira_run_path("ir-benchmarks/tira-ir-starter/MonoBERT Large (tira-ir-starter-gygaggle)", 2020),
    ),
    RunConf(
        year=2020,
        model=r"BM25 + monoT5",
        transformation=r"n/a",
        path=_tira_run_path("ir-benchmarks/tira-ir-starter/MonoT5 3b (tira-ir-starter-gygaggle)", 2020),
    ),
    RunConf(
        year=2020,
        model=r"BM25 + LiT5",
        transformation=r"n/a",
        path=_tira_run_path("ir-benchmarks/fschlatt/castorini-list-in-t5-300", 2020),
    ),
    RunConf(
        year=2020,
        model=r"ColBERT",
        transformation=r"n/a",
        path=_tira_run_path("ir-benchmarks/tira-ir-starter/ColBERT Re-Rank (tira-ir-starter-pyterrier)", 2020),
    ),
    # ---- Touché 2021 ----
    RunConf(
        year=2021,
        model=r"BM25",
        transformation=r"n/a",
        path=_tira_run_path("ir-benchmarks/tira-ir-starter/BM25 (tira-ir-starter-pyterrier)", 2021),
    ),
    RunConf(
        year=2021,
        model=r"BM25",
        transformation=r"Without pools + T \& C",
        path=runs_path / "pool_topics_conclusions_tuned_bm25.csv",
    ),
    RunConf(
        year=2021,
        model=r"BM25 + RM3",
        transformation=r"With pools + T \& C",
        # NOTE(review): same file as the 2020 "BM25 + RM3" entry -- presumably the
        # run covers the topics of both years; confirm this is intended.
        path=runs_path / "topics_conclusion_tuned_bm_rm3.csv",
    ),
    RunConf(
        year=2021,
        model=r"DLM",
        transformation=r"n/a",
        path=_tira_run_path("ir-benchmarks/tira-ir-starter/DirichletLM (tira-ir-starter-pyterrier)", 2021),
    ),
    RunConf(
        year=2021,
        model=r"DLM",
        # Was "All doc. + T \& C"; now spelled like the other "All doc.\ " labels.
        transformation=r"All doc.\ + T \& C",
        path=runs_path / "topics_conclusion_leakage_tuned_dlm.csv",
    ),
    RunConf(
        year=2021,
        model=r"DLM + RM3",
        transformation=r"All doc.\ + T \& C",
        path=runs_path / "topics_conclusion_leakage_tuned_dlm_rm3.csv",
    ),
    # Disabled 2021 baselines, kept for reference:
    # RunConf(
    #     year=2021,
    #     model=r"monoBERT",
    #     transformation=r"n/a",
    #     path=_tira_run_path("ir-benchmarks/tira-ir-starter/MonoBERT Large (tira-ir-starter-gygaggle)", 2021),
    # ),
    # RunConf(
    #     year=2021,
    #     model=r"monoT5",
    #     transformation=r"n/a",
    #     path=_tira_run_path("ir-benchmarks/tira-ir-starter/MonoT5 3b (tira-ir-starter-gygaggle)", 2021),
    # ),
    # RunConf(
    #     year=2021,
    #     model=r"LiT5",
    #     transformation=r"n/a",
    #     path=_tira_run_path("ir-benchmarks/fschlatt/castorini-list-in-t5-300", 2021),
    # ),
    # RunConf(
    #     year=2021,
    #     model=r"ColBERT",
    #     transformation=r"n/a",
    #     path=_tira_run_path("ir-benchmarks/tira-ir-starter/ColBERT Re-Rank (tira-ir-starter-pyterrier)", 2021),
    # ),
)

In [111]:
def read_run(path: Path) -> DataFrame:
    """Load a run file into a data frame with string ``query_id`` / ``doc_id`` columns.

    The file suffix selects the layout:

    * ``.csv``: read with its header row; must contain ``qid`` and ``docno``
      columns, from which ``query_id`` and ``doc_id`` are derived and appended.
    * ``.txt``: header-less, whitespace-separated columns read as ``query_id``,
      an unnamed column, ``doc_id``, ``rank``, ``score`` and ``name``.

    Rows of query ``"25"`` are dropped in both cases.

    Args:
        path: Location of the run file.

    Returns:
        The parsed run without the rows of query ``"25"``.

    Raises:
        RuntimeError: If the suffix is neither ``.csv`` nor ``.txt``.
    """
    if path.suffix == ".csv":
        df = read_csv(path)
        query_ids, doc_ids = df["qid"], df["docno"]
    elif path.suffix == ".txt":
        # Raw string: "\s" in a normal literal is an invalid escape sequence,
        # which newer Python versions warn about.
        df = read_csv(path, sep=r"\s+", names=["query_id", None, "doc_id", "rank", "score", "name"])
        query_ids, doc_ids = df["query_id"], df["doc_id"]
    else:
        raise RuntimeError(f"Run file not supported: {path}")

    # Normalise the ids to strings once for both layouts so they compare
    # reliably against string ids elsewhere.
    df["query_id"] = query_ids.astype(str)
    df["doc_id"] = doc_ids.astype(str)
    return df[df["query_id"] != "25"]  # Unjudged topic from 2020.

    

In [112]:
# Parse every configured run file once; the configuration tuple itself is the key.
runs = dict(
    (conf, read_run(conf.path))
    for conf in run_confs
)

In [113]:
# Per-year directories that are searched for the submitted Touché run files.
_touche_run_dirs = (
    (2020, "../data/runs_touche/2020"),
    (2021, "../data/runs_touche/2021"),
)
touche_runs_paths = {year: Path(directory) for year, directory in _touche_run_dirs}

In [114]:
def read_touche_run(path: Path) -> DataFrame:
    """Load a header-less, single-space-separated run file.

    The six columns are read as ``qid``, ``Q0``, ``docno``, ``rank``, ``score``
    and ``run_id``; string-typed ``query_id`` and ``doc_id`` columns are
    appended, and the rows of query ``"25"`` are dropped.
    """
    column_names = ["qid", "Q0", "docno", "rank", "score", "run_id"]
    raw = read_csv(path, sep=" ", header=None, names=column_names)
    with_ids = raw.assign(
        query_id=raw["qid"].astype(str),
        doc_id=raw["docno"].astype(str),
    )
    is_judged_topic = with_ids["query_id"] != "25"  # Unjudged topic from 2020.
    return with_ids[is_judged_topic]

In [115]:
def _load_touche_runs(directory: Path) -> list:
    """Read every ``run*.txt`` found recursively below ``directory``.

    Returns a list of ``(label, run)`` pairs. The label is built from the parent
    directory name (dashes replaced by spaces), the file stem without its last
    character, and that last character.
    """
    labelled_runs = []
    for run_path in tqdm(list(directory.rglob("run*.txt"))):
        group = run_path.parent.stem.replace('-', ' ')
        stem = run_path.stem
        label = f"{group} {stem[:-1]} {stem[-1]}"
        labelled_runs.append((label, read_touche_run(run_path)))
    return labelled_runs


# All submitted Touché runs per year, each paired with a readable label.
touche_runs = {
    year: _load_touche_runs(path)
    for year, path in touche_runs_paths.items()
}

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

In [116]:
# ir_datasets identifier of the Touché task-1 dataset for each year.
_dataset_ids = (
    (2020, "argsme/2020-04-01/touche-2020-task-1"),
    (2021, "argsme/2020-04-01/touche-2021-task-1"),
)
datasets = {year: load(dataset_id) for year, dataset_id in _dataset_ids}

In [117]:
# Relevance judgments per year, as returned by each dataset's `qrels_dict()`.
qrels = dict(
    (year, datasets[year].qrels_dict())
    for year in datasets
)

/home/heinrich/.ir_datasets/touche/2020/task-1/qrels.qrels


In [123]:
class MeasureConf(NamedTuple):
    """Describes one evaluation measure that gets its own column in the results table."""
    # Column header, printed verbatim into the table's header row.
    name: str
    # Measure object; the table reports its `calc_aggregate(qrels, run)` value.
    measure: Measure
    # True if larger is better; selects `max` vs. `min` when picking the best Touché run.
    maximize: bool


def _hole_impl(qrels: DataFrame, run: DataFrame):
    query_document_ids = set((qrels["query_id"] + qrels["doc_id"]).unique())
    return len(run[~(run["query_id"] + run["doc_id"]).isin(query_document_ids)]) / len(run)


# Measure object named "Hole", built by `define_byquery` around `_hole_impl`.
# NOTE(review): `support_cutoff=True` presumably enables `Hole @ k` -- confirm
# against the ir_measures documentation.
Hole = define_byquery(
    name="Hole",
    impl=_hole_impl,
    support_cutoff=True,
)

# Measures reported in the results table, one column each, in this order.
# Every entry is MeasureConf(column header, measure, maximize); the
# commented-out entries are alternatives that are currently disabled.
measure_confs = (
    # MeasureConf(r"Recall@100", Recall(judged_only=True) @ 100, True),
    # MeasureConf(r"Recall@1000", Recall(judged_only=True) @ 1000, True),
    MeasureConf(r"nDCG@5", nDCG(judged_only=True) @ 5, True),
    # MeasureConf(r"P@3", P(judged_only=True) @ 3, True),
    # MeasureConf(r"P@5", P(judged_only=True) @ 5, True),
    # MeasureConf(r"bpref", Bpref, True),
    # MeasureConf(r"judged@5", Judged @ 5, False),
)

In [124]:
# Print the rows of a LaTeX results table:
#   * a header row with two empty cells followed by the measure names,
#   * per year: a centred year heading, one row per configured run, and one
#     row with the best score any submitted Touché run reaches per measure.

# Model + transformation + one column per measure. Used for the year heading
# so that it always spans the table that is actually printed (this was a
# hard-coded 5, which only matches exactly three measures).
n_columns = 2 + len(measure_confs)

print(r"  & ", end="")
for measure_conf in measure_confs:
    print(r" & " + measure_conf.name, end="")
print(r" \\")

# `groupby` only merges adjacent items, so this relies on `run_confs` listing
# all runs of one year contiguously.
for year, run_conf_group in groupby(run_confs, lambda run_conf: run_conf.year):
    print(r"  \midrule")
    print(r"  \multicolumn{" + f"{n_columns}" + r"}{c}{" + f"{year}" + r"} \\")
    print(r"  \midrule")
    qrel = qrels[year]

    # One row per configured run of this year.
    for run_conf in run_conf_group:
        cols = [
            run_conf.model,
            run_conf.transformation,
        ]
        run = runs[run_conf]
        for measure_conf in measure_confs:
            aggregated = measure_conf.measure.calc_aggregate(qrel, run)
            cols.append(f"{aggregated:.2f}")
        print(r"  " + r" & ".join(cols) + r" \\")

    # Reference row: the best value over all submitted Touché runs, chosen
    # independently for every measure (so cells may stem from different runs).
    cols = [
        r"Best Touch{\'e}",
        r"n/a",
    ]
    for measure_conf in measure_confs:
        touche_scores = [
            measure_conf.measure.calc_aggregate(qrel, touche_run)
            for _, touche_run in touche_runs[year]
        ]
        best = max if measure_conf.maximize else min
        cols.append(f"{best(touche_scores):.2f}")
    print(r"  " + r" & ".join(cols) + r" \\")

  &  & nDCG@5 \\
  \midrule
  \multicolumn{5}{c}{2020} \\
  \midrule
  BM25 & --- & 0.84 \\
  BM25 & With pools + T \& C & 0.84 \\
  BM25 + RM3 & With pools + T \& C & 0.87 \\
  DLM & --- & 0.85 \\
  DLM & With pools + T \& C & 0.82 \\
  DLM + RM3 & All doc.\ + C & 0.88 \\
  Best Touch{\'e} & --- & 0.83 \\
  \midrule
  \multicolumn{5}{c}{2021} \\
  \midrule
  BM25 & --- & 0.67 \\
  BM25 & Without pools + T \& C & 0.74 \\
  BM25 + RM3 & With pools + T \& C & 0.74 \\
  DLM & --- & 0.68 \\
  DLM & All doc. + T \& C & 0.74 \\
  DLM + RM3 & All doc.\ + T \& C & 0.70 \\
  Best Touch{\'e} & --- & 0.74 \\
