In [1]:
import os
from pathlib import Path

# Pin the kernel's working directory to the project root so that relative
# paths used further down resolve the same way regardless of where the
# notebook was launched from.
try:
    # Already set by an earlier run of this cell in the same kernel: keep it.
    # Recomputing from the (already changed) cwd would climb one level too far.
    PROJECT_ROOT
except NameError:
    # First run: the root is taken to be the parent of the current directory.
    PROJECT_ROOT = Path.cwd().parent.resolve()

os.chdir(PROJECT_ROOT)

In [2]:
import pandas as pd
from utils import ModelForAnalysis

In [3]:
# Directory under the project root that holds the benchmarking results.
RESULTS_DIR = PROJECT_ROOT / "analysis_results"

# Epitopes that appear in the 200-shot CDR3 Levenshtein result file; the
# per-epitope tables below are restricted to this set.
# NOTE(review): presumably these are the epitopes with enough TCRs to support
# a 200-shot benchmark -- confirm against the benchmarking code.
# The path is built from RESULTS_DIR (rather than a cwd-relative string) so it
# keeps working even if the working directory changes.
LARGELY_SAMPLED_EPITOPES = pd.read_csv(
    RESULTS_DIR / "CDR3 Levenshtein" / "ovr_nn_200_shot.csv"
)["epitope"].unique()

# Values of k (number of reference TCRs) for which a summary table is produced.
NUM_SHOTS_OF_INTEREST = [1, 2, 5, 10, 20, 50, 100, 200]

In [9]:
# Every model is constructed with the same second positional argument.
# NOTE(review): "ovr_nn" presumably identifies the nearest-neighbour benchmark
# whose results are loaded -- confirm against utils.ModelForAnalysis.
_BENCHMARK = "ovr_nn"

# One entry per model: (name, third argument, fourth argument, extra keywords).
# NOTE(review): the third/fourth arguments look like a plot colour and marker;
# they are not used by the table generation in this notebook.
_MODEL_SPECS = (
    ("SCEPTR", "#7048e8", "d", {"zorder": 2}),
    ("TCRdist", "#f03e3e", "o", {"zorder": 1.9}),
    ("CDR3 Levenshtein", "#f76707", "^", {}),
    ("TCR-BERT", "#74b816", "s", {}),
    ("ESM2 (T6 8M)", "#37b24d", "p", {}),
    ("ProtBert", "#0ca678", "x", {}),
)

# Models in the order their columns will appear in the summary tables.
models = tuple(
    ModelForAnalysis(name, _BENCHMARK, colour, marker, **extra)
    for name, colour, marker, extra in _MODEL_SPECS
)

# Build one LaTeX table per shot count k. Rows are epitopes, columns are
# models, and each cell is the mean of the "auc" column over all rows that
# model's results contain for that epitope.
summary_tex_table_strings = []

for k in NUM_SHOTS_OF_INTEREST:
    summary_per_model = []

    for model in models:
        # load_data(k) is expected to return a frame with "epitope" and "auc"
        # columns (both are used directly below).
        raw_results = model.load_data(k)
        mean_auc = raw_results.groupby("epitope")["auc"].mean()

        # Keep only the epitopes of interest so every table covers the same set.
        mean_auc = mean_auc[mean_auc.index.isin(LARGELY_SAMPLED_EPITOPES)]

        # rename() returns a new Series whose name becomes the column header,
        # instead of mutating the column labels of a filtered frame in place.
        summary_per_model.append(mean_auc.rename(model.name))

    # Align the per-model columns on the shared epitope index.
    summary = pd.concat(summary_per_model, axis="columns")

    # Raw-string caption keeps the LaTeX backslashes intact; k is appended by
    # concatenation because the braces in \ref{...} would clash with f-strings.
    summary_tex_table_strings.append(
        summary.to_latex(
            caption=r"Per-epitope summary of the different models' performances from the nearest-neighbour prediction benchmarking (see section~\ref{sec:benchmarking}) with the number of reference TCRs k=" + str(k) + ".",
            label=f"tab:per_epitope_aurocs_{k}_shot"
        )
    )

# Concatenate the per-k tables into a single LaTeX fragment, one table after
# another, separated by a newline.
summary_tex_tables = "\n".join(summary_tex_table_strings)

# Written relative to the current working directory. The encoding is pinned so
# the file is byte-identical across platforms instead of following the locale.
with open("supplementary_auroc_tables.tex", "w", encoding="utf-8") as f:
    f.write(summary_tex_tables)