In [1]:
import pandas as pd
import glob
import os


In [2]:

# Directory containing your CV report CSVs
report_dir = "/home/valentin/workspaces/histobench/reports/lunghist700"
csv_files = glob.glob(os.path.join(report_dir, "*_cv_report.csv"))


In [11]:

# Metrics to aggregate
metrics = ["accuracy", "precision", "recall", "f1"]

rows = []
for csv_path in csv_files:
    df = pd.read_csv(csv_path)
    # Extract method name from filename (model + aggregation)
    basename = os.path.basename(csv_path)
    method = basename.replace("_cv_report.csv", "")
    for clf in df["classifier"].unique():
        sub = df[df["classifier"] == clf]
        row = {"method": f"{method} ({clf})"}
        for m in metrics:
            if m in sub.columns:
                mean = sub[m].mean()
                std = sub[m].std()
                row[m] = f"{mean:.3f} ± {std:.3f}"
        rows.append(row)

summary = pd.DataFrame(rows)


In [12]:
# Split summary into KNN and Linear Classifier tables
summary_knn = summary[summary["method"].str.contains(r"\(knn\)", case=False)].reset_index(drop=True)
summary_linear = summary[summary["method"].str.contains(r"\(logistic_regression\)", case=False)].reset_index(drop=True)

summary_knn_sorted = summary_knn.sort_values(by="f1", ascending=False).reset_index(drop=True)
summary_linear_sorted = summary_linear.sort_values(by="f1", ascending=False).reset_index(drop=True)


# Display as markdown
print("### KNN Results")
display(summary_knn_sorted)
print("### Linear Classifier Results")
display(summary_linear_sorted)


### KNN Results


Unnamed: 0,method,accuracy,precision,recall,f1
0,UNI2_LungHist700_10x_whole_roi (knn),0.901 ± 0.031,0.914 ± 0.021,0.912 ± 0.033,0.907 ± 0.029
1,UNI2_LungHist700_10x_tile_with_overlap (knn),0.879 ± 0.025,0.897 ± 0.015,0.894 ± 0.032,0.889 ± 0.025
2,H-optimus-0_LungHist700_10x_tile_with_overlap ...,0.827 ± 0.077,0.859 ± 0.041,0.856 ± 0.068,0.843 ± 0.065
3,resnet50_LungHist700_10x_tile_with_overlap (knn),0.555 ± 0.063,0.621 ± 0.068,0.574 ± 0.068,0.566 ± 0.047
4,resnet50_LungHist700_10x_whole_roi (knn),0.562 ± 0.043,0.606 ± 0.066,0.550 ± 0.047,0.549 ± 0.044
5,moco_superpixel_cluster_bioptimus_LungHist700_...,0.528 ± 0.062,0.574 ± 0.047,0.504 ± 0.059,0.500 ± 0.063
6,moco_superpixel_cluster_bioptimus_LungHist700_...,0.487 ± 0.086,0.477 ± 0.087,0.456 ± 0.075,0.448 ± 0.079


### Linear Classifier Results


Unnamed: 0,method,accuracy,precision,recall,f1
0,UNI2_LungHist700_10x_tile_with_overlap (logist...,0.933 ± 0.045,0.944 ± 0.038,0.936 ± 0.039,0.937 ± 0.040
1,H-optimus-0_LungHist700_10x_tile_with_overlap ...,0.927 ± 0.039,0.936 ± 0.033,0.931 ± 0.032,0.931 ± 0.033
2,UNI2_LungHist700_10x_whole_roi (logistic_regre...,0.923 ± 0.021,0.928 ± 0.021,0.929 ± 0.013,0.926 ± 0.017
3,resnet50_LungHist700_10x_tile_with_overlap (lo...,0.716 ± 0.037,0.740 ± 0.019,0.750 ± 0.032,0.733 ± 0.024
4,resnet50_LungHist700_10x_whole_roi (logistic_r...,0.674 ± 0.068,0.696 ± 0.076,0.705 ± 0.045,0.688 ± 0.067
5,moco_superpixel_cluster_bioptimus_LungHist700_...,0.410 ± 0.025,0.289 ± 0.018,0.364 ± 0.013,0.299 ± 0.015
6,moco_superpixel_cluster_bioptimus_LungHist700_...,0.350 ± 0.028,0.244 ± 0.036,0.310 ± 0.037,0.255 ± 0.028


In [9]:

# Generate LaTeX tables
latex_knn = summary_knn_sorted.to_latex(index=False, escape=False, column_format="l" + "c" * len(metrics))
latex_linear = summary_linear_sorted.to_latex(index=False, escape=False, column_format="l" + "c" * len(metrics))

print("\n% KNN Table\n", latex_knn)
print("\n% Linear Classifier Table\n", latex_linear)


% KNN Table
 \begin{tabular}{lccccc}
\toprule
method & accuracy & precision & recall & f1 & roc_auc \\
\midrule
UNI2_LungHist700_10x_whole_roi (knn) & 0.901 ± 0.031 & 0.914 ± 0.021 & 0.912 ± 0.033 & 0.907 ± 0.029 & nan ± nan \\
UNI2_LungHist700_10x_tile_with_overlap (knn) & 0.879 ± 0.025 & 0.897 ± 0.015 & 0.894 ± 0.032 & 0.889 ± 0.025 & nan ± nan \\
H-optimus-0_LungHist700_10x_tile_with_overlap (knn) & 0.827 ± 0.077 & 0.859 ± 0.041 & 0.856 ± 0.068 & 0.843 ± 0.065 & nan ± nan \\
resnet50_LungHist700_10x_tile_with_overlap (knn) & 0.555 ± 0.063 & 0.621 ± 0.068 & 0.574 ± 0.068 & 0.566 ± 0.047 & nan ± nan \\
resnet50_LungHist700_10x_whole_roi (knn) & 0.562 ± 0.043 & 0.606 ± 0.066 & 0.550 ± 0.047 & 0.549 ± 0.044 & nan ± nan \\
moco_superpixel_cluster_bioptimus_LungHist700_10x_tile_with_overlap (knn) & 0.528 ± 0.062 & 0.574 ± 0.047 & 0.504 ± 0.059 & 0.500 ± 0.063 & nan ± nan \\
moco_superpixel_cluster_bioptimus_LungHist700_10x_whole_roi (knn) & 0.487 ± 0.086 & 0.477 ± 0.087 & 0.456 ± 0.075 &