In [3]:
import os
import git
from pathlib import Path

# Repository root: the working tree of the git repo that contains the current
# directory (parent directories are searched).
ROOT_DIR = Path(
    git.Repo(".", search_parent_directories=True).working_tree_dir
)
# Boolean toggle; it is not read by any of the cells visible in this review —
# presumably it controls whether figures are written to disk (TODO confirm).
SAVE_FIGS = False

In [4]:
# Switch the process working directory to <repo>/utilities. This affects every
# later cell; presumably it is what lets the three helper modules below be
# imported by bare name (TODO confirm).
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
from reporting import *
# Path of the draft-plots directory for the paper; it is only built here, no
# file is written by this cell.
plots_path = os.path.join(ROOT_DIR, "publication", "paper", "draft_plots")
# NOTE(review): `main_df` and `np` are not defined in any visible cell, so they
# presumably arrive through one of the star imports above — confirm which one.
# Rebind `main_df` to a copy so later cells work on a separate object.
main_df = main_df.copy()
# Seed NumPy's global random number generator.
np.random.seed(0)

In [5]:
# Copy of main_df in which every missing value is replaced by the literal
# string "None"; main_df itself is left untouched.
main_df2 = main_df.fillna("None")

In [6]:
# Build a summary with one row per (dataset_type, dataset, subset, transform,
# orientation, failure_category) combination, ignoring "low samples" rows.
#
# Columns added:
#   pass                        True when failure_category is one of the two
#                               "*_pass" labels.
#   number                      Row count of the (group, failure_category) cell.
#   pass_percentage             Rounded % of passing rows within the group.
#   beat_all_priors_percentage  Rounded % mean of `beat_all_priors` in the group.
_group_keys = ["dataset_type", "dataset", "subset", "transform", "orientation"]
_category_keys = _group_keys + ["failure_category"]

failCatDF = (
    main_df.loc[main_df["failure_type"] != "low samples"]
    # `pass` is a Python keyword, so the column has to be passed via a dict.
    .assign(**{"pass": lambda df: df["failure_category"].isin(["practically_pass", "actually_pass"])})
    .loc[:, _category_keys + ["pass", "beat_all_priors", "best_prior"]]
    # Replace missing values with the string "None"; groupby drops NaN keys by
    # default, so this presumably exists to keep such rows in the groupings.
    .fillna("None")
)

# All three statistics are computed from the frame as it stands here and are
# broadcast back onto every row via `transform`.
failCatDF = failCatDF.assign(
    number=failCatDF.groupby(_category_keys)["pass"].transform("count"),
    pass_percentage=(
        failCatDF.groupby(_group_keys)["pass"].transform("mean") * 100
    ).round(),
    beat_all_priors_percentage=(
        failCatDF.groupby(_group_keys)["beat_all_priors"].transform("mean") * 100
    ).round(),
)

# Keep the first row of every (group, failure_category) cell. The per-row
# columns (`pass`, `beat_all_priors`, `best_prior`) therefore show the value of
# that first row only; the aggregate columns are identical within a cell/group.
failCatDF = (
    failCatDF.groupby(_category_keys)
    .first()
    .reset_index()
    .sort_values(by=_category_keys)
)
failCatDF


Unnamed: 0,dataset_type,dataset,subset,transform,orientation,failure_category,pass,beat_all_priors,best_prior,number,pass_percentage,beat_all_priors_percentage
0,medical,syntheticMRI2D,axial,wavelet,diagonal,,False,0,Laplace,8,0.0,62.0
1,medical,syntheticMRI2D,axial,wavelet,horizontal,,False,0,Gaussian,8,0.0,75.0
2,medical,syntheticMRI2D,axial,wavelet,vertical,,False,0,Gaussian,8,0.0,62.0
3,medical,syntheticMRI2D,coronal,wavelet,diagonal,,False,0,,8,0.0,75.0
4,medical,syntheticMRI2D,coronal,wavelet,horizontal,,False,0,,8,0.0,62.0
5,medical,syntheticMRI2D,coronal,wavelet,vertical,,False,0,,8,0.0,62.0
6,medical,syntheticMRI2D,sagittal,wavelet,diagonal,,False,0,,8,0.0,62.0
7,medical,syntheticMRI2D,sagittal,wavelet,horizontal,,False,0,,8,0.0,75.0
8,medical,syntheticMRI2D,sagittal,wavelet,vertical,,False,0,,8,0.0,75.0
9,medical,syntheticMRI3D,full,wavelet,aad,,False,0,,7,0.0,57.0


In [7]:
# Display order for the failure-category columns of the pivot tables.
ordered_failcat_cols = [
    "actually_pass",
    "practically_pass",
    "TO DISCUSS",
    "interesting_failure",
    "trivial_failure",
]
# Display order for the best-prior columns of the pivot tables.
ordered_prior_cols = [
    "GenGamma",
    "Gaussian",
    "Laplace",
    "Student-T",
]

# Remote Sensing

In [8]:
# Value of the `dataset_type` column that the cells of this section filter on.
DATASET_TYPE = "remote sensing"

In [9]:
# One row per (dataset_type, dataset, subset, transform, orientation) group of
# the selected DATASET_TYPE, reduced to the two percentage columns.
# The result is stored in `medical` whatever DATASET_TYPE is.
medical = (
    failCatDF[failCatDF["dataset_type"] == DATASET_TYPE]
    .groupby(["dataset_type", "dataset", "subset", "transform", "orientation"])
    .first()[["pass_percentage", "beat_all_priors_percentage"]]
    .reset_index()
)
medical

Unnamed: 0,dataset_type,dataset,subset,transform,orientation,pass_percentage,beat_all_priors_percentage
0,remote sensing,agriVision,full,fourier,,0.0,98.0
1,remote sensing,agriVision,full,learned,dual_color,0.0,100.0
2,remote sensing,agriVision,full,learned,eye,0.0,67.0
3,remote sensing,agriVision,full,learned,inside_out,0.0,57.0
4,remote sensing,agriVision,full,learned,misc,0.0,75.0
5,remote sensing,agriVision,full,learned,multi_edge,0.0,100.0
6,remote sensing,agriVision,full,learned,single_edge,0.0,100.0
7,remote sensing,agriVision,full,wavelet,diagonal,0.0,100.0
8,remote sensing,agriVision,full,wavelet,horizVert,0.0,100.0
9,remote sensing,pastis,full,fourier,,0.0,100.0


In [10]:

# Percentage of rows in each failure category, per
# (dataset_type, dataset, subset, transform, orientation) group.
# NOTE(review): this cell counts rows of main_df2, whereas the corresponding
# cells for the other dataset types pivot failCatDF["number"], which is built
# without the "low samples" rows — confirm the difference is intended.
medical = main_df2[main_df2["dataset_type"] == DATASET_TYPE].assign(number=1)
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "orientation"],
    columns="failure_category",
    aggfunc="sum",
    fill_value=0,
)

# Normalise every row by its total. The total covers all categories, including
# any that the column selection below removes.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Keep only the known categories, in display order. Columns outside that list
# (for example the "None" filler) are discarded, so rows need not sum to 100.
ordered_cols_present = [
    category
    for category in ordered_failcat_cols
    if category in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,failure_category
dataset_type,dataset,subset,transform,orientation
remote sensing,agriVision,full,fourier,
remote sensing,agriVision,full,learned,dual_color
remote sensing,agriVision,full,learned,eye
remote sensing,agriVision,full,learned,inside_out
remote sensing,agriVision,full,learned,misc
remote sensing,agriVision,full,learned,multi_edge
remote sensing,agriVision,full,learned,single_edge
remote sensing,agriVision,full,wavelet,diagonal
remote sensing,agriVision,full,wavelet,horizVert
remote sensing,pastis,full,fourier,


In [11]:

# Percentage of rows won by each best_prior, per
# (dataset_type, dataset, subset, transform, orientation) group.
medical = main_df2[main_df2["dataset_type"] == DATASET_TYPE].assign(number=1)
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "orientation"],
    columns="best_prior",
    aggfunc="count",
    fill_value=0,
)

# Row totals are taken over every best_prior value before any column is
# dropped, so the percentages are relative to all rows of the group.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Restrict to the known priors, in display order; other columns are discarded.
ordered_cols_present = [
    prior for prior in ordered_prior_cols if prior in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,best_prior,GenGamma,Gaussian,Laplace,Student-T
dataset_type,dataset,subset,transform,orientation,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
remote sensing,agriVision,full,fourier,,98.0,0.0,0.0,2.0
remote sensing,agriVision,full,learned,dual_color,100.0,0.0,0.0,0.0
remote sensing,agriVision,full,learned,eye,67.0,0.0,0.0,33.0
remote sensing,agriVision,full,learned,inside_out,57.0,0.0,0.0,43.0
remote sensing,agriVision,full,learned,misc,75.0,0.0,0.0,25.0
remote sensing,agriVision,full,learned,multi_edge,100.0,0.0,0.0,0.0
remote sensing,agriVision,full,learned,single_edge,100.0,0.0,0.0,0.0
remote sensing,agriVision,full,wavelet,diagonal,100.0,0.0,0.0,0.0
remote sensing,agriVision,full,wavelet,horizVert,100.0,0.0,0.0,0.0
remote sensing,pastis,full,fourier,,100.0,0.0,0.0,0.0


In [12]:

# Percentage of rows won by each best_prior, broken down by
# (dataset_type, dataset, subset, transform, failure_category) — i.e. pooled
# over orientation and split by failure category instead.
medical = main_df2[main_df2["dataset_type"] == DATASET_TYPE].assign(number=1)
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "failure_category"],
    columns="best_prior",
    aggfunc="count",
    fill_value=0,
)

# Row totals include every best_prior value, also those dropped further down.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Restrict to the known priors, in display order; other columns are discarded.
ordered_cols_present = [
    prior for prior in ordered_prior_cols if prior in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,best_prior,GenGamma,Gaussian,Laplace,Student-T
dataset_type,dataset,subset,transform,failure_category,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
remote sensing,agriVision,full,fourier,,98.0,0.0,0.0,2.0
remote sensing,agriVision,full,learned,,90.0,0.0,0.0,10.0
remote sensing,agriVision,full,wavelet,,100.0,0.0,0.0,0.0
remote sensing,pastis,full,fourier,,100.0,0.0,0.0,0.0
remote sensing,pastis,full,learned,,97.0,3.0,0.0,0.0
remote sensing,pastis,full,wavelet,,98.0,0.0,0.0,2.0
remote sensing,spaceNet,full,fourier,,100.0,0.0,0.0,0.0
remote sensing,spaceNet,full,learned,,95.0,0.0,2.0,4.0
remote sensing,spaceNet,full,wavelet,,95.0,5.0,0.0,0.0


# Natural Images

In [13]:
# Value of the `dataset_type` column that the cells of this section filter on.
DATASET_TYPE = "natural"

In [14]:
# One row per (dataset_type, dataset, subset, transform, orientation) group of
# the selected DATASET_TYPE, reduced to the two percentage columns.
# The result is stored in `medical` whatever DATASET_TYPE is.
_selected_rows = failCatDF["dataset_type"] == DATASET_TYPE
medical = (
    failCatDF[_selected_rows]
    .groupby(["dataset_type", "dataset", "subset", "transform", "orientation"])
    .first()[["pass_percentage", "beat_all_priors_percentage"]]
    .reset_index()
)
medical

Unnamed: 0,dataset_type,dataset,subset,transform,orientation,pass_percentage,beat_all_priors_percentage
0,natural,coco,indoor,wavelet,diagonal,0.0,100.0
1,natural,coco,indoor,wavelet,horizontal,0.0,81.0
2,natural,coco,indoor,wavelet,vertical,0.0,94.0
3,natural,coco,outdoor,wavelet,diagonal,0.0,100.0
4,natural,coco,outdoor,wavelet,horizontal,0.0,84.0
5,natural,coco,outdoor,wavelet,vertical,0.0,100.0
6,natural,segmentAnything,full,learned,dual_color,0.0,100.0
7,natural,segmentAnything,full,learned,eye,0.0,100.0
8,natural,segmentAnything,full,learned,inside_out,0.0,44.0
9,natural,segmentAnything,full,learned,misc,0.0,67.0


In [15]:

# Percentage of rows in each failure category, per
# (dataset_type, dataset, subset, transform, orientation) group, derived from
# the per-category counts already stored in failCatDF["number"].
medical = failCatDF[failCatDF["dataset_type"] == DATASET_TYPE].copy()
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "orientation"],
    columns="failure_category",
    aggfunc="sum",
    fill_value=0,
)

# Normalise every row by its total. The total covers all categories, including
# any that the column selection below removes.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Keep only the known categories, in display order; columns outside that list
# are discarded, so rows need not sum to 100.
ordered_cols_present = [
    category
    for category in ordered_failcat_cols
    if category in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,failure_category
dataset_type,dataset,subset,transform,orientation
natural,coco,indoor,wavelet,diagonal
natural,coco,indoor,wavelet,horizontal
natural,coco,indoor,wavelet,vertical
natural,coco,outdoor,wavelet,diagonal
natural,coco,outdoor,wavelet,horizontal
natural,coco,outdoor,wavelet,vertical
natural,segmentAnything,full,learned,dual_color
natural,segmentAnything,full,learned,eye
natural,segmentAnything,full,learned,inside_out
natural,segmentAnything,full,learned,misc


In [16]:

# Percentage of rows won by each best_prior, per
# (dataset_type, dataset, subset, transform, orientation) group.
medical = main_df2[main_df2["dataset_type"] == DATASET_TYPE].assign(number=1)
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "orientation"],
    columns="best_prior",
    aggfunc="count",
    fill_value=0,
)

# Row totals are taken over every best_prior value before any column is
# dropped, so the percentages are relative to all rows of the group.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Restrict to the known priors, in display order; other columns are discarded.
ordered_cols_present = [
    prior for prior in ordered_prior_cols if prior in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,best_prior,GenGamma,Gaussian,Laplace,Student-T
dataset_type,dataset,subset,transform,orientation,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
natural,coco,indoor,wavelet,diagonal,100.0,0.0,0.0,0.0
natural,coco,indoor,wavelet,horizontal,81.0,12.0,6.0,0.0
natural,coco,indoor,wavelet,vertical,94.0,6.0,0.0,0.0
natural,coco,outdoor,wavelet,diagonal,100.0,0.0,0.0,0.0
natural,coco,outdoor,wavelet,horizontal,84.0,6.0,6.0,3.0
natural,coco,outdoor,wavelet,vertical,100.0,0.0,0.0,0.0
natural,segmentAnything,full,learned,dual_color,100.0,0.0,0.0,0.0
natural,segmentAnything,full,learned,eye,100.0,0.0,0.0,0.0
natural,segmentAnything,full,learned,inside_out,44.0,44.0,11.0,0.0
natural,segmentAnything,full,learned,misc,67.0,17.0,0.0,17.0


In [17]:

# Percentage of rows won by each best_prior, broken down by
# (dataset_type, dataset, subset, transform, failure_category) — i.e. pooled
# over orientation and split by failure category instead.
medical = main_df2[main_df2["dataset_type"] == DATASET_TYPE].assign(number=1)
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "failure_category"],
    columns="best_prior",
    aggfunc="count",
    fill_value=0,
)

# Row totals include every best_prior value, also those dropped further down.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Restrict to the known priors, in display order; other columns are discarded.
ordered_cols_present = [
    prior for prior in ordered_prior_cols if prior in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,best_prior,GenGamma,Gaussian,Laplace,Student-T
dataset_type,dataset,subset,transform,failure_category,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
natural,coco,indoor,wavelet,,92.0,6.0,2.0,0.0
natural,coco,outdoor,wavelet,,95.0,2.0,2.0,1.0
natural,segmentAnything,full,learned,,89.0,8.0,2.0,2.0
natural,segmentAnything,full,wavelet,,95.0,1.0,3.0,1.0


# Medical

In [18]:
# Value of the `dataset_type` column that the cells of this section filter on.
DATASET_TYPE = "medical"

In [19]:
# One row per (dataset_type, dataset, subset, transform, orientation) group of
# the selected DATASET_TYPE, reduced to the two percentage columns.
medical = (
    failCatDF[failCatDF["dataset_type"] == DATASET_TYPE]
    .groupby(["dataset_type", "dataset", "subset", "transform", "orientation"])
    .first()
    .loc[:, ["pass_percentage", "beat_all_priors_percentage"]]
    .reset_index()
)
medical

Unnamed: 0,dataset_type,dataset,subset,transform,orientation,pass_percentage,beat_all_priors_percentage
0,medical,syntheticMRI2D,axial,wavelet,diagonal,0.0,62.0
1,medical,syntheticMRI2D,axial,wavelet,horizontal,0.0,75.0
2,medical,syntheticMRI2D,axial,wavelet,vertical,0.0,62.0
3,medical,syntheticMRI2D,coronal,wavelet,diagonal,0.0,75.0
4,medical,syntheticMRI2D,coronal,wavelet,horizontal,0.0,62.0
5,medical,syntheticMRI2D,coronal,wavelet,vertical,0.0,62.0
6,medical,syntheticMRI2D,sagittal,wavelet,diagonal,0.0,62.0
7,medical,syntheticMRI2D,sagittal,wavelet,horizontal,0.0,75.0
8,medical,syntheticMRI2D,sagittal,wavelet,vertical,0.0,75.0
9,medical,syntheticMRI3D,full,wavelet,aad,0.0,57.0


In [20]:

# Percentage of rows in each failure category, per
# (dataset_type, dataset, subset, transform, orientation) group, derived from
# the per-category counts already stored in failCatDF["number"].
medical = failCatDF[failCatDF["dataset_type"] == DATASET_TYPE].copy()
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "orientation"],
    columns="failure_category",
    aggfunc="sum",
    fill_value=0,
)

# Normalise every row by its total. The total covers all categories, including
# any that the column selection below removes.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Keep only the known categories, in display order; columns outside that list
# are discarded, so rows need not sum to 100.
ordered_cols_present = [
    category
    for category in ordered_failcat_cols
    if category in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,failure_category
dataset_type,dataset,subset,transform,orientation
medical,syntheticMRI2D,axial,wavelet,diagonal
medical,syntheticMRI2D,axial,wavelet,horizontal
medical,syntheticMRI2D,axial,wavelet,vertical
medical,syntheticMRI2D,coronal,wavelet,diagonal
medical,syntheticMRI2D,coronal,wavelet,horizontal
medical,syntheticMRI2D,coronal,wavelet,vertical
medical,syntheticMRI2D,sagittal,wavelet,diagonal
medical,syntheticMRI2D,sagittal,wavelet,horizontal
medical,syntheticMRI2D,sagittal,wavelet,vertical
medical,syntheticMRI3D,full,wavelet,aad


In [21]:

# Percentage of rows won by each best_prior, per
# (dataset_type, dataset, subset, transform, orientation) group.
medical = main_df2[main_df2["dataset_type"] == DATASET_TYPE].assign(number=1)
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "orientation"],
    columns="best_prior",
    aggfunc="count",
    fill_value=0,
)

# Row totals are taken over every best_prior value before any column is
# dropped, so the percentages are relative to all rows of the group.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Restrict to the known priors, in display order. Other columns (for example
# the "None" filler) are discarded, so rows need not sum to 100.
ordered_cols_present = [
    prior for prior in ordered_prior_cols if prior in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,best_prior,GenGamma,Gaussian,Laplace,Student-T
dataset_type,dataset,subset,transform,orientation,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
medical,syntheticMRI2D,axial,wavelet,diagonal,62.0,12.0,12.0,12.0
medical,syntheticMRI2D,axial,wavelet,horizontal,75.0,25.0,0.0,0.0
medical,syntheticMRI2D,axial,wavelet,vertical,62.0,38.0,0.0,0.0
medical,syntheticMRI2D,coronal,wavelet,diagonal,75.0,0.0,0.0,12.0
medical,syntheticMRI2D,coronal,wavelet,horizontal,62.0,25.0,0.0,0.0
medical,syntheticMRI2D,coronal,wavelet,vertical,62.0,25.0,0.0,0.0
medical,syntheticMRI2D,sagittal,wavelet,diagonal,62.0,12.0,0.0,12.0
medical,syntheticMRI2D,sagittal,wavelet,horizontal,75.0,12.0,0.0,0.0
medical,syntheticMRI2D,sagittal,wavelet,vertical,75.0,12.0,0.0,0.0
medical,syntheticMRI3D,full,wavelet,aad,57.0,29.0,0.0,0.0


In [22]:

# Percentage of rows won by each best_prior, broken down by
# (dataset_type, dataset, subset, transform, failure_category) — i.e. pooled
# over orientation and split by failure category instead.
medical = main_df2[main_df2["dataset_type"] == DATASET_TYPE].assign(number=1)
medical_pivot = medical.pivot_table(
    values="number",
    index=["dataset_type", "dataset", "subset", "transform", "failure_category"],
    columns="best_prior",
    aggfunc="count",
    fill_value=0,
)

# Row totals include every best_prior value, also those dropped further down.
row_sums = medical_pivot.sum(axis=1)
medical_pivot_percent = (medical_pivot.div(row_sums, axis=0) * 100).round()

# Restrict to the known priors, in display order. Other columns (for example
# the "None" filler) are discarded, so rows need not sum to 100.
ordered_cols_present = [
    prior for prior in ordered_prior_cols if prior in medical_pivot_percent.columns
]
medical_pivot_percent = medical_pivot_percent[ordered_cols_present]
medical_pivot_percent

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,best_prior,GenGamma,Gaussian,Laplace,Student-T
dataset_type,dataset,subset,transform,failure_category,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
medical,syntheticMRI2D,axial,wavelet,,67.0,25.0,4.0,4.0
medical,syntheticMRI2D,coronal,wavelet,,67.0,17.0,0.0,4.0
medical,syntheticMRI2D,sagittal,wavelet,,71.0,12.0,0.0,4.0
medical,syntheticMRI3D,full,wavelet,,67.0,10.0,6.0,2.0
