In [2]:
import os
import git
from pathlib import Path
from scipy.spatial import Delaunay

ROOT_DIR =  Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
SAVE_FIGS = False

In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
from reporting import *
plots_path = os.path.join(ROOT_DIR, "publication", "paper", "draft_plots")
main_df = main_df.copy()
RERUN=False
np.random.seed(0)

In [4]:
main_df = main_df.fillna("None", inplace=False)

In [5]:
csv_path = Path(os.path.join(ROOT_DIR, 'publication', 'poster', 'CSVs', 'quick_access.csv'))

def in_hull(p, hull):
    if hasattr(hull, 'vertices') and not isinstance(hull, Delaunay):
        hull = Delaunay(hull.points)
    elif not isinstance(hull, Delaunay):
        hull = Delaunay(hull)
    return (hull.find_simplex(p) >= 0)

if RERUN or not csv_path.exists():
    x_vals = np.linspace(0, 20, 1000)
    eta_vals = 1.5 + np.zeros_like(x_vals)
    roi_beta = (eta_vals) / x_vals
    roi = 1 / roi_beta
    def line_intersects_hull(hull):
        if hull is None or hull is np.nan:
            return False
        line_points = np.column_stack((x_vals, roi))
        return np.any(in_hull(line_points, hull))

    main_df['intersect_roi'] = main_df['hull'].apply(lambda h: line_intersects_hull(h) if type(h) != str else False)
    main_df.drop('hull', axis=1).to_csv(csv_path)

main_df = pd.read_csv(csv_path).drop('Unnamed: 0', axis=1)
main_df['intersect_roi'] = (main_df['intersect_roi'] == 'True').astype(int)
main_df.head()


Unnamed: 0,group,obs_var,var_lower,var_upper,obs_kurt,kurt_lower,kurt_upper,total_samples,initial_r,initial_eta,...,dataset_type,hull,best_beta,best_1/beta,beat_all_priors,best_prior,failure_category,failure_type,which_ones,intersect_roi
0,2,1292.9467,749.10767,2108.4307,421.84155,123.97443,696.23804,45000.0,0.1,2.5,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,38.0,0.026316,1,GenGamma,practically_pass,pass,,1
1,5,662.6544,382.08975,1248.6178,628.03,108.21224,1663.2395,117000.0,0.1,2.0,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,43.333333,0.023077,1,GenGamma,practically_pass,pass,,0
2,8,256.6339,133.79204,650.8456,1363.7307,99.3656,3639.2358,189000.0,0.1,2.9,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,53.333333,0.01875,1,GenGamma,practically_pass,pass,,1
3,11,121.96157,57.215298,390.19293,2698.3176,93.68093,4889.278,495000.0,0.1,3.1,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,55.555556,0.018,1,GenGamma,practically_pass,pass,,1
4,14,48.46801,23.179575,151.67955,2654.2952,86.30504,4711.6294,1134000.0,0.1,3.4,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,56.666667,0.017647,1,GenGamma,practically_pass,pass,,1


In [10]:
granularity = ['dataset', 'transform']
temp = main_df.copy()[(main_df['dataset'] != 'standardTesting')] #  & (main_df['transform'] != 'learned')
temp['practically_pass'] = (temp['failure_category'] == 'practically_pass')
temp['actually_pass'] = (temp['failure_category'] == 'actually_pass')
summary_df = (temp.groupby(granularity)[['actually_pass','practically_pass','intersect_roi']].mean() * 100).round(1)
summary_df

Unnamed: 0_level_0,Unnamed: 1_level_0,actually_pass,practically_pass,intersect_roi
dataset,transform,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
agriVision,fourier,2.3,93.2,70.5
agriVision,learned,0.0,78.3,53.3
agriVision,wavelet,13.9,86.1,73.6
coco,learned,0.0,57.9,33.3
coco,wavelet,25.0,26.6,27.1
pastis,fourier,90.9,9.1,100.0
pastis,learned,0.0,85.7,74.3
pastis,wavelet,57.1,42.9,96.4
segmentAnything,learned,0.0,73.4,50.0
segmentAnything,wavelet,25.0,60.2,36.1


In [51]:
granularity = ['dataset', 'transform']
temp = main_df.copy()[(main_df['dataset'] != 'standardTesting')]# & (main_df['transform'] != 'learned')]
temp['practically_pass'] = (temp['failure_category'] == 'practically_pass')
temp['actually_pass'] = (temp['failure_category'] == 'actually_pass')
temp['borderline'] = (temp['failure_category'] == 'borderline')
summary_df = (temp.groupby(granularity)[['actually_pass','practically_pass','intersect_roi']].mean() * 100).round(1).sort_values('transform')
summary_df.reset_index()#.drop('dataset', axis=1)

Unnamed: 0,dataset,transform,actually_pass,practically_pass,intersect_roi
0,agriVision,fourier,2.3,93.2,70.5
1,pastis,fourier,90.9,9.1,100.0
2,spaceNet,fourier,0.0,100.0,50.0
3,agriVision,learned,0.0,78.3,53.3
4,coco,learned,0.0,57.9,33.3
5,pastis,learned,0.0,85.7,74.3
6,segmentAnything,learned,0.0,73.4,50.0
7,spaceNet,learned,0.0,42.9,46.4
8,agriVision,wavelet,13.9,86.1,73.6
9,coco,wavelet,25.0,26.6,27.1


In [23]:
save_path = Path(os.path.join(ROOT_DIR, 'publication', 'poster', 'CSVs', 'summary_table.csv'))
summary_df.to_csv(save_path) #.drop('learned')

In [46]:
summary_df = summary_df.reset_index()
summary_df#[summary_df['transform'] == 'wavelet']

Unnamed: 0,index,dataset,transform,actually_pass,practically_pass,intersect_roi
0,0,agriVision,fourier,2.3,93.2,70.5
1,1,pastis,fourier,90.9,9.1,100.0
2,2,spaceNet,fourier,0.0,100.0,50.0
3,3,agriVision,learned,0.0,78.3,53.3
4,4,coco,learned,0.0,57.9,33.3
5,5,pastis,learned,0.0,85.7,74.3
6,6,segmentAnything,learned,0.0,73.4,50.0
7,7,spaceNet,learned,0.0,42.9,46.4
8,8,agriVision,wavelet,13.9,86.1,73.6
9,9,coco,wavelet,25.0,26.6,27.1


In [52]:
(temp[['actually_pass','practically_pass','intersect_roi', 'borderline']].mean(numeric_only=True)*100).round(1)

actually_pass       15.5
practically_pass    56.3
intersect_roi       50.2
borderline           4.8
dtype: float64

In [54]:
main_df[main_df['dataset'] !='standardTesting']

Unnamed: 0,group,obs_var,var_lower,var_upper,obs_kurt,kurt_lower,kurt_upper,total_samples,initial_r,initial_eta,...,dataset_type,hull,best_beta,best_1/beta,beat_all_priors,best_prior,failure_category,failure_type,which_ones,intersect_roi
0,2,1292.946700,749.107670,2108.430700,421.841550,123.974430,696.238040,45000.0,0.1,2.5,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,38.000000,0.026316,1,GenGamma,practically_pass,pass,,1
1,5,662.654400,382.089750,1248.617800,628.030000,108.212240,1663.239500,117000.0,0.1,2.0,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,43.333333,0.023077,1,GenGamma,practically_pass,pass,,0
2,8,256.633900,133.792040,650.845600,1363.730700,99.365600,3639.235800,189000.0,0.1,2.9,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,53.333333,0.018750,1,GenGamma,practically_pass,pass,,1
3,11,121.961570,57.215298,390.192930,2698.317600,93.680930,4889.278000,495000.0,0.1,3.1,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,55.555556,0.018000,1,GenGamma,practically_pass,pass,,1
4,14,48.468010,23.179575,151.679550,2654.295200,86.305040,4711.629400,1134000.0,0.1,3.4,...,remote sensing,<scipy.spatial._qhull.ConvexHull object at 0x0...,56.666667,0.017647,1,GenGamma,practically_pass,pass,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1150,4,241418.060000,225806.880000,257381.550000,4.450259,4.034465,4.879806,22538.0,0.5,-0.5,...,medical,<scipy.spatial._qhull.ConvexHull object at 0x0...,1.400000,0.714286,1,GenGamma,actually_pass,pass,,0
1151,5,33947.695000,31871.596000,36147.625000,3.966855,3.001096,5.099455,296776.0,0.1,8.6,...,medical,<scipy.spatial._qhull.ConvexHull object at 0x0...,101.000000,0.009901,1,GenGamma,actually_pass,pass,,1
1152,6,6416.669400,5792.756300,7090.980500,13.477583,10.768813,16.435010,2880648.0,0.1,4.2,...,medical,<scipy.spatial._qhull.ConvexHull object at 0x0...,56.000000,0.017857,1,GenGamma,practically_pass,pass,,1
1153,7,397.892600,357.501070,450.380680,17.566162,9.929181,54.616486,25362406.0,0.1,2.2,...,medical,<scipy.spatial._qhull.ConvexHull object at 0x0...,37.000000,0.027027,1,GenGamma,practically_pass,pass,,1


In [68]:
main_df[main_df['failure_type'] == 'pass']['beat_all_priors'].value_counts()

beat_all_priors
1    785
0      9
Name: count, dtype: int64

In [64]:
main_df['failure_category'].value_counts()

failure_category
practically_pass       626
trivial_failure        219
actually_pass          168
interesting_failure     76
borderline              66
Name: count, dtype: int64

In [59]:
(temp.groupby(['dataset', 'transform'])[['kstest_stat_best', 'actually_pass','practically_pass','intersect_roi']].median(numeric_only=True)*1)#.round(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,kstest_stat_best,actually_pass,practically_pass,intersect_roi
dataset,transform,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
agriVision,fourier,0.004708,0.0,1.0,1.0
agriVision,learned,0.005417,0.0,1.0,1.0
agriVision,wavelet,0.00502,0.0,1.0,1.0
coco,learned,0.009714,0.0,1.0,0.0
coco,wavelet,0.011939,0.0,0.0,0.0
pastis,fourier,0.001526,1.0,0.0,1.0
pastis,learned,0.006469,0.0,1.0,1.0
pastis,wavelet,0.004502,1.0,0.0,1.0
segmentAnything,learned,0.004959,0.0,1.0,0.5
segmentAnything,wavelet,0.004032,0.0,1.0,0.0
