In [1]:
import os
import git
from pathlib import Path

ROOT_DIR =  Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
SAVE_FIGS = False

os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
from reporting import *
plots_path = os.path.join(ROOT_DIR, "publication", "paper", "draft_plots")
save_path_with_hull = Path(os.path.join(ROOT_DIR, 'publication', 'paper', 'CSVs', 'final_results_with_hull.pickle'))
main_df = pd.read_pickle(save_path_with_hull)
RERUN=False
np.random.seed(0)

In [22]:
files_in_transformed_data = os.listdir(os.path.join(ROOT_DIR, "transformed-data"))
files_in_transformed_data = [f for f in files_in_transformed_data if not (f.endswith('-size.pickle') or f.endswith('.csv'))]
files_in_transformed_data = [f for f in files_in_transformed_data if 'standardTesting' not in f]
print(files_in_transformed_data)

['coco-outdoor-wavelet-diagonal-red.pickle', 'syntheticMRI2D-sagittal-wavelet-vertical.pickle', 'pastis-full-wavelet-diagonal-gray.pickle', 'syntheticMRI3D-full-wavelet-add.pickle', 'coco-outdoor-wavelet-vertical-red.pickle', 'syntheticMRI2D-coronal-wavelet-horizontal.pickle', 'coco-indoor-wavelet-horizontal-red.pickle', 'pastis-full-fourier-blue.pickle', 'segmentAnything-full-wavelet-horizontal-green.pickle', 'agriVision-full-fourier-green.pickle', 'coco-indoor-wavelet-diagonal-green.pickle', 'syntheticMRI2D-sagittal-wavelet-diagonal.pickle', 'agriVision-full-wavelet-horizVert-green.pickle', 'syntheticMRI3D-full-wavelet-dad.pickle', 'syntheticMRI2D-axial-wavelet-diagonal.pickle', 'spaceNet-full-wavelet-horizVert-gray.pickle', 'coco-indoor-wavelet-horizontal-blue.pickle', 'coco-indoor-wavelet-vertical-gray.pickle', 'coco-outdoor-wavelet-diagonal-gray.pickle', 'syntheticMRI3D-full-wavelet-dda.pickle', 'coco-outdoor-wavelet-horizontal-blue.pickle', 'agriVision-full-wavelet-diagonal-red.p

In [29]:
bad_files = ["coco-indoor.pickle", "coco-indoor-wavelet-learned.pickle"]

In [None]:
results = []
for file in files_in_transformed_data:
    name = file[:-7]
    name_parts = name.split('-')
    if file in bad_files:
        continue
    dataset, subset, transform,  = name.split('-')[:3]
    if dataset == "syntheticMRI2D":
        orientation = name_parts[3]
        channel = np.nan
    elif dataset == "syntheticMRI3D":
        orientation = name_parts[3]
        channel = np.nan
    elif transform == "fourier":
        orientation = "fourier"
        channel = name_parts[3]
    elif transform == "learned":
        orientation = "learned"
        channel = "learned"
    else:
        orientation = name_parts[3]
        channel = name_parts[4]
    data_dict = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", file))
    for group in data_dict.keys():
        data = np.sort(data_dict[group])
        if len(data) == 0:
            continue
        result_df = pd.DataFrame({
            'dataset': [dataset],
            'subset': [subset],
            'transform': [transform],
            'orientation': [orientation],
            'channel': [channel],
            'largest': [data[-1]],
            '5th_largest': [data[-5] if len(data) >= 5 else np.nan],
            '10th_largest': [data[-10] if len(data) >= 10 else np.nan],
            'smallest': [data[0]],
            '5th_smallest': [data[4] if len(data) >= 5 else np.nan],
            '10th_smallest': [data[9] if len(data) >= 10 else np.nan]
        })
        results.append(result_df)
big_df = pd.concat(results, ignore_index=True)


Unnamed: 0,dataset,subset,transform,orientation,channel,largest,5th_largest,10th_largest,smallest,5th_smallest,10th_smallest,largest_ratio_5,largest_ratio_10,smallest_ratio_5,smallest_ratio_10
0,agriVision,full,fourier,fourier,blue,879.080903,277.383149,214.957980,-960.251741,-273.939473,-206.767568,6.423387,8.763420,6.467639,8.932063
1,agriVision,full,fourier,fourier,gray,765.249209,234.990690,183.808504,-856.438866,-249.705479,-191.983160,6.457287,8.609113,6.387079,8.734236
2,agriVision,full,fourier,fourier,green,917.181420,254.866586,196.725627,-1013.762157,-299.996352,-206.670328,6.585265,8.792938,6.513617,9.066858
3,agriVision,full,fourier,fourier,red,1174.376641,333.328066,236.752563,-1347.064706,-341.205097,-232.859068,6.032936,9.033726,6.421487,9.635047
4,agriVision,full,learned,learned,learned,753.729710,246.595823,188.386164,-767.669439,-252.524204,-190.501899,3.368549,4.413428,3.400447,4.479953
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,spaceNet,full,wavelet,diagonal,red,268.866453,167.152909,144.581975,-213.637369,-165.565024,-149.697336,2.027077,2.363626,1.794632,2.075392
74,spaceNet,full,wavelet,horizVert,blue,478.770991,300.394093,260.467443,-452.628271,-303.774332,-250.979607,2.183963,2.546405,2.168279,2.570690
75,spaceNet,full,wavelet,horizVert,gray,436.208920,303.079782,262.380964,-481.099241,-304.339677,-262.242167,1.877307,2.158416,1.999221,2.299987
76,spaceNet,full,wavelet,horizVert,green,466.449742,315.608535,273.440251,-515.947244,-320.506719,-281.928197,1.894506,2.170119,2.048480,2.332926


In [48]:
pd.set_option('display.max_rows', None)

In [49]:
big_df["largest_ratio_5"] = big_df["largest"] / big_df["5th_largest"]
big_df["largest_ratio_10"] = big_df["largest"] / big_df["10th_largest"]
big_df["smallest_ratio_5"] =  big_df["smallest"] / big_df["5th_smallest"]
big_df["smallest_ratio_10"] = big_df["smallest"]/big_df["10th_smallest"]
big_df.groupby(['dataset', 'subset', 'transform', 'orientation', 'channel']).mean().reset_index()

Unnamed: 0,dataset,subset,transform,orientation,channel,largest,5th_largest,10th_largest,smallest,5th_smallest,10th_smallest,largest_ratio_5,largest_ratio_10,smallest_ratio_5,smallest_ratio_10
0,agriVision,full,fourier,fourier,blue,879.080903,277.383149,214.95798,-960.251741,-273.939473,-206.767568,6.423387,8.76342,6.467639,8.932063
1,agriVision,full,fourier,fourier,gray,765.249209,234.99069,183.808504,-856.438866,-249.705479,-191.98316,6.457287,8.609113,6.387079,8.734236
2,agriVision,full,fourier,fourier,green,917.18142,254.866586,196.725627,-1013.762157,-299.996352,-206.670328,6.585265,8.792938,6.513617,9.066858
3,agriVision,full,fourier,fourier,red,1174.376641,333.328066,236.752563,-1347.064706,-341.205097,-232.859068,6.032936,9.033726,6.421487,9.635047
4,agriVision,full,learned,learned,learned,753.72971,246.595823,188.386164,-767.669439,-252.524204,-190.501899,3.368549,4.413428,3.400447,4.479953
5,agriVision,full,wavelet,diagonal,blue,1753.880606,733.943328,533.779981,-1760.608279,-755.781451,-537.754254,2.954482,4.013561,3.002346,4.17305
6,agriVision,full,wavelet,diagonal,gray,1350.232354,724.066624,451.700028,-1895.726074,-725.272489,-464.217368,2.731626,3.899414,2.884375,4.322339
7,agriVision,full,wavelet,diagonal,green,1338.255876,719.020343,496.676381,-2025.388487,-802.415318,-468.999043,2.781045,3.893791,2.88542,4.436224
8,agriVision,full,wavelet,diagonal,red,2021.722946,798.718124,521.76914,-2445.577499,-736.991483,-511.259328,3.351292,4.955006,3.88353,5.560435
9,agriVision,full,wavelet,horizVert,blue,4433.894446,2188.696108,1575.229341,-3950.164978,-1748.204492,-1241.957928,2.915606,3.972423,2.707532,3.862535


In [50]:
big_df.groupby(['dataset', 'subset', 'transform', 'orientation', 'channel']).median().reset_index()

Unnamed: 0,dataset,subset,transform,orientation,channel,largest,5th_largest,10th_largest,smallest,5th_smallest,10th_smallest,largest_ratio_5,largest_ratio_10,smallest_ratio_5,smallest_ratio_10
0,agriVision,full,fourier,fourier,blue,797.941162,146.06279,105.510757,-814.014557,-143.659615,-103.333607,3.756822,5.096053,3.709992,4.983147
1,agriVision,full,fourier,fourier,gray,671.636383,130.606712,99.674248,-733.502167,-131.430683,-97.839085,4.457774,5.88947,4.163038,5.320992
2,agriVision,full,fourier,fourier,green,772.398041,144.063248,108.492031,-733.630829,-147.310692,-107.328495,5.1659,6.768254,4.515686,6.328045
3,agriVision,full,fourier,fourier,red,849.578827,169.919167,112.901943,-1109.733337,-171.382599,-113.600319,4.503709,6.990084,4.45696,6.665458
4,agriVision,full,learned,learned,learned,750.845712,240.503498,169.760432,-752.740485,-241.713862,-175.933934,3.250136,4.308226,3.1317,4.271408
5,agriVision,full,wavelet,diagonal,blue,906.662354,314.266479,240.752777,-757.962341,-328.429779,-239.900131,2.885011,3.773718,3.157674,4.32807
6,agriVision,full,wavelet,diagonal,gray,823.057373,301.740967,207.526337,-625.888977,-276.202881,-194.442291,2.727695,4.010968,2.569566,3.458057
7,agriVision,full,wavelet,diagonal,green,1044.875366,336.323364,220.479828,-959.005493,-294.098206,-217.948334,3.160086,4.232005,2.979641,3.772877
8,agriVision,full,wavelet,diagonal,red,1548.32959,321.6521,230.937759,-925.414062,-306.113129,-229.845016,3.25696,5.157069,3.719731,5.157778
9,agriVision,full,wavelet,horizVert,blue,1755.130493,763.316772,532.25708,-1577.985962,-683.551208,-499.695068,2.579203,4.095389,2.514376,3.465194


In [None]:
pd.reset_option('display.max_rows')