In [1]:
import pathlib
import pandas as pd

In [2]:
# Number of most frequently occurring step values that are kept and numbered
# 1..EPOCH_COUNT as epochs.
EPOCH_COUNT = 30
INDEX_COLUMNS = ['ImagesCount','ModelName','IsPretrained','Purpose','TrialNumber']

# One table per metric, indexed by INDEX_COLUMNS, so the metrics can be placed
# side by side as columns afterwards.
metric_tables = []
for metric_name in ['DiceAccuracy(threshold=0.5,smoothing=1)','DiceLoss(smoothing=1)']:
    trial_tables = []
    for csv_path in pathlib.Path(metric_name).rglob('*.csv'):
        # Path below the metric folder: <model>/<pretrained flag>/<purpose>/<file>.csv
        # (unpacking fails loudly if a file sits at any other depth).
        model_name,is_pretrained,purpose,csv_name = csv_path.relative_to(metric_name).parts
        # The trial number is the '_'-separated token just before the last one
        # once the '_<purpose>' part of the file name is collapsed to '_'.
        trial_number = csv_name.replace(f'_{purpose}','_').split('_')[-2]
        trial_tables.append(
            pd.read_csv(csv_path)
            .rename(columns={'Step':'ImagesCount','Value':metric_name})
            .drop(columns='Wall time')
            .assign(
                ModelName=model_name,
                IsPretrained=is_pretrained,
                Purpose=purpose,
                TrialNumber=trial_number,
            )
        )
    if not trial_tables:
        # pd.concat([]) would otherwise fail with an unhelpful message.
        raise FileNotFoundError(f'no CSV files found under {metric_name!r}')
    metric_tables.append(pd.concat(trial_tables).set_index(INDEX_COLUMNS))

data = pd.concat(metric_tables,axis='columns').reset_index()
# Normalise the labels taken from folder names.
data = data.replace({
    'Purpose':{'validation_wo_arg':'validation_w/o_arg'},
    'IsPretrained':{'Pretrained':'pretrained','NonPretrained':'non-pretrained'},
})

# Keep only the EPOCH_COUNT most frequent step values and number them 1..N in
# ascending step order. `.iloc` makes the "first N rows" intent explicit on a
# Series whose index labels are themselves integers.
step_counts = data['ImagesCount'].value_counts(ascending=False)
epoch_steps = sorted(step_counts.iloc[:EPOCH_COUNT].index)
images_count_to_epoch = {step:epoch for epoch,step in enumerate(epoch_steps,start=1)}

# Work on an explicit copy so adding the column never touches `data`.
epoch_data = data.loc[data['ImagesCount'].isin(epoch_steps)].copy()
epoch_data.insert(0,'Epoch',epoch_data['ImagesCount'].map(images_count_to_epoch))
epoch_data = epoch_data.drop(columns='ImagesCount')

In [3]:
def save_data(data,kind):
    """Write raw rows and per-group statistics for several subsets of ``data``.

    For each subset a folder ``summary_data/<kind>/<subset name>`` is created
    (including missing parents; existing folders are reused) and two files are
    written into it: ``raw.csv`` with the subset's rows and ``statistics.csv``
    with median, mean, std and count of every metric column, grouped by
    ModelName, IsPretrained, Purpose and the index column selected by ``kind``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ModelName, IsPretrained, Purpose, TrialNumber
        and the index column for ``kind``. Every other column is treated as a
        metric to aggregate.
    kind : str
        'step_included' (group by 'ImagesCount') or 'epoch_only' (group by
        'Epoch').

    Raises
    ------
    KeyError
        If ``kind`` is not one of the two supported values; raised before any
        file is written.
    """
    index_name = {'step_included':'ImagesCount','epoch_only':'Epoch'}
    index_column = index_name[kind]

    # Trial labels '11'..'20'; rows carrying them are excluded from the
    # "10 trials" subsets.
    trials_11_to_20 = [f'{x:0>2}' for x in range(11,21)]

    def without_trials_11_to_20(frame):
        """Return the rows of ``frame`` whose TrialNumber is not '11'..'20'."""
        return frame.loc[~frame['TrialNumber'].isin(trials_11_to_20)]

    seven_models_10_trials = without_trials_11_to_20(data)
    six_models_10_trials = seven_models_10_trials.query("ModelName!='AttDs'")
    EfficientNet_vs_AttDs_20_trials = data.loc[data['ModelName'].isin(['EfficientNet','AttDs'])].query("IsPretrained=='pretrained'")
    EfficientNet_vs_AttDs_10_trials = without_trials_11_to_20(EfficientNet_vs_AttDs_20_trials)

    # Keys double as output folder names, so their spelling is left untouched.
    tables = {
        'all':data,
        '7_models_10_trails':seven_models_10_trials,
        '6_models_10_trails':six_models_10_trials,
        'EfficientNet_vs_AttDs_20_trials':EfficientNet_vs_AttDs_20_trials,
        'EfficientNet_vs_AttDs_10_trials':EfficientNet_vs_AttDs_10_trials
    }

    group_keys = ['ModelName','IsPretrained','Purpose',index_column]
    for folder_name, table in tables.items():
        # Aggregate only the metric columns: TrialNumber holds strings, and
        # asking for its mean/median fails on recent pandas versions.
        value_columns = [c for c in table.columns if c not in group_keys and c != 'TrialNumber']
        stat = table.groupby(group_keys)[value_columns].aggregate(['median','mean','std','count'])
        # Flatten the (metric, statistic) column pairs into "<metric> <statistic>".
        stat.columns = [' '.join(x) for x in stat.columns]
        stat = stat.reset_index()

        folder = pathlib.Path('summary_data',kind,folder_name)
        # parents/exist_ok: works on a fresh checkout and when the cell is re-run.
        folder.mkdir(parents=True,exist_ok=True)
        table.to_csv(folder.joinpath('raw.csv'))
        stat.to_csv(folder.joinpath('statistics.csv'))

    return None

In [4]:
# Export both views: every logged step, then the epoch-numbered subset.
save_data(data=data, kind='step_included')
save_data(data=epoch_data, kind='epoch_only')