In [30]:
import pandas as pd 
import wandb
# Client for the W&B public API.
api = wandb.Api()

# Runs are addressed as "<entity>/<project-name>".
runs = api.runs("viktor-cikojevic/three_d_seg")

summary_list, config_list, name_list = [], [], []
for run in runs:
    # Summary: the run's output keys/values (metrics such as accuracy).
    # `_json_dict` is used so that large files are left out.
    summary_list.append(run.summary._json_dict)

    # Config: the run's hyperparameters, minus special keys starting with "_".
    visible_config = {}
    for key, value in run.config.items():
        if key.startswith('_'):
            continue
        visible_config[key] = value
    config_list.append(visible_config)

    # Name: the human-readable name of the run.
    name_list.append(run.name)

# One row per run; `summary` and `config` cells hold the raw dicts.
df = pd.DataFrame(
    dict(summary=summary_list, config=config_list, name=name_list)
)


In [31]:
# Preview the first five rows of the runs table.
df.head(n=5)

Unnamed: 0,summary,config,name
0,"{'epoch': 7, 'crude_val_loss': 0, 'trainer/glo...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...
1,"{'lr': 0.0003162277571391314, '_step': 888, 'e...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...
2,"{'surface_dice': 0.753478467464447, 'crude_val...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...
3,"{'_runtime': 8357.782411575317, '_timestamp': ...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...
4,"{'epoch': 6, 'crude_f1': 0, 'f1_score': 0.7912...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",Segformer_2222-c256x1-bs8-llr-4.5-t111-sm0-202...


In [32]:
# Read each run's `group_name` from its config dict.
# Runs without that key (or whose config is not a dict) get the string
# placeholder 'None', so the column can still be compared against group names.
# An explicit lookup is used instead of a bare `except:` so that unrelated
# errors (including KeyboardInterrupt) are no longer silently swallowed.
group_names = [
    config.get('group_name', 'None') if isinstance(config, dict) else 'None'
    for config in df['config']
]

df['group_name'] = group_names

In [33]:
# Keep only the runs belonging to one of the two groups of interest,
# then renumber the rows from 0.
group_column = df['group_name']
in_bn_to_ln_group = group_column == 'r18_replace_bn_with_ln'
in_upsample_group = group_column == 'r18_upsample'
df = df[in_bn_to_ln_group | in_upsample_group].reset_index(drop=True)
df

Unnamed: 0,summary,config,name,group_name
0,"{'epoch': 7, 'crude_val_loss': 0, 'trainer/glo...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,r18_replace_bn_with_ln
1,"{'lr': 0.0003162277571391314, '_step': 888, 'e...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,r18_replace_bn_with_ln
2,"{'surface_dice': 0.753478467464447, 'crude_val...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,r18_replace_bn_with_ln
3,"{'_runtime': 8357.782411575317, '_timestamp': ...","{'loss': [{'type': 'BCELoss', 'weight': 0.001}...",SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,r18_upsample


In [34]:
def flatten_dict(d, parent_key='', sep='_'):
    """Flatten nested dicts/lists into a single-level dict.

    Keys of nested dicts and indices of nested lists are joined onto the key
    of their parent with ``sep``. For example
    ``{'loss': [{'type': 'BCELoss'}]}`` becomes ``{'loss_0_type': 'BCELoss'}``.

    Args:
        d: The value to flatten. A dict or list is walked recursively; any
            other value is returned as ``{parent_key: d}``.
        parent_key: Prefix to put in front of every generated key. Only the
            empty string (the default) means "no prefix".
        sep: Separator placed between the prefix and each key/index.

    Returns:
        A new flat dict. Top-level keys (or list indices) are kept as-is;
        keys below the top level are strings built with ``sep``. Empty nested
        dicts/lists contribute no entries. If two paths produce the same key,
        the one encountered last wins.
    """
    if not isinstance(d, (dict, list)):
        # Return immediately if d is neither a dictionary nor a list.
        return {parent_key: d}

    pairs = d.items() if isinstance(d, dict) else enumerate(d)
    items = []
    for k, v in pairs:
        # Compare against '' explicitly: a plain truthiness test would treat a
        # falsy prefix such as list index 0 as "no prefix" and silently drop
        # it, making entries of different list elements collide.
        new_key = f"{parent_key}{sep}{k}" if parent_key != '' else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        elif isinstance(v, list):
            for i, item in enumerate(v):
                items.extend(
                    flatten_dict(item, f"{new_key}{sep}{i}", sep=sep).items()
                )
        else:
            items.append((new_key, v))
    return dict(items)


In [35]:
# Flatten every run's nested config into a single-level dict.
# Iterating the `config` column directly avoids `iterrows()`, which builds a
# whole Series per row just to read this one field.
flat_configs = [flatten_dict(config) for config in df['config']]

In [36]:
# Build one row per run from the flattened configs.
# NOTE: this rebinds `df`; the summary/config/name table is replaced here.
df = pd.DataFrame(data=flat_configs)
df.head(n=5)

Unnamed: 0,loss_0_type,loss_0_weight,loss_1_type,loss_1_weight,loss_2_type,loss_2_kwargs_smooth,loss_2_weight,loss_3_type,loss_3_kwargs_gamma,loss_3_weight,...,early_stopping_min_delta,batch_transform_kwargs_mixup_prob,batch_transform_kwargs_alpha_mixup,batch_transform_kwargs_cutmix_prob,batch_transform_kwargs_alpha_cutmix,experiment_name,quit_immediately,best_surface_dice,val_check_interval,apparent_batch_size
0,BCELoss,0.001,MccLoss,0.001,DiceLoss,0.0001,0.4,FocalLoss,2,0.6,...,0,0,0.4,0.3,0.4,SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,False,0.877691,20000,16
1,BCELoss,0.001,MccLoss,0.001,DiceLoss,0.0001,0.4,FocalLoss,2,0.6,...,0,0,0.4,0.3,0.4,SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,False,0.829338,20000,16
2,BCELoss,0.001,MccLoss,0.001,DiceLoss,0.0001,0.4,FocalLoss,2,0.6,...,0,0,0.4,0.3,0.4,SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,False,0.818368,20000,16
3,BCELoss,0.001,MccLoss,0.001,DiceLoss,0.0001,0.4,FocalLoss,2,0.6,...,0,0,0.4,0.3,0.4,SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,False,0.874831,20000,16


In [37]:
# Debug helper (disabled): print the flattened column names containing 'batch'.
# for col in df.columns:
#     if 'batch' in col:
#         print(col)

In [38]:
# Drop the columns whose value is the same for every run, so that only the
# settings that actually differ between runs remain.
# The sort key is always kept: if it were constant (e.g. a single run was
# selected) it would be dropped and the sort below would raise KeyError.
sort_key = 'best_surface_dice'
constant_columns = [
    column for column in df.columns
    if column != sort_key and len(df[column].unique()) == 1
]
# A single drop avoids re-copying the frame once per removed column.
df = df.drop(columns=constant_columns)

# Best run first, rows renumbered from 0.
df = df.sort_values(by=[sort_key], ascending=False).reset_index(drop=True)
df

Unnamed: 0,model_type,model_kwargs_replace_batch_norm_with_layer_norm,group_name,early_stopping_patience,experiment_name,best_surface_dice
0,SMPModel,False,r18_replace_bn_with_ln,3,SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,0.877691
1,SMPModelUpsampleBy2,,r18_upsample,3,SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,0.874831
2,SMPModel,True,r18_replace_bn_with_ln,3,SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,0.829338
3,SMPModel,True,r18_replace_bn_with_ln,5,SMP_Unet_resnet18_imagenet-c512x1-bs16-llr-3.5...,0.818368
