# **Preprocessing the CSV Generated by Weights & Biases**

In this notebook, the CSV exported from Weights & Biases (W&B) is processed to generate two CSV files: one containing only the hyperparameter search results, and another containing the RGB color channel analysis.

In [1]:
import pandas as pd

## Generate CSV for hyperparameter search

In [2]:
# Load the raw Weights & Biases export that the hyperparameter-search table is built from
df_hyperparams = pd.read_csv(filepath_or_buffer='../data/results/wandb_export.csv')

In [3]:
# List every column of the export, one name per line
print(*df_hyperparams.columns, sep='\n')

Name
State
Notes
User
Tags
Created
Runtime
Sweep
activation
batch_size
conv_layers
dense_layers
dense_units_1
dense_units_2
dense_units_3
dropout_rate
epochs
filters_layer_1
filters_layer_2
filters_layer_3
filters_layer_4
kernel_size
learning_rate
optimizer
strides
accuracy_train
accuracy_val
epoch/epoch
epoch/accuracy
epoch/val_accuracy
epoch/loss
epoch/val_loss
epoch/learning_rate
f1_train
f1_val
precision_train
precision_val
recall_train
recall_val


In [None]:
# Keep only the hyperparameter and metric columns, in the order they should appear
columns_to_keep = [
    # run name
    'Name',
    # hyperparameters
    'conv_layers', 'filters_layer_1', 'filters_layer_2', 'filters_layer_3', 'kernel_size', 'strides',
    'dense_layers', 'dense_units_1', 'dense_units_2', 'dense_units_3', 'dropout_rate', 'batch_size',
    # values logged under the epoch/ prefix
    'epoch/epoch', 'epoch/accuracy', 'epoch/val_accuracy', 'epoch/loss', 'epoch/val_loss', 'epoch/learning_rate',
    # train / validation summary metrics
    'accuracy_train', 'accuracy_val', 'precision_train', 'precision_val',
    'recall_train', 'recall_val', 'f1_train', 'f1_val',
]
df_hyperparams = df_hyperparams.loc[:, columns_to_keep]

In [5]:
# Give the summary-metric columns human-readable titles
df_hyperparams = df_hyperparams.rename(
    {
        'accuracy_train': 'Accuracy Train',
        'accuracy_val': 'Accuracy Validation',
        'precision_train': 'Precision Train',
        'precision_val': 'Precision Validation',
        'recall_train': 'Recall Train',
        'recall_val': 'Recall Validation',
        'f1_train': 'F1 Train',
        'f1_val': 'F1 Validation',
    },
    axis='columns',
)

In [6]:
# Drop the runs that belong to the channel analysis so only hyperparameter-search experiments remain
channels_to_remove = ['green_blue', 'red_blue', 'red_green', 'red', 'green', 'blue']
df_hyperparams = df_hyperparams.loc[~df_hyperparams['Name'].isin(channels_to_remove)]

In [7]:
# Flip the rows bottom-to-top so experiments appear in execution order,
# then renumber the index from 0 (the old index is discarded)
df_hyperparams = (
    df_hyperparams
    .iloc[::-1]
    .reset_index(drop=True)
)

In [8]:
# Show the processed hyperparameter-search dataframe as this cell's output
# (a bare expression on the last line of a cell is rendered by the notebook)
df_hyperparams

Unnamed: 0,Name,conv_layers,filters_layer_1,filters_layer_2,filters_layer_3,kernel_size,strides,dense_layers,dense_units_1,dense_units_2,...,epoch/val_loss,epoch/learning_rate,Accuracy Train,Accuracy Validation,Precision Train,Precision Validation,Recall Train,Recall Validation,F1 Train,F1 Validation
0,"cl:1, fl1:32, ks:3, st:2, dl:1, du1:64, dr:0.4...",1,32,,,3,2,1,64,,...,1.47873,0.0005,0.488889,0.355556,0.452632,0.478261,0.518072,0.392857,0.483146,0.431373
1,"cl:2, fl1:32, fl2:32, ks:3, st:2, dl:1, du1:64...",2,32,32.0,,3,2,1,64,,...,0.317314,6.3e-05,0.794444,0.844444,0.709091,0.83871,0.939759,0.928571,0.80829,0.881356
2,"cl:3, fl1:32, fl2:32, fl3:32, ks:3, st:2, dl:1...",3,32,32.0,32.0,3,2,1,64,,...,0.220558,6.3e-05,0.877778,0.911111,0.808081,0.9,0.963855,0.964286,0.879121,0.931034
3,"cl:3, fl1:16, fl2:16, fl3:16, ks:3, st:2, dl:1...",3,16,16.0,16.0,3,2,1,64,,...,0.254325,0.00025,0.85,0.911111,0.791667,0.9,0.915663,0.964286,0.849162,0.931034
4,"cl:3, fl1:16, fl2:16, fl3:32, ks:3, st:2, dl:1...",3,16,16.0,32.0,3,2,1,64,,...,0.350057,0.000125,0.972222,0.955556,0.953488,0.964286,0.987952,0.964286,0.970414,0.964286
5,"cl:3, fl1:16, fl2:16, fl3:64, ks:3, st:2, dl:1...",3,16,16.0,64.0,3,2,1,64,,...,0.465951,0.0005,0.972222,0.955556,0.9875,0.964286,0.951807,0.964286,0.969325,0.964286
6,"cl:3, fl1:16, fl2:32, fl3:16, ks:3, st:2, dl:1...",3,16,32.0,16.0,3,2,1,64,,...,0.252296,3.1e-05,0.994444,0.933333,1.0,0.962963,0.987952,0.928571,0.993939,0.945455
7,"cl:3, fl1:16, fl2:32, fl3:32, ks:3, st:2, dl:1...",3,16,32.0,32.0,3,2,1,64,,...,0.822564,0.00025,0.677778,0.822222,0.643678,0.884615,0.674699,0.821429,0.658824,0.851852
8,"cl:3, fl1:16, fl2:32, fl3:64, ks:3, st:2, dl:1...",3,16,32.0,64.0,3,2,1,64,,...,0.572304,0.00025,0.744444,0.911111,0.698925,0.961538,0.783133,0.892857,0.738636,0.925926
9,"cl:3, fl1:16, fl2:64, fl3:16, ks:3, st:2, dl:1...",3,16,64.0,16.0,3,2,1,64,,...,0.611199,0.00025,0.705556,0.866667,0.653061,0.923077,0.771084,0.857143,0.707182,0.888889


In [9]:
# Write the hyperparameter-search table to disk, leaving the row index out of the file
df_hyperparams.to_csv(path_or_buf='../data/results/hyperparameter_search.csv', index=False)

## Generate CSV for RGB channel analysis

In [10]:
# Load the raw Weights & Biases export again, this time as the basis for the channel-analysis table
df_channels = pd.read_csv(filepath_or_buffer='../data/results/wandb_export.csv')

In [11]:
# List every column of the export, one name per line
print(*df_channels.columns, sep='\n')

Name
State
Notes
User
Tags
Created
Runtime
Sweep
activation
batch_size
conv_layers
dense_layers
dense_units_1
dense_units_2
dense_units_3
dropout_rate
epochs
filters_layer_1
filters_layer_2
filters_layer_3
filters_layer_4
kernel_size
learning_rate
optimizer
strides
accuracy_train
accuracy_val
epoch/epoch
epoch/accuracy
epoch/val_accuracy
epoch/loss
epoch/val_loss
epoch/learning_rate
f1_train
f1_val
precision_train
precision_val
recall_train
recall_val


In [12]:
# Keep only the run name and the metric columns, in the order they should appear
columns_to_keep = [
    # run name
    'Name',
    # values logged under the epoch/ prefix
    'epoch/epoch', 'epoch/accuracy', 'epoch/val_accuracy', 'epoch/loss', 'epoch/val_loss', 'epoch/learning_rate',
    # train / validation summary metrics
    'accuracy_train', 'accuracy_val', 'precision_train', 'precision_val',
    'recall_train', 'recall_val', 'f1_train', 'f1_val',
]
df_channels = df_channels.loc[:, columns_to_keep]

In [13]:
# Give the summary-metric columns human-readable titles
df_channels = df_channels.rename(
    {
        'accuracy_train': 'Accuracy Train',
        'accuracy_val': 'Accuracy Validation',
        'precision_train': 'Precision Train',
        'precision_val': 'Precision Validation',
        'recall_train': 'Recall Train',
        'recall_val': 'Recall Validation',
        'f1_train': 'F1 Train',
        'f1_val': 'F1 Validation',
    },
    axis='columns',
)

In [14]:
# Keep only the runs used in the channel analysis: the six runs named after
# channel subsets plus the one run identified by its full configuration name
channel_models = [
    'green_blue', 'red_blue', 'red_green', 'red', 'green', 'blue',
    'cl:3, fl1:32, fl2:32, fl3:64, ks:3, st:2, dl:1, du1:64, dr:0.4, bs:32',
]
df_channels = df_channels.loc[df_channels['Name'].isin(channel_models)]

In [15]:
# Replace the raw run names with display labels; the run identified by its
# full configuration name is labelled 'RGB'
df_channels = df_channels.assign(
    Name=df_channels['Name'].replace({
        'red': 'Red',
        'green': 'Green',
        'blue': 'Blue',
        'red_green': 'Red-Green',
        'red_blue': 'Red-Blue',
        'green_blue': 'Green-Blue',
        'cl:3, fl1:32, fl2:32, fl3:64, ks:3, st:2, dl:1, du1:64, dr:0.4, bs:32': 'RGB',
    })
)

# Arrange the rows in a fixed channel order: index by label, pick the labels
# in the wanted sequence, then turn 'Name' back into a regular column
channel_order = ['Red', 'Green', 'Blue', 'Red-Green', 'Red-Blue', 'Green-Blue', 'RGB']
df_channels = (
    df_channels
    .set_index('Name')
    .loc[channel_order]
    .reset_index()
)

In [16]:
# Show the processed channel-analysis dataframe as this cell's output
# (a bare expression on the last line of a cell is rendered by the notebook)
df_channels

Unnamed: 0,Name,epoch/epoch,epoch/accuracy,epoch/val_accuracy,epoch/loss,epoch/val_loss,epoch/learning_rate,Accuracy Train,Accuracy Validation,Precision Train,Precision Validation,Recall Train,Recall Validation,F1 Train,F1 Validation
0,Red,26,0.911111,0.377778,0.235571,1.829135,0.0005,0.711111,0.688889,0.63964,0.75,0.855422,0.75,0.731959,0.75
1,Green,84,0.966667,0.977778,0.085746,0.068648,1.6e-05,0.988889,0.977778,0.987952,1.0,0.987952,0.964286,0.987952,0.981818
2,Blue,28,0.966667,0.511111,0.146361,1.773778,0.00025,0.688889,0.866667,0.604651,0.84375,0.939759,0.964286,0.735849,0.9
3,Red-Green,34,0.966667,0.888889,0.130697,0.304578,0.0005,0.811111,0.888889,0.810127,0.925926,0.771084,0.892857,0.790123,0.909091
4,Red-Blue,64,0.994444,0.933333,0.077202,0.174859,6.3e-05,0.977778,0.933333,0.987654,0.962963,0.963855,0.928571,0.97561,0.945455
5,Green-Blue,58,0.977778,0.8,0.08335,0.297427,0.000125,0.983333,0.955556,0.97619,0.964286,0.987952,0.964286,0.982036,0.964286
6,RGB,75,0.988889,0.955556,0.091203,0.204383,8e-06,0.977778,0.955556,0.964706,0.964286,0.987952,0.964286,0.97619,0.964286


In [17]:
# Write the channel-analysis table to disk, leaving the row index out of the file
df_channels.to_csv(path_or_buf='../data/results/channel_analysis.csv', index=False)