# Parameter Investigation

In [3]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from functools import reduce
# Read every column as text, then convert only the two metric columns to floats.
data = pd.read_csv(
    'L2 Data Scientist Assessment - Data.csv',
    dtype=str,
    encoding='cp1252',
).astype({'f1': float, 'accuracy': float})

# One subset per value of the 'class' column, used by the per-class comparisons below.
background = data.loc[data['class'].eq('Background')]
tissue = data.loc[data['class'].eq('Tissue')]
lesions = data.loc[data['class'].eq('Lesions')]

Store a list of the model parameters

In [25]:
# Model parameters whose impact on f1 is compared in the cells below.
parameters = [
    'dropoutFraction',
    'augmentColor',
    'augmentGeometry',
    'balanceClasses',
    'elasticDeform',
]

# The three per-class subsets collected in one list.
dfs = [
    background,
    tissue,
    lesions,
]

Function to group by Model and Parameter so we can compare the impact on f1 score

In [26]:
def f1_group(df, param, dropna=True):
    """Return the mean f1 of every (model, param) combination, best first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'model', 'f1' and the column named by `param`.
    param : str
        Name of the column to group on together with 'model'.
    dropna : bool, default True
        Forwarded to ``DataFrame.groupby``. With the default, rows whose
        'model' or `param` value is missing are left out of the result.
        Pass False to keep them as a group of their own, so that models
        which have no value for `param` still show up in the comparison.

    Returns
    -------
    pandas.DataFrame
        Columns ['model', param, 'f1'], ordered by descending mean f1. The
        index holds each row's position from before the sort.
    """
    means = (
        df.groupby(['model', param], dropna=dropna)
        .agg({'f1': 'mean'})
        .reset_index()
    )
    return means.sort_values(by=['f1'], ascending=False)

Loop through the parameters and show which model + parameter combinations scored highest

# Parameter effect on Lesion Classification

In [28]:
# For each model parameter, print the mean f1 of every model/setting pair on the
# Lesions rows, highest first.
for param in parameters:
    # The trailing "\n" plus print's own newline leaves a blank line under the heading.
    print(f'Effect of "{param}" on Lesion Classification\n')
    print(f1_group(lesions, param))
    print()

Effect of "dropoutFraction" on Lesion Classification

            model dropoutFraction        f1
5  AE_InceptionV3               0  0.610407
6  AE_InceptionV3             0.2  0.600812
2         AE_FCN8               0  0.592926
0        AE_FCN16               0  0.580614
8     AE_Xception             0.2  0.533619
3         AE_FCN8             0.2  0.527383
1        AE_FCN16             0.2  0.469868
7     AE_ResNet50             0.2  0.415154
9   DeepLabV3Plus             0.2  0.394629
4    AE_Inception             0.2  0.200791

Effect of "augmentColor" on Lesion Classification

             model augmentColor        f1
14       Seg_Model        FALSE  0.689420
15       Seg_Model         TRUE  0.667295
7   AE_InceptionV3         TRUE  0.602410
6   AE_InceptionV3        FALSE  0.600480
2          AE_FCN8        FALSE  0.588557
11     AE_Xception         TRUE  0.564246
10     AE_Xception        FALSE  0.561992
8      AE_ResNet50        FALSE  0.547993
0         AE_FCN16        FALSE 

For "dropoutFraction", every model that was run with both settings (0 and 0.2) achieved a higher mean f1 with a dropoutFraction of 0.

"augmentColor" set to FALSE with the Seg_Model model gives the overall best result, but for some other models (e.g. AE_InceptionV3 and AE_Xception) TRUE outperforms FALSE.

"augmentGeometry" provides a significant improvement for the top performing models when set to TRUE

Likewise, "elasticDeform" provides a significant improvement for the top performing models when set to TRUE.



# Parameter effect on Tissue Classification

In [29]:
# For each model parameter, print the mean f1 of every model/setting pair on the
# Tissue rows, highest first.
for param in parameters:
    # The trailing "\n" plus print's own newline leaves a blank line under the heading.
    print(f'Effect of "{param}" on Tissue Classification\n')
    print(f1_group(tissue, param))
    print()

Effect of "dropoutFraction" on Tissue Classification

            model dropoutFraction        f1
2         AE_FCN8               0  0.967286
5  AE_InceptionV3               0  0.963223
6  AE_InceptionV3             0.2  0.962203
0        AE_FCN16               0  0.962165
8     AE_Xception             0.2  0.960235
3         AE_FCN8             0.2  0.958992
1        AE_FCN16             0.2  0.958715
4    AE_Inception             0.2  0.956204
7     AE_ResNet50             0.2  0.953572
9   DeepLabV3Plus             0.2  0.950674

Effect of "augmentColor" on Tissue Classification

             model augmentColor        f1
14       Seg_Model        FALSE  0.969611
15       Seg_Model         TRUE  0.967337
7   AE_InceptionV3         TRUE  0.963609
11     AE_Xception         TRUE  0.963103
6   AE_InceptionV3        FALSE  0.961291
1         AE_FCN16         TRUE  0.961122
10     AE_Xception        FALSE  0.960708
3          AE_FCN8         TRUE  0.959982
4     AE_Inception        FALSE 