In [None]:
import itertools
import re
import sys

sys.path.insert(0,'..')  # make the local cocpit package importable

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from sklearn.metrics import confusion_matrix, classification_report

import cocpit

In [None]:
# Notebook-wide matplotlib styling: large axis/tick labels and a fixed
# legend-title size, rendered in a serif font.
plt_params = {
    'axes.labelsize': 'xx-large',
    'axes.titlesize': 'xx-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large',
    'legend.title_fontsize': 12,
}
# The font family is applied together with the size settings in one update.
plt.rcParams.update({"font.family": "serif", **plt_params})

# Make Confusion Matrix - Validation

In [None]:
# Run the saved model over the saved validation set and collect, batch by
# batch, the predicted class index and the ground-truth label. The two lists
# (`all_preds`, `all_labels`) feed the confusion-matrix and report cells below.

# Choose the device first so the same cell also works on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# checkpoints and datasets that come from a trusted location.
model = torch.load('/data/data/saved_models/no_mask/e50_bs128_k0_8models_vgg19',
                   map_location=device).to(device)
# Evaluation mode: dropout is disabled and batch-norm uses its running
# statistics, so predictions are deterministic.
model.eval()
val_data = torch.load('/data/data/saved_models/no_mask/val_data.pt')

# Class names used as tick labels below; presumably index i of this list is
# the class with integer label i -- confirm against the training code.
class_names = ['aggs','blank','blurry','budding',
              'bullets','columns','compact irregulars',
              'fragments','needles','plates','rimed aggs',
              'rimed columns','spheres']
val_loader = torch.utils.data.DataLoader(val_data,
                                         batch_size=128,
                                         shuffle=False,  # order is irrelevant for metrics
                                         num_workers=20,
                                         pin_memory=(device.type == "cuda"))
all_preds = []   # one numpy array of predicted class indices per batch
all_labels = []  # one numpy array of true class indices per batch
with torch.no_grad():
    for imgs, labels, img_paths in val_loader:
        inputs = imgs.to(device)
        labels = labels.to(device)

        output = model(inputs)
        # Index of the highest-scoring class along dimension 1.
        pred = torch.argmax(output, 1)

        all_preds.append(pred.cpu().numpy())
        all_labels.append(labels.cpu().numpy())


In [None]:
# NORMALIZED confusion matrix: each row (actual class) is divided by its
# total so the cells of a row sum to 1.

# sklearn's signature is confusion_matrix(y_true, y_pred): rows are the actual
# labels and columns the predictions, matching the axis labels set below.
class_ids = np.arange(len(class_names))
y_true = np.concatenate(all_labels)
y_pred = np.concatenate(all_preds)
# `labels=` pins the matrix to one row/column per entry of class_names, even
# if a class never occurs in this validation split.
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

# Rows with no samples stay all-zero instead of producing NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cmn = np.divide(cm, row_totals,
                out=np.zeros(cm.shape, dtype=float),
                where=row_totals != 0)

fig, ax = plt.subplots(figsize=(13,9))

heat = sns.heatmap(cmn, annot=True, fmt='.2f', xticklabels=class_names,
                   yticklabels=class_names, cmap="Blues", ax=ax)
heat.set_xticklabels(heat.get_xticklabels(), rotation=90, fontsize=18)
heat.set_yticklabels(heat.get_yticklabels(), rotation=0, fontsize=18)

ax.set_ylabel('Actual Labels', fontsize=20)
ax.set_xlabel('Predicted Labels', fontsize=20)
fig.savefig('/data/data/plots/norm_conf_matrix.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Un-normalised confusion matrix (raw sample counts).

# confusion_matrix(y_true, y_pred): rows are actual labels, columns predictions,
# which is what the axis labels below state.
cm = confusion_matrix(np.concatenate(all_labels),
                      np.concatenate(all_preds),
                      labels=np.arange(len(class_names)))
fig, ax = plt.subplots(figsize=(13,9))

# Counts are integers, so annotate with an integer format.
heat = sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names,
                   yticklabels=class_names, cmap="Blues", ax=ax)
heat.set_xticklabels(heat.get_xticklabels(), rotation=90)
ax.set_ylabel('Actual Labels', fontsize=20)
ax.set_xlabel('Predicted Labels', fontsize=20);

## classification report

In [None]:
# Per-class precision / recall / F1 for the validation predictions.
# Flatten the per-batch arrays into one sequence each.
all_labels_flat = list(itertools.chain.from_iterable(all_labels))
all_preds_flat = list(itertools.chain.from_iterable(all_preds))

# The report is a multi-line string; print it so the line breaks render
# instead of showing up as escaped "\n" in the cell output.
print(classification_report(all_labels_flat, all_preds_flat,
                            labels=np.arange(len(class_names)),
                            target_names=class_names,
                            digits=3))

## read in saved data

In [None]:
# Training accuracy/loss log. Column names are supplied explicitly, so
# read_csv treats every line of the file (including the first) as data.
train_csv = '/data/data/saved_models/no_mask/save_train_acc_loss_e50_bs128_k5_9models.csv'
train_columns = ["Model", "Epoch", "Kfold", "Accuracy", "Loss"]
df_train = pd.read_csv(train_csv, header=None, names=train_columns)
df_train.head()

In [None]:
# Validation accuracy/loss log. Column names are supplied explicitly, so
# read_csv treats every line of the file (including the first) as data.
val_csv = '/data/data/saved_models/no_mask/save_val_acc_loss_e50_bs128_k5_9models.csv'
val_columns = ["Model", "Epoch", "Kfold", "Accuracy", "Loss"]
df_val = pd.read_csv(val_csv, header=None, names=val_columns)
df_val.head()

In [None]:
# Map the raw model identifiers to the display names used in the plots.
# Each pair is (raw identifier, display name).
_name_pairs = [
    ('efficient', 'Efficient-b0'),
    ('resnet18', 'ResNet-18'),
    ('resnet34', 'ResNet-34'),
    ('resnet152', 'ResNet-152'),
    ('alexnet', 'AlexNet'),
    ('vgg16', 'VGG-16'),
    ('vgg19', 'VGG-19'),
    ('densenet169', 'DenseNet-169'),
    ('densenet201', 'DenseNet-201'),
]
model_names = [raw for raw, _pretty in _name_pairs]
new_names = [pretty for _raw, pretty in _name_pairs]
convert_names = dict(_name_pairs)
convert_names

In [None]:
# Plot colour for each model, keyed by display name.
colors = {'Efficient-b0': 'k', 'ResNet-18': 'lightblue', 'ResNet-34': 'blue', 'ResNet-152': 'darkblue',
       'AlexNet': 'gold', 'VGG-16': 'red', 'VGG-19': 'darkred', 'DenseNet-169': 'lightgreen',
       'DenseNet-201': 'darkgreen'}
# The same colours keyed by raw model identifier, put in order of increasing time.
color_time = {'resnet18': 'lightblue', 'alexnet': 'gold', 'resnet34': 'blue',
             'efficient': 'k', 'vgg16': 'red', 'densenet169': 'lightgreen', 'vgg19': 'darkred',
             'densenet201': 'darkgreen', 'resnet152': 'darkblue'}


def _run_setting(path, flag):
    """Return the integer that follows ``_<flag>`` in the file name of ``path``.

    For example ``_run_setting('.../save_val_acc_loss_e50_bs128_k5_9models.csv', 'e')``
    returns ``50``.

    Raises:
        ValueError: if the file name contains no ``_<flag><digits>`` token.
    """
    filename = path.rsplit('/', 1)[-1]
    match = re.search('_' + re.escape(flag) + r'(\d+)', filename)
    if match is None:
        raise ValueError("no '_%s<number>' token in file name %r" % (flag, filename))
    return int(match.group(1))


# Read the run settings from the file name itself rather than from fixed
# character offsets, which silently break when the directory or the number of
# digits changes.
num_epochs = _run_setting(val_csv, 'e')
kfold = _run_setting(val_csv, 'k')
num_models = len(model_names)

In [None]:
def _mean_over_folds(frame, column):
    """Reshape ``frame[column]`` to (num_models, kfold, num_epochs) and average over the fold axis."""
    per_fold = frame[column].values.reshape(num_models, kfold, num_epochs)
    return np.average(per_fold, axis=1)


# Fold-averaged curves, one row per model and one column per epoch.
val_accs, val_losses = (_mean_over_folds(df_val, col) for col in ('Accuracy', 'Loss'))
train_accs, train_losses = (_mean_over_folds(df_train, col) for col in ('Accuracy', 'Loss'))

## acc/loss plots

In [None]:
# Accuracy (top row) and loss (bottom row) per epoch, averaged over folds, for
# every model: training data on the left, validation data on the right.
#
# The panels deliberately do not share axes: the two rows use different
# y-ranges (percent vs. loss) and several panels blank out their ticks; with
# shared axes those settings would propagate to the other panels.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(13, 8))
epochs = np.arange(1, num_epochs + 1)


def _scatter_models(ax, curves, marker, size):
    """Scatter one per-epoch curve per model on ``ax``.

    ``curves`` holds one row per entry of ``model_names``, in the same order.
    """
    for name, curve in zip(model_names, curves):
        label = convert_names[name]
        # `colors` is keyed by display name, not by the raw model identifier.
        ax.scatter(epochs, curve, c=colors[label], marker=marker, s=size, label=label)
    ax.set_xlim(1, num_epochs)
    ax.yaxis.set_ticks_position('both')


# Top-left: training accuracy (fractions converted to percent).
_scatter_models(ax1, train_accs * 100, marker='o', size=35)
ax1.set_ylabel("Accuracy [%]")
ax1.set_ylim(40, 100)
ax1.legend(title='Model type:', loc='best', prop={'size': 12}, ncol=2)
ax1.xaxis.set_ticks([])
ax1.minorticks_on()
ax1.tick_params(axis='y', which='minor', direction='out')
ax1.set_title('Training Data')

# Top-right: validation accuracy; ticks are blanked on both axes.
_scatter_models(ax2, val_accs * 100, marker='*', size=55)
ax2.set_ylim(40, 100)
ax2.yaxis.set_ticks([])
ax2.xaxis.set_ticks([])
ax2.tick_params(axis='y', which='minor', direction='out')
ax2.xaxis.set_tick_params(which='minor', bottom=False)
ax2.set_title('Validation Data')

# Bottom-left: training loss.
_scatter_models(ax3, train_losses, marker='o', size=35)
ax3.set_xlabel("Epochs")
ax3.set_ylabel("Loss")
ax3.set_ylim(0, 2.4)
ax3.minorticks_on()
ax3.tick_params(axis='y', which='minor', direction='out')
ax3.xaxis.set_tick_params(which='minor', bottom=False)

# Bottom-right: validation loss; y ticks are blanked.
_scatter_models(ax4, val_losses, marker='*', size=55)
ax4.set_xlabel("Epochs")
ax4.set_ylim(0, 2.4)
ax4.yaxis.set_ticks([])
ax4.minorticks_on()
ax4.tick_params(axis='y', which='minor', direction='out')
ax4.xaxis.set_tick_params(which='minor', bottom=False)

fig.tight_layout()
fig.savefig('/data/data/plots/loss_acc_9models_bs128_e50_13classes_avgkfold.pdf')

## plot model timing

In [None]:
# Load the per-model timing table, rename models to their display names and
# order the rows by increasing time.
time_csv = '/data/data/saved_models/no_mask/model_timing2.csv'
df = (
    pd.read_csv(time_csv)
    # astype returns a new frame; the result has to be kept for the casts to
    # take effect.
    .astype({'Model': str, 'Time': float})
    .replace(convert_names)
    .sort_values(by=['Time'])
)

In [None]:
# Bar chart of time per model, in the row order of `df`.
fig, ax = plt.subplots(1,1,figsize = (10,5))

# The axis is labelled in minutes, so plot the converted values.
# NOTE(review): assumes the 'Time' column is stored in seconds -- confirm
# against whatever writes model_timing2.csv.
timing_minutes = df.assign(Time=df['Time'] / 60)

# One bar per distinct model, in order of first appearance; colours are looked
# up by display name so they cannot drift out of step with the bar order.
bar_order = list(dict.fromkeys(timing_minutes['Model']))
bar_colors = [colors[name] for name in bar_order]

g = sns.barplot(x="Model", y="Time", data=timing_minutes, order=bar_order,
                ci=None, palette=bar_colors, ax=ax)
g.set_xlabel("Model");
g.set_ylabel("Training Time [minutes]");
g.set_xticklabels(bar_order, rotation=90, fontsize=14);

In [None]:
# Load the timing-by-sample-count table and arrange its rows in a fixed model
# order for plotting.
time_csv = '/data/data/saved_models/no_mask/model_timing_samples1.csv'
model_order = ['ResNet-18', 'AlexNet', 'ResNet-34', 'Efficient-b0', 'VGG-16',
               'DenseNet-169', 'VGG-19', 'DenseNet-201', 'ResNet-152']
df = (
    pd.read_csv(time_csv, names=["Model", "Samples", "Time"])
    # astype returns a new frame; the result has to be kept for the casts to
    # take effect.
    .astype({'Model': str, 'Samples': int, 'Time': float})
    .replace(convert_names)
    .set_index('Model')
    .loc[model_order]
    .reset_index()
)

In [None]:
# Grouped bar chart: one group per model, one bar per sample count.
# catplot returns a FacetGrid, so the figure-level setters are used below.
timing_grid = sns.catplot(x="Model", y="Time", hue="Samples",
                          kind="bar", data=df, legend=True)
timing_grid.set_xticklabels(rotation=90, fontsize=14)
timing_grid.set(ylabel='Time [minutes]', xlabel='Model Name');

## plot cross validation

In [None]:
# Per-fold validation accuracies, reshaped to (num_models, kfold, num_epochs),
# and their average over the fold axis. Note that `val_accs` is rebound here to
# the un-averaged 3-D array.
val_accs = np.reshape(df_val['Accuracy'].values, (num_models, kfold, num_epochs))
val_accs_avg = np.average(val_accs, axis=1)

In [None]:
# Final-epoch validation accuracy per model, averaged over folds and keyed by
# display name. It is computed from the per-fold array `val_accs` rather than
# from the previous value of `val_accs_avg`, so re-running this cell still
# works after the name has been rebound to a dict.
final_epoch_mean = np.average(val_accs[:, :, -1], axis=1)
val_accs_avg = dict(zip(new_names, final_epoch_mean))
val_accs_avg

In [None]:
# Same mapping, re-ordered from lowest to highest accuracy.
ranked_names = sorted(val_accs_avg, key=val_accs_avg.get)
val_accs_avg_sort = {name: val_accs_avg[name] for name in ranked_names}
val_accs_avg_sort

In [None]:
from collections import OrderedDict

# Colour of each model, in the same (ascending accuracy) order as
# val_accs_avg_sort.
sorted_colors = OrderedDict((label, colors[label]) for label in val_accs_avg_sort)
sorted_colors

In [None]:
# Top panel: final-epoch validation accuracy of every model for each fold.
# Bottom panel: fold-averaged final-epoch accuracy per model, ascending.
#
# The panels do not share axes: the x axes mean different things (fold number
# vs. model) and the y ranges differ, so sharing would let one panel's ticks
# and limits overwrite the other's.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(7,7))
fig.tight_layout(pad=3.0)

folds = np.arange(1, kfold + 1)
# `val_accs` is indexed (model, fold, epoch); [:, -1] picks the last epoch.
for label, fold_accs in zip(new_names, val_accs):
    ax1.plot(folds, fold_accs[:, -1] * 100,
             c=colors[label], marker='o', label=label)
ax1.set_ylabel("Accuracy [%]")
ax1.set_xlabel("Fold")
ax1.set_ylim(70, 100)
# Shrink current axis by 20% to make room for the legend.
box = ax1.get_position()
ax1.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# Put a legend to the right of the current axis.
ax1.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)
# One tick per fold, derived from kfold instead of a hard-coded range.
ax1.set_xticks(folds)
ax1.yaxis.set_ticks_position('both')
ax1.minorticks_on()
ax1.tick_params(axis='y', which='minor', direction='out')
ax1.set_title('Validation Data Accuracies')

bar_positions = np.arange(1, len(val_accs_avg_sort) + 1)
ax2.bar(bar_positions,
        [acc * 100 for acc in val_accs_avg_sort.values()],
        color=list(sorted_colors.values()))
ax2.set_ylabel("Average Accuracy [%]")
ax2.set_xlabel("Model Name")
ax2.set_ylim(85, 100)
# Shrink current axis by 20% so both panels keep the same width.
box = ax2.get_position()
ax2.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# One tick per bar, labelled with the model's display name.
ax2.set_xticks(bar_positions)
ax2.set_xticklabels(list(sorted_colors.keys()), rotation='vertical')
ax2.yaxis.set_ticks_position('both')