In [2]:
import torch
from torch import nn
import dlc_practical_prologue as prologue
import matplotlib.pyplot as plt

from helpers import grid_search, tune_hyperparameters

# numpy is used by the box plot and summary-statistics cells below
import numpy as np

In [None]:
# Experiment-wide settings shared by the cells below.
N = 1_000              # argument passed to prologue.generate_pair_sets
epochs = 25            # number of training epochs; same value the train_model calls pass as nb_epochs
mini_batch_size = 100  # mini-batch size used while tuning lambda and the learning rate

In [None]:
######################################################
# Generate the k-fold cross-validation splits.
# The splits are built once and reused by every tuning step below,
# so all hyperparameter candidates are scored on the same folds.
######################################################

(kfold_input, kfold_target, kfold_classes,
 test_input, test_target, test_classes) = prologue.generate_pair_sets(N)

k_folds = 4
shuffle_indices = torch.randperm(len(kfold_input))
# tensor_split always returns exactly k_folds chunks whose sizes differ by at
# most one, so every sample ends up in exactly one validation fold even when
# the number of samples is not a multiple of k_folds.
split_indices = torch.tensor_split(shuffle_indices, k_folds)


def _select_fold(indices):
    """Return the input / target / classes rows of the k-fold data at `indices`."""
    return {'input': kfold_input[indices],
            'target': kfold_target[indices],
            'classes': kfold_classes[indices]}


kfold_train_dict = {}
kfold_validation_dict = {}

for i in range(k_folds):
    # Fold i validates on chunk i and trains on all remaining chunks.
    kfold_validation_indices = split_indices[i]
    kfold_train_indices = torch.cat(split_indices[:i] + split_indices[i + 1:])

    kfold_train_dict[i] = _select_fold(kfold_train_indices)
    kfold_validation_dict[i] = _select_fold(kfold_validation_indices)

    
######################################################
# k-fold cross-validation for lambda and learn rate:
# candidate grids and result container
######################################################
# All grids are log-spaced (base 10): lambda from 1e-2 to 1e0, learning rate
# from 1e-4 to 1e1. learn_rates2 is the coarser grid combined with lambdas.
lambdas = torch.logspace(-2, 0, steps=5)
learn_rates = torch.logspace(-4, 1, steps=10)
learn_rates2 = torch.logspace(-4, 1, steps=5)

# model_noaux is tuned over learn_rates only (lambda fixed to 0);
# model_aux is tuned over every (lambda, learn_rates2) combination.
model_noaux = ['siam', 'naive']
model_aux = ['siam_aux_hidden', 'siam_aux']

# Filled per architecture with the mean train error, validation error and
# training loss of every grid point.
grid_model_dict = dict()

print('======================================================')
print('Tuning lambda and learn rate for each architecture....')

# Learning-rate grid search for the architectures trained without the
# `classes` tensor (lambda is fixed to 0 for these models).
# NOTE(review): NaiveCNN, SharedWeight, train_model and compute_nb_errors are
# not defined or imported in the visible cells - confirm where they come from.
noaux_builders = {
    'naive': lambda: NaiveCNN(1),
    'siam': lambda: SharedWeight(1),
}

for m in model_noaux:
    if m not in noaux_builders:
        raise ValueError('invalid model type: ' + str(m))
    build_model = noaux_builders[m]

    train_error = []
    validation_error = []
    losses_train = []

    for lr in learn_rates:
        kfold_losses_train = []
        kfold_train_error = []
        kfold_validation_error = []

        for i in range(k_folds):
            # Build a new network for every fold. Assuming train_model updates
            # the network in place, reusing one instance across folds would
            # carry its weights over, and later folds would be validated on
            # samples the network had already been trained on.
            Model = build_model()
            fold_train = kfold_train_dict[i]
            fold_validation = kfold_validation_dict[i]

            kfold_losses_train.append(
                train_model(Model, fold_train['input'], fold_train['target'],
                            learn_rate_=lr, lambda_=0,
                            mini_batch_size=mini_batch_size, nb_epochs=epochs))

            kfold_train_error.append(
                compute_nb_errors(Model, fold_train['input'], fold_train['target'], mini_batch_size))
            kfold_validation_error.append(
                compute_nb_errors(Model, fold_validation['input'], fold_validation['target'], mini_batch_size))

        # One averaged entry per learning rate, in the order of learn_rates.
        train_error.append(torch.tensor(kfold_train_error, dtype=torch.float).mean())
        losses_train.append(torch.tensor(kfold_losses_train, dtype=torch.float).mean())
        validation_error.append(torch.tensor(kfold_validation_error, dtype=torch.float).mean())

    grid_model_dict[m] = {'train_error': train_error,
                          'validation_error': validation_error,
                          'losses_train': losses_train}
    
# Joint lambda / learning-rate grid search for the architectures that are
# trained with the `classes` tensor as an additional argument to train_model.
aux_builders = {
    'siam_aux': lambda: SharedWeight(1),
    'siam_aux_hidden': lambda: SharedWeight(2),
}

for m in model_aux:
    if m not in aux_builders:
        raise ValueError('invalid model type: ' + str(m))
    build_model = aux_builders[m]

    train_error = []
    validation_error = []
    losses_train = []

    # lambda is the outer loop and the learning rate the inner one; the
    # selection step further down rebuilds the candidate list in this order.
    for lbd in lambdas:
        for lr in learn_rates2:
            kfold_losses_train = []
            kfold_train_error = []
            kfold_validation_error = []

            for i in range(k_folds):
                # Build a new network for every fold. Assuming train_model
                # updates the network in place, reusing one instance across
                # folds would carry its weights over, and later folds would be
                # validated on samples the network had already been trained on.
                Model = build_model()
                fold_train = kfold_train_dict[i]
                fold_validation = kfold_validation_dict[i]

                kfold_losses_train.append(
                    train_model(Model, fold_train['input'], fold_train['target'], fold_train['classes'],
                                learn_rate_=lr, lambda_=lbd,
                                mini_batch_size=mini_batch_size, nb_epochs=epochs))

                kfold_train_error.append(
                    compute_nb_errors(Model, fold_train['input'], fold_train['target'], mini_batch_size))
                kfold_validation_error.append(
                    compute_nb_errors(Model, fold_validation['input'], fold_validation['target'], mini_batch_size))

            # One averaged entry per (lambda, learning rate) combination.
            train_error.append(torch.tensor(kfold_train_error, dtype=torch.float).mean())
            losses_train.append(torch.tensor(kfold_losses_train, dtype=torch.float).mean())
            validation_error.append(torch.tensor(kfold_validation_error, dtype=torch.float).mean())

    grid_model_dict[m] = {'train_error': train_error,
                          'validation_error': validation_error,
                          'losses_train': losses_train}


######################################################
# Pick lambda and the learning rate of each model
# from the lowest mean validation error
######################################################

def _best_index(model_name):
    """Return the position of the smallest validation error stored for `model_name`."""
    return torch.argmin(torch.tensor(grid_model_dict[model_name]['validation_error']))


# Candidate (lambda, learn rate) pairs with lambda varying slowest, i.e. the
# order in which the lambda / learn-rate grid search appended its results.
hyperparam_pair = [
    {'lambda': lbd, 'learn_rate': lr}
    for lbd in lambdas
    for lr in learn_rates2
]

# Models searched over learn_rates only keep lambda at 0.
hyperparam_dict_1 = {
    m: {'lambda': 0, 'learn_rate': learn_rates[_best_index(m)]}
    for m in model_noaux
}

# Models searched over the joint grid take both values from the best pair.
for m in model_aux:
    best_pair = hyperparam_pair[_best_index(m)]
    hyperparam_dict_1[m] = {'lambda': best_pair['lambda'], 'learn_rate': best_pair['learn_rate']}


print('Finished tuning lambda and learn rate for each architecture')
print(hyperparam_dict_1)


######################################################
# k-fold cross-validation for the mini-batch size
######################################################

print('======================================================')
print('Tuning batch size for each architecture....')

batchsizes = [5,10,25,50,125]
batchsize_dict = {}

model_type = ['naive','siam','siam_aux','siam_aux_hidden']

# Per architecture: how to build it and whether train_model receives the
# `classes` tensor. An explicit table replaces conditions of the form
# `m == 'a' or 'b'`, which are always true because a non-empty string is truthy.
batch_model_specs = {
    'naive': (lambda: NaiveCNN(1), False),
    'siam': (lambda: SharedWeight(1), False),
    'siam_aux': (lambda: SharedWeight(1), True),
    'siam_aux_hidden': (lambda: SharedWeight(2), True),
}

for m in model_type:
    if m not in batch_model_specs:
        raise ValueError('invalid model type: ' + str(m))
    build_model, uses_classes = batch_model_specs[m]

    losses_train = []
    train_error = []
    validation_error = []
    # Reuse the lambda / learning rate selected in the previous tuning step.
    lbd = hyperparam_dict_1[m]['lambda']
    lr = hyperparam_dict_1[m]['learn_rate']

    for batch in batchsizes:
        kfold_losses_train = []
        kfold_train_error = []
        kfold_validation_error = []

        for i in range(k_folds):
            # Build a new network for every fold. Assuming train_model updates
            # the network in place, reusing one instance across folds would
            # carry its weights over, and later folds would be validated on
            # samples the network had already been trained on.
            Model = build_model()
            fold_train = kfold_train_dict[i]
            fold_validation = kfold_validation_dict[i]

            # Train with the candidate batch size under evaluation.
            if uses_classes:
                fold_loss = train_model(Model, fold_train['input'], fold_train['target'], fold_train['classes'],
                                        learn_rate_=lr, lambda_=lbd,
                                        mini_batch_size=batch, nb_epochs=epochs)
            else:
                fold_loss = train_model(Model, fold_train['input'], fold_train['target'],
                                        learn_rate_=lr, lambda_=lbd,
                                        mini_batch_size=batch, nb_epochs=epochs)
            kfold_losses_train.append(fold_loss)

            kfold_train_error.append(
                compute_nb_errors(Model, fold_train['input'], fold_train['target'], batch))
            kfold_validation_error.append(
                compute_nb_errors(Model, fold_validation['input'], fold_validation['target'], batch))

        # One averaged entry per batch size, in the order of batchsizes.
        train_error.append(torch.tensor(kfold_train_error, dtype=torch.float).mean())
        losses_train.append(torch.tensor(kfold_losses_train, dtype=torch.float).mean())
        validation_error.append(torch.tensor(kfold_validation_error, dtype=torch.float).mean())

    # Train vs. validation error as a function of the batch size.
    fig = plt.figure()
    plt.semilogx(batchsizes, train_error, c='steelblue', label="train_error", marker='x')
    plt.semilogx(batchsizes, validation_error, c='orangered', label="validation_error", marker='x')
    plt.title(m + " network hyperparameter optimization - mini batch size")
    plt.xlabel("mini batch size")
    plt.ylabel("nb error")
    plt.legend()
    # Save through the figure handle before showing it, so the file is
    # written regardless of what the backend does to the figure on show().
    fig.savefig(m + "_batch_size_CV.pdf")
    plt.show()

    # Keep the batch size with the lowest mean validation error.
    batchsize_dict[m] = batchsizes[int(torch.argmin(torch.tensor(validation_error)))]
    
######################################################
# Merge the tuned lambda, learn rate and batch size
# into one dict per architecture
######################################################
hyperparam_dict = {
    m: {
        'lambda': hyperparam_dict_1[m]['lambda'],
        'learn_rate': hyperparam_dict_1[m]['learn_rate'],
        'batchsize': batchsize_dict[m],
    }
    for m in model_type
}

print('Finished tuning batch size for each architecture')
print('This is the final aggregation of the hyperparams')
print(hyperparam_dict)


######################################################
# Train every architecture with its tuned settings and
# record the train / test errors over repeated rounds
######################################################

print('======================================================')
print('Testing the model performances....')

import numpy as np

# One list per architecture and data split; every round appends one entry.
NAIVE_train_error, NAIVE_test_error = [], []
SIAM_train_error, SIAM_test_error = [], []
SIAM_auxi_train_error, SIAM_auxi_test_error = [], []
SIAM_auxi_5_train_error, SIAM_auxi_5_test_error = [], []

# The tuned settings are the same in every round, so look them up once.
hnaive = hyperparam_dict['naive']
hsiam = hyperparam_dict['siam']
hsiamaux = hyperparam_dict['siam_aux']
hsiamhidaux = hyperparam_dict['siam_aux_hidden']

for i in range(10):
    # Data sets for this round.
    (train_input, train_target, train_classes,
     test_input, test_target, test_classes) = prologue.generate_pair_sets(N)

    # New, untrained networks for this round.
    NaiveCN = NaiveCNN(1)
    Siamese = SharedWeight(1)
    Siamese_auxi = SharedWeight(1)
    Siamese_auxi_5 = SharedWeight(2)

    # The first two models are trained on the targets only; the two "auxi"
    # models additionally receive train_classes.
    train_model(
        NaiveCN, train_input, train_target,
        learn_rate_=hnaive['learn_rate'], lambda_=hnaive['lambda'],
        mini_batch_size=hnaive['batchsize'], nb_epochs=25,
    )
    train_model(
        Siamese, train_input, train_target,
        learn_rate_=hsiam['learn_rate'], lambda_=hsiam['lambda'],
        mini_batch_size=hsiam['batchsize'], nb_epochs=25,
    )
    train_model(
        Siamese_auxi, train_input, train_target, train_classes,
        learn_rate_=hsiamaux['learn_rate'], lambda_=hsiamaux['lambda'],
        mini_batch_size=hsiamaux['batchsize'], nb_epochs=25,
    )
    train_model(
        Siamese_auxi_5, train_input, train_target, train_classes,
        learn_rate_=hsiamhidaux['learn_rate'], lambda_=hsiamhidaux['lambda'],
        mini_batch_size=hsiamhidaux['batchsize'], nb_epochs=25,
    )

    # Evaluate each network on the train set, then on the test set, using
    # the batch size tuned for that architecture.
    for network, hp, train_log, test_log in (
        (Siamese_auxi_5, hsiamhidaux, SIAM_auxi_5_train_error, SIAM_auxi_5_test_error),
        (Siamese_auxi, hsiamaux, SIAM_auxi_train_error, SIAM_auxi_test_error),
        (Siamese, hsiam, SIAM_train_error, SIAM_test_error),
        (NaiveCN, hnaive, NAIVE_train_error, NAIVE_test_error),
    ):
        train_log.append(compute_nb_errors(network, train_input, train_target, hp['batchsize']))
        test_log.append(compute_nb_errors(network, test_input, test_target, hp['batchsize']))

    
# Box plot comparing the train / test error rate (in percent) of all models.
fig = plt.figure(figsize=(12, 6), dpi=80)


def _to_percent(nb_errors):
    """Convert error counts measured on N samples into percentages."""
    # Derived from N instead of a hard-coded "/10", which is only a
    # percentage when N == 1000.
    return 100 * np.array(nb_errors) / N


vals = [_to_percent(NAIVE_train_error),
        _to_percent(NAIVE_test_error),
        _to_percent(SIAM_train_error),
        _to_percent(SIAM_test_error),
        _to_percent(SIAM_auxi_train_error),
        _to_percent(SIAM_auxi_test_error),
        _to_percent(SIAM_auxi_5_train_error),
        _to_percent(SIAM_auxi_5_test_error)
       ]
labels = ["NAIVE train\n(1 hidden)",
          "NAIVE test\n(1 hidden)",
          "SIAM_train\n(1 hidden)",
          "SIAM_test\n(1 hidden)",
          "SIAM+auxi train\n(1 hidden)",
          "SIAM+auxi test\n(1 hidden)",
          "SIAM+auxi train\n(2 hidden)",
          "SIAM+auxi test\n(2 hidden)"
         ]

plt.boxplot(vals, labels=labels, zorder=1)
# Overlay the individual rounds on top of each box; the number of points
# follows the data instead of assuming exactly 10 rounds.
for i in range(len(vals)):
    plt.plot([i + 1] * len(vals[i]), vals[i], ".", markersize=7, label=labels[i].replace('\n', ' '))
# Dashed reference line at 15 % error spanning all boxes.
plt.hlines(15, xmin=0.5, xmax=len(vals) + 0.5, ls='--', color='gray')
plt.title("Models performance comparison")
plt.ylabel("error [%]")
plt.legend()
# Save through the figure handle before showing it, so the file is written
# regardless of what the backend does to the figure on show().
fig.savefig("models_error_boxplot_2.pdf")
plt.show()

print('Finished testing the model performances')

In [None]:
# Number of trainable parameters of Siamese_auxi_5 (built as SharedWeight(2)).
sum(weight.numel() for weight in Siamese_auxi_5.parameters() if weight.requires_grad)

In [None]:
# Number of trainable parameters of Siamese_auxi (built as SharedWeight(1)).
sum(weight.numel() for weight in Siamese_auxi.parameters() if weight.requires_grad)

In [None]:
# Number of trainable parameters of Siamese (built as SharedWeight(1)).
sum(weight.numel() for weight in Siamese.parameters() if weight.requires_grad)

In [None]:
# Test-error counts of every model, one list per model, in the order
# naive, siamese, siamese + auxiliary, siamese + auxiliary (SharedWeight(2)).
val = [NAIVE_test_error, SIAM_test_error, SIAM_auxi_test_error, SIAM_auxi_5_test_error]

In [None]:
# Standard deviation of the test error (in percent) of each model, computed
# directly from the recorded rounds. Drawing 1000 samples with
# np.random.choice from those few values only added run-to-run noise to the
# same statistic and made the printed numbers irreproducible.
for errors in val:
    print((100 * np.array(errors) / N).std())

In [None]:
# Mean test error (in percent) of each model, computed directly from the
# recorded rounds. Drawing 1000 samples with np.random.choice from those few
# values only added run-to-run noise to the same statistic and made the
# printed numbers irreproducible.
for errors in val:
    print((100 * np.array(errors) / N).mean())

In [None]:
# Mean test accuracy (in percent) of each model: 100 minus the mean test
# error, computed directly from the recorded rounds. Drawing 1000 samples
# with np.random.choice from those few values only added run-to-run noise to
# the same statistic and made the printed numbers irreproducible.
for errors in val:
    print(100 - (100 * np.array(errors) / N).mean())