In [77]:
import torch
import torch.nn as nn
from IPython.display import clear_output
from IPython.core.debugger import set_trace
from torch.nn import functional as F
from torch import optim
import pickle
import math
%load_ext autoreload
%autoreload 2
import dlc_practical_prologue as dl

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Load the MNIST split through the course helper, keeping the samples un-flattened.
train_input, train_target, test_input, test_target = dl.load_data(flatten=False)
# Halve the spatial resolution with 2x2 average pooling. Use the `F` alias imported
# at the top of the notebook rather than `torch.functional.F`, which is not a
# documented entry point of torch.
train_input = F.avg_pool2d(train_input, kernel_size=2)
test_input = F.avg_pool2d(test_input, kernel_size=2)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [3]:
class LeNet(nn.Module):
    """LeNet-style CNN: two conv/max-pool/batch-norm/ReLU blocks, then two linear layers.

    The first linear layer takes 256 features per sample. With the unpadded 5x5
    convolutions and 2x2 poolings below, that is what a 1x28x28 input produces
    (16 channels x 4 x 4). The network outputs 10 logits per sample.
    """
    def __init__(self):
        super(LeNet, self).__init__()
        self.block1 = nn.Sequential(nn.Conv2d(1, 32, kernel_size=5, stride=1),
                                    nn.MaxPool2d(kernel_size=2, stride=2),
                                    nn.BatchNorm2d(32),
                                    nn.ReLU())
        self.block2 = nn.Sequential(nn.Conv2d(32, 16, kernel_size=5, stride=1),
                                    nn.MaxPool2d(kernel_size=2, stride=2),
                                    nn.BatchNorm2d(16),
                                    nn.ReLU())
        self.lins = nn.Sequential(nn.Linear(256, 84),
                                  nn.ReLU(),
                                  nn.Linear(84, 10))

    def forward(self, x):
        """Return the class logits, one row per input sample."""
        x = self.block1(x)
        x = self.block2(x)
        # Flatten per sample instead of `view(-1, 256)`: with an unexpected input
        # size the old form silently folded extra features into the batch
        # dimension; this form makes the linear layer raise a shape error instead.
        x = self.lins(x.view(x.size(0), -1))
        return x

In [4]:
def get_accuracy(model,inputs,targets):
    """Print and return the fraction of samples whose arg-max prediction equals the target.

    Args:
        model: module mapping a batch of inputs to per-class scores (classes on dim 1)
        inputs: input samples, first dimension is the sample index
        targets: class index of each sample, same length as `inputs`
    Returns:
        the accuracy as a float in [0, 1]
    """
    assert(inputs.size(0) == targets.size(0))
    nb_correct = 0
    batch_size = 20
    # Evaluate in eval mode so that layers such as BatchNorm use their running
    # statistics and are not updated by the evaluation data; the previous mode
    # is restored afterwards so training can resume unchanged.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for scoring.
        with torch.no_grad():
            for batch,target in zip(inputs.split(batch_size),
                                    targets.split(batch_size)):
                pred = torch.argmax(model(batch), dim=1)
                nb_correct += (pred == target).sum().item()
    finally:
        model.train(was_training)
    accuracy = nb_correct / inputs.size(0)
    print("accuracy: %.2f" % (accuracy) )
    return accuracy

In [5]:
def train_model(model,train_input,train_target,nb_epochs=25):
    """Train `model` in place with AdamW (lr=1e-3) and a cross-entropy loss.

    Args:
        model: module mapping a batch of inputs to per-class scores
        train_input: training samples, first dimension is the sample index
        train_target: class index of each training sample
        nb_epochs: number of passes over the training set
    """
    optimizer = optim.AdamW(model.parameters(), lr = 1e-3)
    batch_size = 100
    criterion = nn.CrossEntropyLoss()
    # Make sure mode-dependent layers (e.g. BatchNorm) are in training mode even
    # if the model was switched to eval mode before this call.
    model.train()
    for e in range(nb_epochs):
        # Replace the previous progress line instead of stacking outputs.
        clear_output(wait=True)
        print("Progression:{:.2f}".format(e/nb_epochs*100))
        for inputs,targets in zip(train_input.split(batch_size),
                                  train_target.split(batch_size)):
            loss = criterion(model(inputs),targets)
            # Reset the gradients of exactly the parameters the optimizer updates.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [7]:
class Net2(nn.Module):
    """Small CNN: two conv(3x3)/max-pool/batch-norm/ReLU stages and a linear classifier.

    Args:
        n_hidden: width of the hidden layer of the classifier
        chan: number of channels of the input images

    The classifier takes 256 features per sample and returns 10 scores.
    """
    def __init__(self, n_hidden=256, chan=1):
        super().__init__()
        self.hidden = n_hidden

        def conv_stage(c_in, c_out):
            # conv -> 2x2 max-pool -> batch-norm -> ReLU
            return nn.Sequential(
                nn.Conv2d(c_in, c_out, kernel_size=3),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
            )

        self.conv_block1 = conv_stage(chan, 32)
        self.conv_block2 = conv_stage(32, 64)
        self.classifier = nn.Sequential(
            nn.Linear(256, n_hidden),
            nn.BatchNorm1d(n_hidden),
            nn.Linear(n_hidden, 10),
        )

    def forward(self, x):
        """Return the 10 class scores for each sample of the batch `x`."""
        features = self.conv_block2(self.conv_block1(x))
        # One row of features per sample.
        flat = features.view(features.size(0), -1)
        return self.classifier(flat)

In [8]:
# Baseline: fit one Net2 on the training digits, then score it on the test digits.
model = Net2()
train_model(model=model, train_input=train_input, train_target=train_target)
get_accuracy(model=model, inputs=test_input, targets=test_target)

Progression:96.00
accuracy: 0.93


0.934

In [9]:
def Kfold_CV(classtype,inputs,targets,K=4):
    """Run K-fold cross-validation and print the per-fold and mean accuracy.

    Args:
        classtype: zero-argument callable building a fresh model for each fold
        inputs: input samples, first dimension is the sample index
        targets: class index of each sample
        K: number of folds (at least 2, at most the number of samples)
    """
    assert(K>=2)
    N = inputs.size(0)
    assert N >= K, "need at least one sample per fold"
    # Partition a random permutation into exactly K folds whose sizes differ by
    # at most one. Splitting by int(N/K) left a (K+1)-th remainder chunk that
    # was never used for testing when K does not divide N.
    base, extra = divmod(N, K)
    fold_sizes = [base + 1 if k < extra else base for k in range(K)]
    indxes = torch.randperm(N).split(fold_sizes)
    accs = torch.empty(K)
    for k in range(K):
        model = classtype()

        # Fold k is held out, all the other folds are used for training.
        test_indx = indxes[k]
        train_indx = torch.cat(indxes[:k] + indxes[k+1:], 0)

        train_inp,train_targ = inputs[train_indx],targets[train_indx]
        test_inp,test_targ = inputs[test_indx],targets[test_indx]
        train_model(model,train_inp,train_targ)
        accs[k] = get_accuracy(model,test_inp,test_targ)
    print("Accuracies for {}-fold:{}".format(K,accs.tolist()))
    print("Mean acc:{}".format(accs.mean()))

In [10]:
# Cross-validate the single-digit classifier on the training set (default K).
Kfold_CV(classtype=Net2, inputs=train_input, targets=train_target)

Progression:96.00
accuracy: 0.93
Accuracies for 4-fold:[0.9160000085830688, 0.9079999923706055, 0.9359999895095825, 0.9319999814033508]
Mean acc:0.9229999780654907


## Dealing with Pairs of Images

In [11]:
# Banner of 20 '#' characters printed around section titles in the outputs.
sep = 20 * "#"

In [12]:
def accuracy_double_model(model,train_input,train_target,train_classes,verbose=False):
    """Score a pair model on its two classification outputs and its comparison output.

    Args:
        model: module returning the tuple (x0, x1, comp) for a batch of inputs
        train_input: input samples, first dimension is the sample index
        train_target: target index of the comparison output for each sample
        train_classes: per-sample targets; column 0 is scored against x0,
            column 1 against x1
        verbose: print the three accuracies when True
    Returns:
        (acc0, acc1, acc_comp): accuracy of x0, of x1 and of comp, each in [0, 1]
    """
    assert(train_input.size(0) == train_target.size(0))
    N = train_input.size(0)
    batch_size = 20

    def count_correct(pred,target):
        # Given prediction scores and the targets, count the correctly classified samples.
        return (torch.argmax(pred,dim = 1) == target).sum().item()

    score0 = 0
    score1 = 0
    scorecomp = 0

    # Evaluate in eval mode so that layers such as BatchNorm use their running
    # statistics and are not updated by the evaluation data; the previous mode
    # is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for scoring.
        with torch.no_grad():
            for inputs,comp_targs,classes in zip(train_input.split(batch_size),
                                                 train_target.split(batch_size),
                                                 train_classes.split(batch_size)):
                targ0 = classes[:,0]
                targ1 = classes[:,1]
                x0,x1,comp = model(inputs)

                score0 += count_correct(x0,targ0)
                score1 += count_correct(x1,targ1)
                scorecomp += count_correct(comp,comp_targs)
    finally:
        model.train(was_training)

    acc0 = score0 / N
    acc1 = score1 / N
    acc_comp = scorecomp / N

    if verbose:
        print("Accuracy 1st Network: {:^10.2f}".format(acc0) )
        print("Accuracy 2nd Network: {:^10.2f}".format(acc1))
        print("Accuracy comparison: {:^12.2f}".format(acc_comp))

    return acc0,acc1,acc_comp

In [13]:
class LeNet2imgs(nn.Module):
    """Pair model: classify each of the two images, then compare the two score vectors.

    Args:
        chan: currently unused; each image is fed to its classifier as a
            single-channel input (see `forward`)
        share_weights: when True (default, the historical behaviour) both images
            go through `net0`; when False the second image goes through `net1`,
            which was previously built but never reachable

    `net1` is always constructed so that the parameter set and state-dict keys
    do not depend on `share_weights`.
    """
    def __init__(self,chan = 1,share_weights = True):
        super(LeNet2imgs,self).__init__()
        self.share_weights = share_weights
        self.net0 = Net2()
        self.net1 = Net2()
        # Maps the 2 x 10 concatenated class scores to the 2 comparison scores.
        self.linblock = nn.Sequential(nn.Linear(20,40),
                                      nn.LeakyReLU(),
                                      nn.Linear(40,80),
                                      nn.LeakyReLU(),
                                      nn.Linear(80,2))

    def forward(self,x):
        """Return (x0, x1, comp): scores for image 0, for image 1, and the comparison scores."""
        second_net = self.net0 if self.share_weights else self.net1
        # x[:, i] selects image i of every pair; unsqueeze restores the channel dimension.
        x0 = self.net0(x[:,0].unsqueeze(1))
        x1 = second_net(x[:,1].unsqueeze(1))
        comp = torch.cat((x0,x1),dim=1)
        comp = self.linblock(comp)
        return x0,x1,comp

In [51]:
def train_double_model(train_input,train_target,train_classes,\
                       model,crit_comp,optimizer,lr,lambd_,nb_epochs=5,verbose=False):
    """Train a pair model in place on a weighted sum of comparison and classification losses.

    Args:
        train_input: input samples, first dimension is the sample index
        train_target: target index of the comparison output for each sample
        train_classes: per-sample targets; column 0 is for x0, column 1 for x1
        model: the model to train, returning the tuple (x0, x1, comp)
        crit_comp: class of the comparison criterion (instantiated here);
            nn.MSELoss is fed one-hot encoded targets
        optimizer: class of the optimizer, built as optimizer(parameters, lr=lr)
        lr: learning rate
        lambd_: total loss is lambd_ * comp_loss + (1-lambd_) * class_loss
        nb_epochs: number of passes over the training set
        verbose: show the progression when True
    """
    comp_criterion = crit_comp()
    optimizer = optimizer(model.parameters(), lr = lr)
    # MSELoss compares tensors element-wise, so class-index targets must be
    # turned into one-hot rows matching the (batch, n_scores) comparison output;
    # passing the raw indices fails on the shape mismatch.
    needs_one_hot = isinstance(comp_criterion, nn.MSELoss)

    batch_size = 100
    crit_class = nn.CrossEntropyLoss()
    # Make sure mode-dependent layers (e.g. BatchNorm) are in training mode.
    model.train()
    for e in range(nb_epochs):
        if verbose:
            clear_output(wait=True)
            print("Progression:{:.2f}".format(e/nb_epochs*100))
        for inputs,comp_targs,classes in zip(train_input.split(batch_size),
                                             train_target.split(batch_size),
                                             train_classes.split(batch_size)):
            targ0 = classes[:,0]
            targ1 = classes[:,1]
            x0,x1,comp = model(inputs)
            if needs_one_hot and not comp_targs.is_floating_point():
                comp_targs = F.one_hot(comp_targs, num_classes=comp.size(1)).to(comp.dtype)
            loss_class = crit_class(x0,targ0) + crit_class(x1,targ1)
            loss_comp = comp_criterion(comp,comp_targs)
            totloss = lambd_ * loss_comp + (1- lambd_) *loss_class
            optimizer.zero_grad()
            totloss.backward()
            optimizer.step()

In [52]:
# Build the paired-image data sets used by the comparison task.
# N_SAMPLES is passed as first argument of dl.mnist_to_pairs — presumably the
# number of pairs to generate; confirm against dlc_practical_prologue.
N_SAMPLES = 500
# a, b: train input / target; c, d: test input / target (same return order as
# the dl.load_data call earlier in the notebook).
a, b, c, d = dl.load_data(flatten=False)
# Each call yields (input, target, classes): the target is used for the
# comparison output and the classes for the two per-image classifiers.
train2_input, train2_target, train2_classes = dl.mnist_to_pairs(N_SAMPLES,a,b)
test2_input, test2_target, test2_classes = dl.mnist_to_pairs(N_SAMPLES,c,d)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [53]:
def Kfold_CVdouble(inputs,targets,classes,\
                   archi,crit_comp,optimizer,lr,lambd_,K=4,verbose=False):
    """K-fold cross-validation of a pair model for one hyper-parameter setting.

    Args:
        inputs: input samples, first dimension is the sample index
        targets: target index of the comparison output for each sample
        classes: per-sample targets of the two classification outputs
        archi: zero-argument callable building a fresh model for each fold
        crit_comp: class of the criterion used for the comparison output
        optimizer: class of the optimizer
        lr: learning rate
        lambd_: total loss is lambd_ * comp_loss + (1-lambd_) * class_loss
        K: number of folds (at least 2, at most the number of samples)
        verbose: print the progression and the mean accuracies when True
    Returns:
        list of the K comparison accuracies, one per fold
    """
    assert(K>=2)
    N = inputs.size(0)
    assert N >= K, "need at least one sample per fold"
    # Partition a random permutation into exactly K folds whose sizes differ by
    # at most one. Splitting by int(N/K) left a (K+1)-th remainder chunk that
    # was never used for testing when K does not divide N.
    base, extra = divmod(N, K)
    fold_sizes = [base + 1 if k < extra else base for k in range(K)]
    indxes = torch.randperm(N).split(fold_sizes)
    accs = torch.empty(K,3)
    for k in range(K):
        model = archi()

        # Fold k is held out, all the other folds are used for training.
        test_indx = indxes[k]
        train_indx = torch.cat(indxes[:k] + indxes[k+1:], 0)

        train_inp = inputs[train_indx]
        train_targ = targets[train_indx]
        train_classes = classes[train_indx]

        test_inp  = inputs[test_indx]
        test_targ = targets[test_indx]
        test_classes = classes[test_indx]

        train_double_model(train_inp,train_targ,train_classes,\
                           model,crit_comp,optimizer,lr,lambd_,verbose=verbose)
        res = accuracy_double_model(model,test_inp,test_targ,test_classes)
        # column 0: 1st classifier acc, column 1: 2nd classifier acc, column 2: comparison acc
        accs[k] = torch.tensor(res)
    if verbose:
        print(sep + "Accuracies for {}-fold:".format(K) + sep)
        print("1st group acc:{:^14.2f}".format(accs[:,0].mean().item()))
        print("2nd group acc:{:^14.2f}".format(accs[:,1].mean().item()))
        print("Comparison acc:{:^12.2f}".format(accs[:,2].mean().item()))
    return accs[:,2].tolist()

In [56]:
# 5-fold cross-validation of the pair model for one hand-picked setting.
Kfold_CVdouble(
    train2_input, train2_target, train2_classes,
    archi=LeNet2imgs,
    crit_comp=nn.CrossEntropyLoss,
    optimizer=optim.SGD,
    lr=1e-0,
    lambd_=0.75,
    K=5,
    verbose=True,
)

Progression:80.00
####################Accuracies for 5-fold:####################
1st group acc:     0.85     
2nd group acc:     0.86     
Comparison acc:    0.81    


[0.800000011920929,
 0.8299999833106995,
 0.6899999976158142,
 0.8700000047683716,
 0.8799999952316284]

In [130]:
# Train one pair model on the whole training set, then report train and test accuracy.
net2 = LeNet2imgs()
train_double_model(
    train2_input, train2_target, train2_classes,
    model=net2,
    crit_comp=nn.CrossEntropyLoss,
    optimizer=optim.SGD,
    lr=1e-0,
    lambd_=0.75,
    nb_epochs=25,
)

print(sep + "Train accuracy:" + sep)
accuracy_double_model(net2, train2_input, train2_target, train2_classes, verbose=True)

print(sep+ "Test accuracy:" + sep)
# Bind the result to `_` so the returned tuple is not echoed as the cell output.
_ = accuracy_double_model(net2, test2_input, test2_target, test2_classes, verbose=True)

Progression:96.00
####################Train accuracy:####################
Accuracy 1st Network:    1.00   
Accuracy 2nd Network:    1.00   
Accuracy comparison:     1.00    
####################Test accuracy:####################
Accuracy 1st Network:    0.92   
Accuracy 2nd Network:    0.93   
Accuracy comparison:     0.84    


## Grid Search Model

In [59]:
# Search space of the grid search: one list per hyper-parameter axis.

# Axis 1: model architectures
Archis = [LeNet2imgs]

# Axis 2: criterion classes for the comparison output
CompLoss = [nn.CrossEntropyLoss, nn.MSELoss]

# Axis 3: optimizer classes
Optimizers = [optim.SGD, optim.Adam, optim.Adagrad, optim.AdamW]

# Axis 4: learning rates
LRs = [1e-4, 1e-3, 1e-2, 1e-1, 1]

# Axis 5: weight lambd_ of the comparison loss in the total loss
Lambdas = [0.4, 0.7, 0.9]

In [71]:
class Param():
    """One hyper-parameter combination of the grid, together with its score.

    `params` lists the five values in the order (arch, loss, optim, lr, lambd_);
    `score` starts at -1.
    """
    @staticmethod
    def parse(classi):
        """Short name of `classi`: what follows the last '.' of str(classi)
        (the whole string if there is none), cut at the first single quote."""
        after_last_dot = str(classi).rsplit('.', 1)[-1]
        return after_last_dot.partition("'")[0]

    def __init__(self, arch, loss, optim, lr, lambd_):
        self.arch, self.loss, self.optim = arch, loss, optim
        self.lr, self.lambd_ = lr, lambd_
        self.params = [arch, loss, optim, lr, lambd_]
        self.score = -1

    def __str__(self):
        # arch_loss_optim_lr_lambda_#score#_score
        fields = (Param.parse(self.arch),
                  Param.parse(self.loss),
                  Param.parse(self.optim),
                  self.lr,
                  self.lambd_,
                  "#score#",
                  self.score)
        return "_".join(str(field) for field in fields)

    def __repr__(self):
        return str(self)


In [72]:
# Five-level nested list indexed as
# HYPER_GRID[architecture][comparison loss][optimizer][learning rate][lambda],
# holding one Param per combination.
# Built with comprehensions so that no loop variable leaks into the notebook
# namespace: the previous loops rebound the global `optim` (the torch.optim
# module) to an optimizer class and overwrote `a`, `b`, `c`, `d`.
HYPER_GRID = [
    [
        [
            [
                [Param(archi_cls, loss_cls, optimizer_cls, lr_value, lambda_value)
                 for lambda_value in Lambdas]
                for lr_value in LRs
            ]
            for optimizer_cls in Optimizers
        ]
        for loss_cls in CompLoss
    ]
    for archi_cls in Archis
]

In [73]:
def lin_view(HYPER_GRID):
    """Flatten the five-level nested grid into a single list.

    Elements keep the nested iteration order (innermost level varies fastest)
    and are the same objects as in the grid, not copies.
    """
    flat = []
    for per_arch in HYPER_GRID:
        for per_loss in per_arch:
            for per_optimizer in per_loss:
                for per_lr in per_optimizer:
                    flat.extend(per_lr)
    return flat

In [74]:
# Show the first grid entry (rendered through Param.__repr__) to check the naming format.
lin_view(HYPER_GRID)[0]

LeNet2imgs_CrossEntropyLoss_SGD_0.0001_0.4_#score#_-1

In [70]:
def GridSearch(HYPER_GRID):
    """Cross-validate every Param of the grid, store its score, and pickle the grid.

    Args:
        HYPER_GRID: five-level nested list of Param objects; each Param's
            `score` is set in place to the mean comparison accuracy over the folds

    The grid is written to 'HYPERPARAM.pkl' even when the search is interrupted
    or a configuration raises, so the scores computed so far are kept;
    configurations that were not reached keep their initial score.
    """
    linGRID = lin_view(HYPER_GRID)
    try:
        for i,param in enumerate(linGRID):
            clear_output(wait=True)
            print("Grid Search progression: {} %".format(i/len(linGRID)*100))
            arch,loss,opt,lr,lambds_ = param.params
            fold_accs = Kfold_CVdouble(train2_input,train2_target,train2_classes,\
                                       arch,loss,opt,lr,lambds_,K=5,verbose=False)
            # The result used to be discarded, leaving every score at its initial value.
            param.score = sum(fold_accs) / len(fold_accs)
    finally:
        with open('HYPERPARAM.pkl', 'wb') as f:
            pickle.dump(HYPER_GRID,f)

In [69]:
# Launch the full grid search (long-running).
GridSearch(HYPER_GRID=HYPER_GRID)

Grid Search progression: 0.0 %
Grid Search progression: 0.8333333333333334 %


KeyboardInterrupt: 