In [35]:
import torch
import math
import numpy as np
from torch import optim
from torch import Tensor
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

In [3]:
# Number of sample pairs requested for each of the training and test sets.
N = 1000

# The helper returns six values: (input, target, classes) for the training
# set, followed by the same triple for the test set.
(train_input, train_target, train_classes,
 test_input, test_target, test_classes) = prologue.generate_pair_sets(N)

# Sanity check: one shape per line for the three training tensors.
print(train_input.shape, train_target.shape, train_classes.shape, sep='\n')

# Peek at the class entries of the first five pairs
# (presumably the digit label of each image in the pair -- TODO confirm).
train_classes[:5]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/mnist/MNIST\raw\train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./data/mnist/MNIST\raw\train-images-idx3-ubyte.gz to ./data/mnist/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST\raw\train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./data/mnist/MNIST\raw\train-labels-idx1-ubyte.gz to ./data/mnist/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST\raw\t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./data/mnist/MNIST\raw\t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST\raw\t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./data/mnist/MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST\raw
Processing...
Done!
torch.Size([1000, 2, 14, 14])
torch.Size([1000])
torch.Size([1000, 2])


tensor([[9, 3],
        [5, 4],
        [7, 4],
        [9, 6],
        [8, 8]])

In [4]:
# In the course practicals the mean is taken over the whole input tensor;
# not sure this is the best choice, so it may be changed later.
def normalize(input, mean, std):
    """Standardise `input` in place: subtract `mean`, then divide by `std`.

    The tensor passed as `input` is modified directly; nothing is returned.
    """
    input.sub_(mean)
    input.div_(std)
  
# Both statistics are computed over every element of the training input and
# are then applied unchanged to the test input.
mean, std = train_input.mean(), train_input.std()

# normalize() works in place, so both tensors are overwritten here.
normalize(train_input, mean, std)
normalize(test_input, mean, std)

In [5]:
class DigitNet(nn.Module):
    """Small convolutional classifier producing 10 scores per input image.

    Architecture: two 3x3 convolutions (1 -> 32 -> 64 channels), each followed
    by a 2x2 max-pool and a ReLU, then two fully connected layers
    (256 -> nb_hidden -> 10).

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        """Return the raw (un-normalised) 10-way scores for the batch `x`."""
        stage1 = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        stage2 = F.relu(F.max_pool2d(self.conv2(stage1), kernel_size=2))
        # Each sample must flatten to exactly 256 features
        # (64 channels x 2 x 2, which is what a 14x14 image yields).
        flat = stage2.view(-1, 256)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [60]:
class DigitNetSingleOutput(nn.Module):
    """Convolutional network that maps one image to a single scalar score.

    Same layers as a 10-way digit classifier (two 3x3 convolutions with
    2x2 max-pool + ReLU, then 256 -> nb_hidden -> 10), followed by a ReLU and a
    final 10 -> 1 linear layer.

    Args:
        nb_hidden: width of the first fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)
        self.fc3 = nn.Linear(10, 1)

    def forward(self, x):
        """Return one raw score per sample, shaped (batch, 1)."""
        stage1 = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        stage2 = F.relu(F.max_pool2d(self.conv2(stage1), kernel_size=2))
        # Each sample must flatten to exactly 256 features
        # (64 channels x 2 x 2, which is what a 14x14 image yields).
        flat = stage2.view(-1, 256)
        hidden = F.relu(self.fc1(flat))
        ten_way = F.relu(self.fc2(hidden))
        return self.fc3(ten_way)

In [6]:
class pairInputModel(nn.Module):
    """Convolutional classifier taking a 2-channel input and giving 2 scores.

    Architecture: two 3x3 convolutions (2 -> 32 -> 64 channels), each followed
    by a 2x2 max-pool and a ReLU, then two fully connected layers
    (256 -> nb_hidden -> 2).

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)

    def forward(self, x):
        """Return the raw (un-normalised) 2-way scores for the batch `x`."""
        stage1 = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        stage2 = F.relu(F.max_pool2d(self.conv2(stage1), kernel_size=2))
        # Each sample must flatten to exactly 256 features
        # (64 channels x 2 x 2, which is what a 14x14 input yields).
        flat = stage2.view(-1, 256)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [7]:
def train_model(model, train_input, train_target, mini_batch_size=25,
                nb_epochs=25, criterion=nn.CrossEntropyLoss(), lr=1e-1):
    """Train `model` in place with plain mini-batch SGD.

    Samples are visited in their stored order (no shuffling), in consecutive
    slices of `mini_batch_size` along dimension 0.

    Args:
        model: module whose parameters are optimised.
        train_input: tensor of training samples, indexed along dimension 0.
        train_target: tensor of targets aligned with `train_input`.
        mini_batch_size: number of samples per optimisation step.
        nb_epochs: number of full passes over the data.
        criterion: callable `(output, target) -> loss`.
        lr: SGD learning rate.

    Returns:
        None. The model's parameters are updated in place.
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    nb_samples = train_input.size(0)

    for _ in range(nb_epochs):
        for start in range(0, nb_samples, mini_batch_size):
            # Clamp the slice length so a sample count that is not a multiple
            # of mini_batch_size yields a shorter final batch instead of
            # making narrow() raise.
            length = min(mini_batch_size, nb_samples - start)
            output = model(train_input.narrow(0, start, length))
            loss = criterion(output, train_target.narrow(0, start, length))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [64]:
def train_siamese_model(model, train_input_1, train_input_2, train_target, mini_batch_size=25,
                nb_epochs=25, criterion=nn.CrossEntropyLoss(), lr=1e-1):
    """Train a weight-sharing (siamese) model on pairs of inputs with SGD.

    The same `model` is applied to both inputs of a pair; the two outputs are
    concatenated along dimension 1 and handed to `criterion` together with the
    target. Note that no contrastive loss is computed here: the loss is
    whatever `criterion` returns on the concatenated outputs (cross-entropy by
    default).

    Samples are visited in their stored order (no shuffling).

    Args:
        model: module applied to each element of the pair.
        train_input_1: tensor holding the first element of every pair.
        train_input_2: tensor holding the second element of every pair,
            aligned with `train_input_1` along dimension 0.
        train_target: tensor of targets aligned with the inputs.
        mini_batch_size: number of pairs per optimisation step.
        nb_epochs: number of full passes over the data.
        criterion: callable `(output, target) -> loss`.
        lr: SGD learning rate.

    Returns:
        None. The model's parameters are updated in place.
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    # Use the size of the tensors actually passed in, not a notebook global.
    nb_samples = train_input_1.size(0)

    for _ in range(nb_epochs):
        for start in range(0, nb_samples, mini_batch_size):
            # A shorter final batch is allowed when nb_samples is not a
            # multiple of mini_batch_size.
            length = min(mini_batch_size, nb_samples - start)

            # Forward pass: shared weights, one call per element of the pair.
            features_1 = model(train_input_1.narrow(0, start, length))
            features_2 = model(train_input_2.narrow(0, start, length))
            batch_target = train_target.narrow(0, start, length)

            # The two outputs side by side are what the criterion scores.
            loss = criterion(torch.cat((features_1, features_2), 1), batch_target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [14]:
# Keep only the first image of every pair and restore an explicit channel
# axis of size 1, so the result has one channel per sample. unsqueeze(1)
# avoids repeating the sample count and image size as literals.
train_input_flat = train_input[:, 0].unsqueeze(1)
test_input_flat = test_input[:, 0].unsqueeze(1)
train_input_flat.shape

torch.Size([1000, 1, 14, 14])

In [15]:
# Column 0 of the class tensors, as 1-D tensors: the entries that go with the
# first image of each pair.
train_classes_flat, test_classes_flat = (
    classes[:, 0].reshape(-1) for classes in (train_classes, test_classes)
)
test_classes_flat.shape

torch.Size([1000])

In [10]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size=25):
    """Count the samples whose arg-max prediction differs from the target.

    Args:
        model: module returning one row of scores per sample.
        data_input: tensor of samples, indexed along dimension 0.
        data_target: tensor of class indices aligned with `data_input`.
        mini_batch_size: number of samples evaluated per forward pass.

    Returns:
        int: number of misclassified samples.
    """
    nb_samples = data_input.size(0)
    nb_data_errors = 0

    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for start in range(0, nb_samples, mini_batch_size):
            # Shorter final batch when nb_samples is not a multiple of
            # mini_batch_size (a fixed length would make narrow() raise).
            length = min(mini_batch_size, nb_samples - start)
            output = model(data_input.narrow(0, start, length))
            _, predicted_classes = torch.max(output, 1)
            batch_target = data_target.narrow(0, start, length)
            # Compare the whole batch at once instead of element by element.
            nb_data_errors += (predicted_classes != batch_target).sum().item()

    return nb_data_errors

In [70]:
def compute_nb_errors_siamese(model, data_input_1, data_input_2, data_target, mini_batch_size=25):
    """Count misclassified pairs for a weight-sharing (siamese) model.

    `model` is applied separately to both elements of each pair; the two
    outputs are concatenated along dimension 1 and the index of the largest
    entry is taken as the predicted class.

    Args:
        model: module applied to each element of the pair.
        data_input_1: tensor holding the first element of every pair.
        data_input_2: tensor holding the second element of every pair,
            aligned with `data_input_1` along dimension 0.
        data_target: tensor of class indices aligned with the inputs.
        mini_batch_size: number of pairs evaluated per forward pass.

    Returns:
        int: number of pairs whose prediction differs from the target.
    """
    nb_samples = data_input_1.size(0)
    nb_data_errors = 0

    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for start in range(0, nb_samples, mini_batch_size):
            # Shorter final batch when nb_samples is not a multiple of
            # mini_batch_size (a fixed length would make narrow() raise).
            length = min(mini_batch_size, nb_samples - start)
            output_1 = model(data_input_1.narrow(0, start, length))
            output_2 = model(data_input_2.narrow(0, start, length))
            output = torch.cat((output_1, output_2), 1)
            _, predicted_classes = torch.max(output, 1)
            batch_target = data_target.narrow(0, start, length)
            # Compare the whole batch at once instead of element by element.
            nb_data_errors += (predicted_classes != batch_target).sum().item()

    return nb_data_errors

In [12]:
def print_error(model, tr_input, tr_target, te_input, te_target):
    """Print the train and test error rates of `model` as percentages.

    Each rate is the error count returned by `compute_nb_errors` divided by
    the number of samples in the corresponding input tensor.

    Args:
        model: module to evaluate.
        tr_input, tr_target: training samples and their targets.
        te_input, te_target: test samples and their targets.
    """
    # Normalise by each set's own size rather than a notebook-level constant,
    # so the percentages stay correct for sets of any length.
    train_rate = compute_nb_errors(model, tr_input, tr_target) / tr_input.size(0) * 100
    test_rate = compute_nb_errors(model, te_input, te_target) / te_input.size(0) * 100
    print('train_error {:.02f}% test_error {:.02f}%'.format(train_rate, test_rate))

In [67]:
def print_error_siamese(model, tr_input_1, tr_input_2, tr_target, te_input_1, te_input_2, te_target):
    """Print the train and test error rates of a siamese `model` as percentages.

    Each rate is the error count returned by `compute_nb_errors_siamese`
    divided by the number of pairs in the corresponding input tensors.

    Args:
        model: module to evaluate.
        tr_input_1, tr_input_2, tr_target: training pairs and their targets.
        te_input_1, te_input_2, te_target: test pairs and their targets.
    """
    # Normalise by each set's own size rather than a notebook-level constant,
    # so the percentages stay correct for sets of any length.
    train_rate = (compute_nb_errors_siamese(model, tr_input_1, tr_input_2, tr_target)
                  / tr_input_1.size(0) * 100)
    test_rate = (compute_nb_errors_siamese(model, te_input_1, te_input_2, te_target)
                 / te_input_1.size(0) * 100)
    print('train_error {:.02f}% test_error {:.02f}%'.format(train_rate, test_rate))

In [42]:
# Single-image digit classifier with a 500-unit hidden layer, trained on the
# first image of each pair with train_model's default settings.
model = DigitNet(nb_hidden=500)

train_model(model, train_input=train_input_flat, train_target=train_classes_flat)

In [18]:
# Error rates of the single-image classifier on the first image of each pair.
print_error(
    model,
    tr_input=train_input_flat, tr_target=train_classes_flat,
    te_input=test_input_flat, te_target=test_classes_flat,
)

train_error 0.00% test_error 5.70%


In [19]:
# Two-channel model fed the whole pair at once and trained directly on the
# pair-level target, again with train_model's default settings.
model = pairInputModel(nb_hidden=500)
train_model(model, train_input=train_input, train_target=train_target)

In [20]:
# Error rates of the two-channel model on the pair-level target.
print_error(
    model,
    tr_input=train_input, tr_target=train_target,
    te_input=test_input, te_target=test_target,
)

train_error 0.00% test_error 18.00%


In [43]:
# Number of trainable parameters of whichever network `model` currently holds.
# NOTE(review): the recorded output (152326) equals the parameter count of
# DigitNet(500); when the notebook is run top to bottom, `model` is
# pairInputModel(500) at this point, which has 148606 parameters -- the saved
# output appears to come from an out-of-order run.
sum(param.numel() for param in model.parameters() if param.requires_grad)

152326

In [51]:
# Split every pair into its two images, each keeping an explicit channel
# axis of size 1. unsqueeze(1) avoids repeating the sample count and image
# size as literals.
train_input_1 = train_input[:, 0].unsqueeze(1)
train_input_2 = train_input[:, 1].unsqueeze(1)

test_input_1 = test_input[:, 0].unsqueeze(1)
test_input_2 = test_input[:, 1].unsqueeze(1)

In [65]:
# Shared-weight network producing one score per image; both images of a pair
# go through the same model during training.
model = DigitNetSingleOutput(nb_hidden=500)
train_siamese_model(
    model,
    train_input_1=train_input_1,
    train_input_2=train_input_2,
    train_target=train_target,
)

In [71]:
# Error rates of the shared-weight model on the pair-level target.
print_error_siamese(
    model,
    tr_input_1=train_input_1, tr_input_2=train_input_2, tr_target=train_target,
    te_input_1=test_input_1, te_input_2=test_input_2, te_target=test_target,
)

train_error 0.00% test_error 13.30%
