# MNIST Project

## Imports

In [1]:
import torch
from torch import Tensor 
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.autograd import Variable
import src.dlc_practical_prologue as prologue

## Loading the Data

In [2]:
def onehot_Y(target):
    '''
    Create one-hot labels for Y
    '''
    res = torch.zeros(target.size(0), 2)
    res[range(target.size(0)), target] = 1
    return res

In [3]:
def onehot_Class(target):
    res = torch.zeros(target.size(0), 10*target.size(1))
    res[range(target.size(0)), target[:, 0]] = 1
    res[range(target.size(0)), target[:, 1] + 10] = 1
    return res

In [4]:
N = 1000
train_X, train_Y, train_Class, test_X, test_Y, test_Class = prologue.generate_pair_sets(N)

In [5]:
#train_Y = onehot_Y(train_Y).long()
#train_Class = onehot_Class(train_Class).long()
#test_Y = onehot_Y(test_Y).long()
#test_Class = onehot_Class(test_Class).long()

In [6]:
mu, std = train_X.mean(), train_X.std()
train_X.sub_(mu).div_(std)
test_X.sub_(mu).div_(std)

tensor([[[[-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          ...,
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653]],

         [[-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          ...,
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653]]],


        [[[-0.4653, -0.4653, -0.4653,  ..., -0.4653, -0.4653, -0.4653],
          [-0.4653, -0.4653,

## Model 1: Naive convnet
For the first model, we create a naive convnet, not taking into account the structrue of the channels.

In [7]:
class convNet(nn.Module):
    def __init__(self):
        super(convNet, self).__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 2)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

In [8]:
def train_model(model, criterion, optimizer, nb_epochs, minibatch_size, train_X, train_Y, verbose=False):
    for e in range(nb_epochs):
        for b in range(0, train_X.size(0), minibatch_size):
            out = model(train_X.narrow(0, b, minibatch_size))
            loss = criterion(out, train_Y.narrow(0, b, minibatch_size))
            model.zero_grad()
            loss.backward()
            optimizer.step()
        if(verbose): print(loss)
    return model

In [9]:
def compute_nb_errors(model, data_input, data_target, minibatch_size):
    nb_data_errors = 0
    for b in range(0, data_input.size(0), minibatch_size):
        out = model(data_input.narrow(0, b, minibatch_size))
        _, pred = torch.max(out.data, 1)
        for k in range(minibatch_size):
            if data_target[b+k] != pred[k]:
                nb_data_errors += 1
    return nb_data_errors

In [12]:
model1 = convNet()
model1 = train_model(model1, nn.CrossEntropyLoss(), optim.SGD(model1.parameters(), lr=1e-1), 100, 100, train_X, train_Y, verbose=True)

tensor(0.6794, grad_fn=<NllLossBackward>)
tensor(0.6529, grad_fn=<NllLossBackward>)
tensor(0.6240, grad_fn=<NllLossBackward>)
tensor(0.5779, grad_fn=<NllLossBackward>)
tensor(0.5367, grad_fn=<NllLossBackward>)
tensor(0.4974, grad_fn=<NllLossBackward>)
tensor(0.4523, grad_fn=<NllLossBackward>)
tensor(0.4379, grad_fn=<NllLossBackward>)
tensor(0.3877, grad_fn=<NllLossBackward>)
tensor(0.4385, grad_fn=<NllLossBackward>)
tensor(0.3271, grad_fn=<NllLossBackward>)
tensor(0.4138, grad_fn=<NllLossBackward>)
tensor(0.2939, grad_fn=<NllLossBackward>)
tensor(0.2768, grad_fn=<NllLossBackward>)
tensor(0.2392, grad_fn=<NllLossBackward>)
tensor(0.2303, grad_fn=<NllLossBackward>)
tensor(0.2876, grad_fn=<NllLossBackward>)
tensor(0.2588, grad_fn=<NllLossBackward>)
tensor(0.1905, grad_fn=<NllLossBackward>)
tensor(0.2463, grad_fn=<NllLossBackward>)
tensor(0.2017, grad_fn=<NllLossBackward>)
tensor(0.1575, grad_fn=<NllLossBackward>)
tensor(0.1148, grad_fn=<NllLossBackward>)
tensor(0.1374, grad_fn=<NllLossBac

In [13]:
compute_nb_errors(model1, test_X, test_Y, 100)

190

We observe that this model does not manage to learn the mapping very well.

## Model 2: Using transfer learning

### Part 1: Study of a good functioning digit detection network for 28x28 images

In [14]:
train_input, train_target, test_input, test_target = \
    prologue.load_data(one_hot_labels = False, normalize = True, flatten = False)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [15]:
train_input, train_target = Variable(train_input), Variable(train_target)
model = Net()

In [16]:
model = train_model(model, nn.CrossEntropyLoss(), optim.SGD(model.parameters(), lr=1e-1), 50, 100, train_input, train_target)

In [17]:
compute_nb_errors(model, test_input, test_target, 100)

65

### Part 2: Adapting the network to 14x14 images

In [18]:
# fist, create the training and testing dataset
train_target_14px = torch.cat((train_Class[:,0], train_Class[:,1]))
train_input_14px = torch.cat((train_X[:,0,:,:].resize_(1000,1,14,14), train_X[:,1,:,:].resize_(1000,1,14,14)))

test_target_14px = torch.cat((test_Class[:,0], test_Class[:,1]))
test_input_14px = torch.cat((test_X[:,0,:,:].resize_(1000,1,14,14), test_X[:,1,:,:].resize_(1000,1,14,14)))

In [19]:
class Net_14px(nn.Module):
    def __init__(self):
        super(Net_14px, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3) 
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)) #image size 12x12-> image size 6x6
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)) #image size 4x4 -> image size 2x2
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

In [33]:
model_14px = Net_14px()
model_14px = train_model(model_14px, nn.CrossEntropyLoss(), optim.SGD(model_14px.parameters(), lr=1e-1), 100, 100, \
                         train_input_14px, train_target_14px, verbose=True)

tensor(2.3059, grad_fn=<NllLossBackward>)
tensor(2.3013, grad_fn=<NllLossBackward>)
tensor(2.2983, grad_fn=<NllLossBackward>)
tensor(2.2950, grad_fn=<NllLossBackward>)
tensor(2.2923, grad_fn=<NllLossBackward>)
tensor(2.2885, grad_fn=<NllLossBackward>)
tensor(2.2851, grad_fn=<NllLossBackward>)
tensor(2.2827, grad_fn=<NllLossBackward>)
tensor(2.2801, grad_fn=<NllLossBackward>)
tensor(2.2767, grad_fn=<NllLossBackward>)
tensor(2.2731, grad_fn=<NllLossBackward>)
tensor(2.2693, grad_fn=<NllLossBackward>)
tensor(2.2662, grad_fn=<NllLossBackward>)
tensor(2.2625, grad_fn=<NllLossBackward>)
tensor(2.2586, grad_fn=<NllLossBackward>)
tensor(2.2548, grad_fn=<NllLossBackward>)
tensor(2.2511, grad_fn=<NllLossBackward>)
tensor(2.2466, grad_fn=<NllLossBackward>)
tensor(2.2419, grad_fn=<NllLossBackward>)
tensor(2.2367, grad_fn=<NllLossBackward>)
tensor(2.2308, grad_fn=<NllLossBackward>)
tensor(2.2248, grad_fn=<NllLossBackward>)
tensor(2.2186, grad_fn=<NllLossBackward>)
tensor(2.2112, grad_fn=<NllLossBac

In [36]:
compute_nb_errors(model_14px, test_input_14px, test_target_14px, 100)

1781