# MNIST - ConvNet vs. FCLayers

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
from matplotlib import pyplot as plt

In [None]:
from common.model_tools import get_n_params
from common.plot_tools import set_default

In [None]:
set_default()

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Prepare Dataset

In [None]:
# Preprocessing applied to both splits: convert to tensor, then normalize
# with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Datasets (downloaded into ./data if missing).
mnist_train_ds = datasets.MNIST("./data", train=True, download=True, transform=mnist_transform)
mnist_test_ds = datasets.MNIST("./data", train=False, download=True, transform=mnist_transform)

# Small shuffled batches for training, large batches for evaluation.
mnist_train_loader = DataLoader(
    mnist_train_ds,
    batch_size=64,
    shuffle=True,
    num_workers=1,
)
mnist_test_loader = DataLoader(
    mnist_test_ds,
    batch_size=1000,
    shuffle=True,
    num_workers=1,
)

In [None]:
# Show the first ten training samples in a 2x5 grid without axes.
plt.figure(figsize=(8,3), dpi=100)
for idx in range(10):
  digit, _ = mnist_train_ds[idx]  # the label is not needed for display
  plt.subplot(2, 5, idx + 1)
  plt.imshow(digit.squeeze().numpy())
  plt.axis('off')

## Define Train and Test

In [None]:
def train(epoch, model, optimizer):
  """Train `model` for one pass over `mnist_train_loader`.

  Args:
    epoch: Epoch index; only used in the progress message.
    model: Network whose output is fed to `F.nll_loss`.
    optimizer: Optimizer that is zeroed and stepped once per mini-batch.

  Prints the current batch loss every 100 batches.
  """
  model.train()  # training mode

  for batch_idx, (data, labels) in enumerate(mnist_train_loader):
    # move the batch to the compute device
    data = data.to(device)
    labels = labels.to(device)

    # clear old gradients, then forward -> loss -> backward -> update
    optimizer.zero_grad()
    loss = F.nll_loss(model(data), labels)
    loss.backward()
    optimizer.step()

    # only report progress on every 100th batch
    if batch_idx % 100 != 0:
      continue
    print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(mnist_train_loader.dataset)} ({100. * batch_idx / len(mnist_train_loader):.0f}%)]\tLoss: {loss.item():.4f}')

In [None]:
def test(epoch, model):
  """Evaluate `model` on `mnist_test_loader` and print loss and accuracy.

  Args:
    epoch: Epoch index; accepted for symmetry with `train` but not used.
    model: Network whose output is fed to `F.nll_loss`.

  Prints the average per-sample loss and the accuracy over the whole
  test dataset. Returns nothing.
  """
  # 1. set the model in evaluation mode
  model.eval()
  test_loss = 0
  correct = 0

  # Evaluation never calls backward(), so disable autograd: this avoids
  # building a graph for every batch and saves memory and time.
  with torch.no_grad():
    for data, target in mnist_test_loader:

      # 2. send to device
      data, target = data.to(device), target.to(device)

      # 3. forward
      output = model(data)

      # 4. accumulate the summed (not averaged) loss of the batch
      test_loss += F.nll_loss(output, target, reduction='sum').item()

      # 5. count correct predictions (index of the largest output per row)
      pred = output.max(1, keepdim=True)[1]
      correct += pred.eq(target.view_as(pred)).sum().item()

  # normalize by the number of samples, not the number of batches
  n_samples = len(mnist_test_loader.dataset)
  test_loss /= n_samples
  accuracy = 100. * correct / n_samples
  print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{n_samples} ({accuracy:.0f}%)')

## Fully Connected Layers Model

In [None]:
class FC2Layer(nn.Module):
  """Fully connected classifier: two hidden ReLU layers and a log-softmax output.

  Args:
    input_size: Number of features per sample after flattening.
    hidden_size: Width of both hidden layers.
    output_size: Number of output classes.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    self.input_size = input_size
    layers = [
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, output_size),
        nn.LogSoftmax(dim=1),
    ]
    self.network = nn.Sequential(*layers)

  def forward(self, x):
    """Flatten `x` to rows of `input_size` features and return log-probabilities."""
    flat = x.view(-1, self.input_size)
    return self.network(flat)

In [None]:
# Build the fully connected model (784 inputs, 8 hidden units, 10 classes)
# and move it to the selected device.
fc2_model = FC2Layer(28*28, 8, 10)
fc2_model.to(device)

# Report the model size before training starts.
print(f'Number of params {get_n_params(fc2_model)}')

# Plain SGD with a small momentum term.
optimizer = optim.SGD(fc2_model.parameters(), lr=0.01, momentum=0.05)

# One epoch of training followed by an evaluation pass.
for epoch in range(1):
  train(epoch, fc2_model, optimizer)
  test(epoch, fc2_model)

## ConvNet Model

In [None]:
class ConvNet(nn.Module):
  """Small CNN: two 5x5 conv + ReLU + 2x2 max-pool stages, then two linear layers.

  Args:
    input_size: Stored on the instance; not used by the layers themselves.
    n_features: Number of feature maps produced by each convolution.
    output_size: Number of output classes.

  The first linear layer expects `n_features * 4 * 4` inputs, which is the
  flattened size after both conv/pool stages for 1x28x28 images
  (28 -> 24 -> 12 -> 8 -> 4).
  """

  def __init__(self, input_size, n_features, output_size):
    super(ConvNet, self).__init__()
    self.input_size = input_size
    # Keep the feature count on the instance so forward() does not depend
    # on a module-level variable of the same name.
    self.n_features = n_features
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=n_features, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=n_features, out_channels=n_features, kernel_size=5)
    self.fc1 = nn.Linear(n_features*4*4, 50)
    self.fc2 = nn.Linear(50, output_size)

  def forward(self, x):
    """Return per-class log-probabilities for a batch of single-channel images."""
    x = self.conv1(x)
    x = F.relu(x)
    x = F.max_pool2d(x, kernel_size=2)
    x = self.conv2(x)
    x = F.relu(x)
    x = F.max_pool2d(x, kernel_size=2)
    # flatten the feature maps using this instance's own feature count
    x = x.view(-1, self.n_features*4*4)
    x = self.fc1(x)
    x = F.relu(x)
    x = self.fc2(x)
    x = F.log_softmax(x, dim=1)
    return x

In [None]:
# Number of feature maps used by both convolution layers.
n_features = 6

# Build the convolutional model and move it to the selected device.
model_cnn = ConvNet(28*28, n_features, 10)
model_cnn.to(device)

# Report the model size before training starts.
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

# SGD with momentum.
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)

# One epoch of training followed by an evaluation pass.
for epoch in range(1):
  train(epoch, model_cnn, optimizer)
  test(epoch, model_cnn)