In [None]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms


In [None]:
# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU so
# the notebook still runs end-to-end on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Convert each image to a tensor when it is fetched from the dataset.
transform = transforms.ToTensor()
# Train / test splits of MNIST, stored under (and downloaded to) "data".
train_dataset = datasets.MNIST(root="data", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root="data", train=False, transform=transform, download=True)


In [None]:
from torch.utils.data import DataLoader

# Mini-batch iterators: only the training split is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, num_workers=4, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, num_workers=4, shuffle=False)

In [None]:
# Peek at one training example: the image tensor's shape, then its label.
image, label = train_dataset[10]
print(image.shape, label, sep="\n")

torch.Size([1, 28, 28])
3


In [None]:
# A fully connected layer maps the last dimension 100 -> 200; the batch
# dimension (64) passes through untouched.
linear_layer = nn.Linear(in_features=100, out_features=200)
x = torch.rand((64, 100))
y = linear_layer(x)
print(y.shape)

torch.Size([64, 200])


In [None]:
# An unpadded 3x3 convolution trims one pixel from every border: 150 -> 148.
conv_layer = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3)
x = torch.rand((64, 3, 150, 150))
y = conv_layer(x)
print(y.shape)


torch.Size([64, 10, 148, 148])


### Exercise 1

Redefine the `conv` layer below, setting the appropriate argument so that the output's spatial shape is the same as the input's. See the PyTorch documentation (`https://pytorch.org/docs/stable/nn.html#torch.nn.Conv2d`) for reference.

In [None]:
# TO DO: define the conv layer below and ensure that the output tensor shape in dimensions {H, W}
# ( as in [1, channels, H, W] ) will be the same as the input in both cases.
# A 3x3 kernel with one pixel of padding on every side keeps H and W unchanged.
conv = torch.nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3, padding=1)

# Check the property on an even and an odd spatial size.
for side in (20, 11):
  x = torch.rand(1, 1, side, side)
  y = conv(x)
  print(y.shape)


torch.Size([1, 10, 20, 20])
torch.Size([1, 10, 11, 11])


## Pooling

![](https://qph.fs.quoracdn.net/main-qimg-40cdeb3b43594f4b1b1b6e2c137e80b7)

In [None]:
# Bit width of a float32 value. Kept for any code that still refers to it; the
# analyzer below reads the element size from each tensor instead.
NUM_BITS_FLOAT32 = 32

class CNNMemAnalyzer(nn.Module):
  """Runs an input through a stack of layers and reports feature-map memory.

  For every layer the output shape and its size in MB (1 MB = 1e6 bytes) are
  printed, followed by the total over all layers.

  Args:
    layers: iterable of callables (e.g. an `nn.ModuleList`) applied in order.
  """

  def __init__(self, layers):
    super().__init__()
    self.layers = layers

  def forward(self, x):
    """Applies the layers in sequence and measures each output tensor.

    Args:
      x: input tensor for the first layer.

    Returns:
      A tuple `(tot_mbytes, spat_res)`: the summed size in MB of every
      layer output, and the last-dimension size (spatial width) of the output
      of each `nn.Conv2d` layer, in order.
    """
    tot_mbytes = 0
    spat_res = []
    # Only sizes are reported, so skip autograd bookkeeping; this also lets
    # earlier feature maps be freed as soon as they are no longer needed.
    with torch.no_grad():
      for layer in self.layers:
        x = layer(x)
        # numel() * element_size() is the exact byte count for any dtype,
        # rather than assuming 32-bit elements.
        mem_h_mb = x.numel() * x.element_size() / 1e6
        print('-' * 30)
        print('New feature map of shape: ', x.shape)
        print('Mem usage: {} MB'.format(mem_h_mb))
        if isinstance(layer, nn.Conv2d):
          # keep track of the current spatial width for conv layers
          spat_res.append(x.shape[-1])
        tot_mbytes += mem_h_mb
    print('=' * 30)
    print('Total used memory: {:.2f} MB'.format(tot_mbytes))
    return tot_mbytes, spat_res


In [None]:
# Five stacked 3x3 convolutions with no pooling in between; consecutive
# entries of `conv_channels` are the (in, out) channels of each layer.
conv_channels = [1, 32, 64, 64, 128, 512]
cnn = CNNMemAnalyzer(nn.ModuleList(
    nn.Conv2d(c_in, c_out, 3)
    for c_in, c_out in zip(conv_channels, conv_channels[1:])
))


In [None]:
# Measure the conv-only stack on one random 512x512 single-channel input.
dummy_input = torch.randn(1, 1, 512, 512)
tot_mbytes, spat_res = cnn(dummy_input)


------------------------------
New feature map of shape:  torch.Size([1, 32, 510, 510])
Mem usage: 33.2928 MB
------------------------------
New feature map of shape:  torch.Size([1, 64, 508, 508])
Mem usage: 66.064384 MB
------------------------------
New feature map of shape:  torch.Size([1, 64, 506, 506])
Mem usage: 65.545216 MB
------------------------------
New feature map of shape:  torch.Size([1, 128, 504, 504])
Mem usage: 130.056192 MB
------------------------------
New feature map of shape:  torch.Size([1, 512, 502, 502])
Mem usage: 516.104192 MB
Total used memory: 811.06 MB


In [None]:
# Same five convolutions as before, now with a 2x2 max-pooling layer inserted
# between every pair of consecutive convolutions.
conv_channels = [1, 32, 64, 64, 128, 512]
pooled_layers = []
for c_in, c_out in zip(conv_channels, conv_channels[1:]):
  if pooled_layers:
    pooled_layers.append(nn.MaxPool2d(2))
  pooled_layers.append(nn.Conv2d(c_in, c_out, 3))
cnn = CNNMemAnalyzer(nn.ModuleList(pooled_layers))


In [None]:
# Measure the conv + pooling stack on one random 512x512 single-channel input.
dummy_input = torch.randn(1, 1, 512, 512)
tot_mbytes, spat_res = cnn(dummy_input)


------------------------------
New feature map of shape:  torch.Size([1, 32, 510, 510])
Mem usage: 33.2928 MB
------------------------------
New feature map of shape:  torch.Size([1, 32, 255, 255])
Mem usage: 8.3232 MB
------------------------------
New feature map of shape:  torch.Size([1, 64, 253, 253])
Mem usage: 16.386304 MB
------------------------------
New feature map of shape:  torch.Size([1, 64, 126, 126])
Mem usage: 4.064256 MB
------------------------------
New feature map of shape:  torch.Size([1, 64, 124, 124])
Mem usage: 3.936256 MB
------------------------------
New feature map of shape:  torch.Size([1, 64, 62, 62])
Mem usage: 0.984064 MB
------------------------------
New feature map of shape:  torch.Size([1, 128, 60, 60])
Mem usage: 1.8432 MB
------------------------------
New feature map of shape:  torch.Size([1, 128, 30, 30])
Mem usage: 0.4608 MB
------------------------------
New feature map of shape:  torch.Size([1, 512, 28, 28])
Mem usage: 1.605632 MB
Total used memory: 70.90 MB

# How LeNet works

![](https://miro.medium.com/max/2154/1*1TI1aGBZ4dybR6__DI9dzA.png)

### Exercise 2

Complete the `ConvBlock` class so that it applies `Conv2d`, `ReLU`, and `MaxPool2d`, in that order. Ensure that for an input of size `1x32x32` you obtain an output feature map of size `6x14x14`, as shown in the figure above for layer `S2`.

In [None]:
class ConvBlock(nn.Module):
  """Convolution followed by ReLU and max-pooling.

  Args:
    num_inp_channels: number of channels of the incoming tensor.
    num_out_fmaps: number of feature maps produced by the convolution.
    kernel_size: kernel size of the (unpadded) convolution.
    pool_size: kernel size handed to `nn.MaxPool2d` (default 2).
  """

  def __init__(self, num_inp_channels, num_out_fmaps,
               kernel_size, pool_size=2):
    super().__init__()
    self.conv = nn.Conv2d(
        in_channels=num_inp_channels,
        out_channels=num_out_fmaps,
        kernel_size=kernel_size,
    )
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d(kernel_size=pool_size)

  def forward(self, x):
    """Returns the pooled, rectified convolution of `x`."""
    feats = self.conv(x)
    feats = self.relu(feats)
    return self.maxpool(feats)


# Smoke test on MNIST-sized input: 28 -> 26 after the 3x3 conv, 13 after pooling.
block = ConvBlock(num_inp_channels=1, num_out_fmaps=10, kernel_size=3)
x = torch.rand((64, 1, 28, 28))
y = block(x)
print(y.shape)

torch.Size([64, 10, 13, 13])


### Exercise 3

Finish the `PseudoLeNet` class by including the following: 
1. As the input images from MNIST are 28x28, add padding to make them 32x32 with the `torch.nn.ConstantPad2d` (https://pytorch.org/docs/stable/nn.html#torch.nn.ConstantPad2d).
2. Build the `mlp` classifier as an `nn.Sequential` stack of fully connected layers and ReLU activations, with the sizes shown in the figure above: [120, 84, 10]. Plug in the appropriate output activation at the end to do multi-class classification.
3. Remember to "flatten" the feature maps coming out of the second `ConvBlock` and connect them to the output `mlp` to build the classifier in the `forward` function. This has to be done because fully connected layers (`Linear`) only accept features without any spatial dimensions. Hence, all the spatial dimensions and channels are unrolled into a single vector per batch sample. **HINT: Remember the `.view()` operator to change a tensor's shape!**


In [None]:
class PseudoLeNet(nn.Module):
  """LeNet-style classifier for batches of single-channel 28x28 images.

  Pipeline: zero-pad to 32x32, two `ConvBlock` stages, flatten, then a
  three-layer MLP ending in `LogSoftmax`. The output is therefore a
  `[batch, 10]` tensor of log-probabilities, suitable for `nn.NLLLoss`
  (applying a further log-softmax, as `nn.CrossEntropyLoss` does, leaves
  log-probabilities unchanged).
  """

  def __init__(self):
    super().__init__()
    # Zero-pad 2 pixels on every side: 28x28 -> 32x32.
    self.pad = nn.ConstantPad2d(2, 0)
    self.conv1 = ConvBlock(1, 6, 5)   # [1, 32, 32] -> [6, 14, 14]
    self.conv2 = ConvBlock(6, 16, 5)  # [6, 14, 14] -> [16, 5, 5]
    self.mlp = nn.Sequential(
        # The flattened conv2 output has 16 channels * 5 * 5 = 400 features.
        nn.Linear(16 * 5 * 5, 120),
        nn.ReLU(),
        nn.Linear(120, 84),
        nn.ReLU(),
        nn.Linear(84, 10),
        # Output activation for multi-class classification.
        nn.LogSoftmax(dim=1),
    )

  def forward(self, x):
    """Maps a `[batch, 1, 28, 28]` tensor to `[batch, 10]` log-probabilities."""
    x = self.pad(x)    # [batch, 1, 32, 32]
    x = self.conv1(x)  # [batch, 6, 14, 14]
    x = self.conv2(x)  # [batch, 16, 5, 5]
    # Unroll channels and spatial dims into one feature vector per sample,
    # since Linear layers take no spatial dimensions.
    bsz = x.shape[0]
    x = x.view(bsz, -1)
    return self.mlp(x)
