In [2]:
import torch

In [3]:
import torch.nn as nn

In [3]:
class Convolution(nn.Module):
    """Shape bookkeeping and learnable tensors for an unpadded, stride-1 2-D convolution.

    Args:
        input_shape: ``(depth, height, width)`` of a single input image.
        kernel_size: ``(kernel_height, kernel_width)`` of every kernel.
        n_kernels: number of kernels, which is also the output depth.

    Attributes:
        n_kernels: the ``n_kernels`` argument.
        input_shape: the ``input_shape`` argument.
        input_depth: first entry of ``input_shape``.
        output_shape: ``(n_kernels, height - kernel_height + 1,
            width - kernel_width + 1)``.
        kernel_shape: ``(n_kernels, input_depth, kernel_height, kernel_width)``.
        kernels: learnable tensor of shape ``kernel_shape``, drawn from U[0, 1).
        biases: learnable tensor of shape ``output_shape`` (one bias per
            output element), drawn from U[0, 1).
    """

    def __init__(self, input_shape, kernel_size, n_kernels):
        super().__init__()

        input_depth, input_height, input_width = input_shape
        kernel_height, kernel_width = kernel_size

        self.n_kernels = n_kernels
        self.input_shape = input_shape
        self.input_depth = input_depth

        # No padding and stride 1: each spatial dimension shrinks by (kernel - 1).
        self.output_shape = (n_kernels,
                             input_height - kernel_height + 1,
                             input_width - kernel_width + 1)
        self.kernel_shape = (n_kernels, input_depth, kernel_height, kernel_width)

        # Wrap in nn.Parameter so the tensors are registered on the module:
        # they then appear in parameters()/state_dict(), follow .to(device),
        # and track gradients. A plain tensor attribute does none of that.
        self.kernels = nn.Parameter(torch.rand(*self.kernel_shape))
        self.biases = nn.Parameter(torch.rand(*self.output_shape))

##### Example 2

In [4]:
conv = Convolution(input_shape=(3, 28, 28), kernel_size=(4, 4), n_kernels=7)

In [5]:
conv.kernel_shape

(7, 3, 4, 4)

What is the shape of the kernel tensor (a single tensor that contains all the kernels)? Explain

**Explain**
- `7`: the number of kernels, equal to `n_kernels = 7`
- `3`: the depth of the image (aka: number of channels), equal to `input_shape[0] = 3`
- `4`: the `kernel_height` (aka: number of rows) of each individual kernel, equal to `kernel_size[0] = 4`
- `4`: the `kernel_width` (aka: number of columns) of each individual kernel, equal to `kernel_size[1] = 4`

##### Example 3

In [6]:
conv = Convolution(input_shape=(3, 28, 28), kernel_size=(2, 2), n_kernels=5)

What is the shape of the output of the forward pass? Explain

In [7]:
conv.output_shape

(5, 27, 27)

**Explain**
- `5`: the number of kernels, equal to `n_kernels = 5`
- `27`: the height of the output (aka rows), computed by `input_height - kernel_height + 1 = 28 - 2 + 1 = 27`
- `27`: the width of the output (aka columns), computed by `input_width - kernel_width + 1 = 28 - 2 + 1 = 27`

##### Example 4

`nn.Conv2d` takes 3 required parameters: `in_channels`, `out_channels` and `kernel_size`

Explain the meaning of each parameter.

**Explain**
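- `in_channels`: the depth of the input image (aka: number of color channels)
- `out_channels`: the number of kernels, which is also the depth of the output
- `kernel_size`: the height and width of each kernel (a single int for a square kernel, or a `(height, width)` tuple)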

##### Example 5

In [8]:
m = nn.Conv2d(in_channels=3, out_channels=7, kernel_size=3)

In [9]:
m.weight.shape

torch.Size([7, 3, 3, 3])

##### Example 6

In [128]:
_image = torch.arange(12288).reshape(3, 64, 64).float()

In [117]:
import torch.nn as nn

In [118]:
conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)

In [119]:
pool = nn.MaxPool2d(kernel_size=2, stride=2)

In [120]:
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

What is the output after each layer? Explain

In [121]:
_image.shape

torch.Size([3, 64, 64])

In [122]:
output = conv1(_image)

In [123]:
output.shape

torch.Size([6, 60, 60])

- `6` = `out_channels`
- `60` = `output_height` = `input_height - kernel_height + 1` = `64-5+1`
- `60` = `output_width` = `input_width - kernel_width + 1` = `64-5+1`

In [124]:
output = pool(output)

In [125]:
output.shape

torch.Size([6, 30, 30])

In [126]:
output = conv2(output)

In [127]:
output.shape

torch.Size([16, 26, 26])

- `16` = `out_channels`
- `26` = `output_height` = `input_height - kernel_height + 1` = `30-5+1`
- `26` = `output_width` = `input_width - kernel_width + 1` = `30-5+1`

##### Example 7

In [94]:
def calculate_shape_after_max_pool(input_width, kernel_size, padding, stride):
    """Return the output size along one spatial dimension after pooling.

    Computes ``floor((input_width - kernel_size + 2 * padding) / stride) + 1``,
    the output-size formula documented for ``nn.MaxPool2d`` with its default
    ``dilation=1`` and ``ceil_mode=False``.

    Args:
        input_width: size of the input along the dimension being pooled.
        kernel_size: size of the pooling window along that dimension.
        padding: padding added to each side of the input.
        stride: step between consecutive pooling windows.

    Returns:
        The output size as a float holding a whole number (for example
        ``30.0``). The float type is kept so existing callers and the
        displayed results are unaffected.

    Raises:
        ZeroDivisionError: if ``stride`` is 0.
    """
    # Floor division: a trailing partial window is dropped, so the quotient
    # must be rounded down rather than left fractional (5, 2, 0, 2 -> 2, not 2.5).
    full_steps = (input_width - kernel_size + 2 * padding) // stride
    return float(full_steps + 1)

In [95]:
calculate_shape_after_max_pool(input_width=60, kernel_size=2, padding=0, stride=2)

30.0

In [96]:
calculate_shape_after_max_pool(5, 3, 0, 1)

3.0

##### Example 8

In [133]:
_image = torch.arange(3*64*64).reshape(3, 64, 64).float()

In [166]:
import torch.nn as nn

In [167]:
_image.shape

torch.Size([3, 64, 64])

Apply a convolution with a `3x3` kernel to `_image`, a color image of size `64x64`.

Explain how you chose the parameters.

In [168]:
conv = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3, stride=1, padding=1)

**Explain**

The output has shape `5x64x64`.

- `in_channels` = `3` because the depth of the image (aka: number of color channels) is `3`
- `out_channels` = `5` because we apply five kernels, so the output has five channels
- With `stride=1` and `padding=1`, a `3x3` kernel keeps the height and width of the output equal to those of the input: `(64 - 3 + 2*1) / 1 + 1 = 64`

In [169]:
conv(_image).shape

torch.Size([5, 64, 64])

##### Example 8

In [46]:
batch = torch.rand(4, 3, 32, 32)

In [47]:
import torch.nn as nn

`batch` is a batch of 4 color images, each of size `32x32`

In [48]:
batch.shape

torch.Size([4, 3, 32, 32])

Flatten the pixels of each color channel using `PyTorch`

In [49]:
flatten = nn.Flatten(start_dim=2, end_dim=3)

In [50]:
flattened_batch = flatten(batch)

In [51]:
flattened_batch.shape

torch.Size([4, 3, 1024])

##### Example 9

In [24]:
batch = torch.rand(4, 3, 32, 32)

In [39]:
import torch.nn as nn

`batch` is a batch of 4 color images, each of size `32x32`

In [40]:
batch.shape

torch.Size([4, 3, 32, 32])

Flatten the three color channels of each image into a single vector using `PyTorch`

In [44]:
flatten = nn.Flatten(start_dim=1, end_dim=3)

In [42]:
flattened_batch = flatten(batch)

In [43]:
flattened_batch.shape

torch.Size([4, 3072])

##### Example 10

In [62]:
_image = torch.arange(3*64*64).reshape(3, 64, 64).float()

In [63]:
import torch.nn as nn

In [64]:
_image.shape

torch.Size([3, 64, 64])

Apply a convolution with a `3x3` kernel to `_image`, a color image of size `64x64`.

Explain how you chose the parameters.

In [69]:
conv = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3)

In [68]:
conv(_image).shape

torch.Size([5, 62, 62])