In [2]:
# Building Models with PyTorch

# preliminary

import torch

# on a more specific level, models are subclasses of torch.nn.Module
# derived classes may override the methods of the parent class, but otherwise
# inherit the parent's methods and attributes

class TinyModel(torch.nn.Module):

    def __init__(self):
        super().__init__()

        self.linear1 = torch.nn.Linear(100, 200)
        self.activation = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(200, 10)
        self.softmax = torch.nn.Softmax()

    def forward(self, x):
        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)
        x = self.softmax(x)
        return x
    
tinymodel = TinyModel()

print("The Model:")
print(tinymodel)

# In what follows, we go through some really popular layer types and how to use them


The Model:
TinyModel(
  (linear1): Linear(in_features=100, out_features=200, bias=True)
  (activation): ReLU()
  (linear2): Linear(in_features=200, out_features=10, bias=True)
  (softmax): Softmax(dim=None)
)


In [4]:
# Common Layer Types

# Linear Layer
# This is a dense, fully connected layer by default

lin = torch.nn.Linear(3, 2)
x = torch.rand(1, 3)
print("Input:")
print(x)

for param in lin.parameters():
    print(param)

y = lin(x)
print(y)

Input:
tensor([[0.9263, 0.8603, 0.3848]])
Parameter containing:
tensor([[-0.0094,  0.2088, -0.3791],
        [-0.4903, -0.3000, -0.4874]], requires_grad=True)
Parameter containing:
tensor([ 0.0507, -0.0682], requires_grad=True)
tensor([[ 0.0757, -0.9681]], grad_fn=<AddmmBackward0>)


In [None]:
# Convolutional Layers
# These essentially "slide" a filter over the input tensor (image) to produce 
# a feature map that is in lower in dimension
# The specific choice of filter depends on hyperparameters such as kernel size, stride, padding, etc.

# kernel size: the size of the filter
# stride: the number of pixels the filter moves each time
# padding: the number of pixels added to the input tensor to ensure the output tensor is the same size

# Easiest to visualize and understand convolutions with image data, but the same principles
# also apply to other data where the spatial relationships between elements are important
# e.g. text data
# I suppose time series data could also count as a "spatial" relationship, but one would
# need to be careful with specifying a one-sided convolution
# similar problem to generalised dynamic factor models

import torch.functional as F

# LeNet example
# Input: 1 x 32 x 32 black and white image
# Output: 10 classes

# First input to convolutional layer is the number of input channels
# in black and white case, it is 1. For colour images, it is 3 (RGB)
# Second input is the number of output channels. This is the number of filters
# applied to the input image, and the number of features we ask it to produce
# Third argument is the kernel/window size
# Note that it is possible to specify a height different to width by passing a tuple
# although this is somewhat strange (square filters are the norm)

# A convolutional layer will produce an activation map
# which is a 3d tensor of dimension 
# (number of output channels, height, width)
# The height and width are determined by the input image size, kernel size, stride, and padding

# In this case, the FIRST layer outputs 6 x 28 x 28, where the 28 is determined by the fact that
# when scanning a 5 pixel window over a 32 pixel row/col, there are only 32 - 5 = 28 valid 
# positions

# This is then passed through a relu activation function
# and then through a max pooling layer with a 2 x 2 window, which merges each 
# 2 x 2 windows into a single cell
# This results in a 6 x 14 x 14 output, where the 14 is determined by 28 / 2 = 14

# The second layer expects 6 input channels due to the output of the first layer,
# has 16 output channels, and a 3 x 3 kernel
# This results in a 
# 16 x 12 x 12 output, where the 12 is determined by the fact that when scanning a 3 pixel window
# over a 28 pixel row/col, there are only 28 - 3 = 12 valid positions

class LeNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # 1 input image channel (black and white), 6 output channels, 
        # 5 x 5 square convolution kernel
        self.conv1 = torch.nn.Conv2d(1, 6, kernel_size = 5)
        self.conv2 = torch.nn.Conv2d(6, 16, kernel_size = 3)
        # affine operation
        # 6 x 6 from image dimension
        self.fc1 = torch.nn.Linear(16 * 6 * 6, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

        # forward pass
        def forward(self, x):
            # Max pooling over a (2, 2) window
            x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
            # If the size is a square, you can specify with a single number
            x = F.max_pool2d(F.relu(self.conv2(x)), 2)
            x = x.view(-1, self.num_flat_features(x))
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            return x
        
        def num_flat_features(self, x):
            # i.e. call all dimensions except the batch dimension
            size = x.size()[1:]
            # initilise a variable to store the number of features
            num_features = 1
            for s in size:
                num_features *= s
            return num_features

# I think this documentation is actually wrong, embarassing..


In [None]:
# Recurrent Layers
# These are used for sequential data, where the order of the data matters
# e.g. time series, text, etc.

# Variants include
# vanilla RNN, LSTMs, GRU

class LSTMagger(torch.nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super().__init__()

        self.hidden_dim = hidden_dim
        
        self.lstm = torch.nn.LSTM(10, 20, 2)
        self.linear = torch.nn.Linear(20, 10)

    def forward(self, x):
        # initial hidden state
        h0 = torch.zeros(2, x.size(1), 20)
        # initial cell state
        c0 = torch.zeros(2, x.size(1), 20)
        # forward pass
        x, _ = self.lstm(x, (h0, c0))
        x = self.linear(x)
        return x


In [None]:
# Transformers

