In [1]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import numpy as np

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
%matplotlib inline
import sklearn.metrics
import seaborn as sns
import random

In [2]:
# Use cuda if present
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Device available now:', device)

Device available now: cpu


In [3]:
def set_seed(seed = 1234):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed()

### Data

In [4]:
# Customized transform (transforms to tensor, here you can normalize, perform Data Augmentation etc.)
my_transform = transforms.Compose([transforms.ToTensor()])

# Download data
mnist_train = torchvision.datasets.MNIST('data', train = True, download=True, transform=my_transform)
mnist_test = torchvision.datasets.MNIST('data', train = False, download=True, transform=my_transform)

In [5]:
print(len(mnist_train), mnist_train)
print(len(mnist_test))

60000 Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )
10000


In [6]:
#  Create a train_loader to select a batch from it
train_loader_example = torch.utils.data.DataLoader(mnist_train, batch_size=64)

# Taking a single batch of the images
images, labels = next(iter(train_loader_example))
print('1. original images shape:', images.shape)

# Remove channel from shape
images = images.reshape(-1, 28, 28)
print('2. reshaped images shape:', images.shape, '\n')

1. original images shape: torch.Size([64, 1, 28, 28])
2. reshaped images shape: torch.Size([64, 28, 28]) 



In [7]:
len(train_loader_example)

938

## Model architecture

In [79]:
class MultilayerRNN_MNIST(nn.Module):
    def __init__(self, 
                 input_size, 
                 hidden_size, 
                 layer_size, 
                 output_size, 
                 skip_connections = True, 
                 pooling = "mean"):
        
        super(MultilayerRNN_MNIST, self).__init__()
        self.input_size = input_size 
        self.hidden_size = hidden_size, 
        self.layer_size = layer_size, 
        self.output_size = output_size
        self.pooling = pooling
        self.skip_connections = skip_connections
        self.activation = nn.GLU()

        self.rnn = nn.RNN(input_size, hidden_size, layer_size, batch_first=True, nonlinearity='tanh')
        self.MLP = nn.Sequential(
            nn.Linear(hidden_size, hidden_size * 2),
            self.activation,
        )
        self.output_linear = nn.Linear(hidden_size, output_size)
        
        
    def forward(self, images, prints=False):
        if prints: print('images shape:', images.shape)
        
        # Instantiate hidden_state at timestamp 0
        hidden_state = torch.zeros(self.layer_size, images.size(0), self.hidden_size)
        hidden_state = hidden_state.requires_grad_()
        if prints: print('Hidden State shape:', hidden_state.shape)
        
        # Compute RNN
        output, last_hidden_state = self.rnn(images, hidden_state)
        ### output: (batch, L, hidden_size) ### (last hidden layer)
        ### last_hidden_state: (num_layers, batch, hidden_size) ###
        if prints: print('RNN Output shape:', output.shape, '\n' +
                         'RNN last_hidden_state shape', last_hidden_state.shape)

        ###  Skip connection + MLP   #### 
        if self.skip_connections:                    
            output += self.MLP(output) 
        if prints: print('MLP Output shape:', output.shape)

        ### Pooling + Output Linear ###
        if self.pooling == "none":
            output = self.output_linear(output[:, -1, :])
            #use the last hidden state

        elif self.pooling == "mean": 
            output = self.output_linear(torch.mean(output, axis= 1)) 
            # time pooling of all hidden state
        if prints: print('FNN Output shape:', output.shape)
        return output

In [80]:
# ==== STATICS ====
batch_size = 64
input_size = 28
hidden_size = 100  
layer_size = 5         
output_size = 10


In [81]:
# Create model instance
multilayer_rnn_example = MultilayerRNN_MNIST(input_size, hidden_size, layer_size, output_size)
print(multilayer_rnn_example)


# Making log predictions:
out = multilayer_rnn_example(images, prints=True)

MultilayerRNN_MNIST(
  (activation): GLU(dim=-1)
  (rnn): RNN(28, 100, num_layers=5, batch_first=True)
  (MLP): Sequential(
    (0): Linear(in_features=100, out_features=200, bias=True)
    (1): GLU(dim=-1)
  )
  (output_linear): Linear(in_features=100, out_features=10, bias=True)
)
images shape: torch.Size([64, 28, 28])
Hidden State shape: torch.Size([5, 64, 100])
RNN Output shape: torch.Size([64, 28, 100]) 
RNN last_hidden_state shape torch.Size([5, 64, 100])
MLP Output shape: torch.Size([64, 28, 100])
FNN Output shape: torch.Size([64, 10])


In [42]:
def get_accuracy(out, actual_labels, batchSize):
    '''Saves the Accuracy of the batch.
    Takes in the log probabilities, actual label and the batchSize (to average the score).'''
    predictions = out.max(dim=1)[1]
    correct = (predictions == actual_labels).sum().item()
    accuracy = correct/batch_size
    
    return accuracy

In [43]:
def train_network(model, train_data, test_data, batchSize=64, num_epochs=1, learning_rate=0.001):
    
    '''Trains the model and computes the average accuracy for train and test data.'''
    
    print('Get data ready...')
    # Create dataloader for training dataset - so we can train on multiple batches
    # Shuffle after every epoch
    train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batchSize, shuffle=True, drop_last=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batchSize, shuffle=True, drop_last=True)
    
    # Create criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    
    print('Training started...')
    # Train the data multiple times
    for epoch in range(num_epochs):
        
        # Save Train and Test Loss
        train_loss = 0
        train_acc = 0
        
        # Set model in training mode:
        model.train()
        
        for k, (images, labels) in enumerate(train_loader):
            
            # Get rid of the channel
            images = images.view(-1, 28, 28)
            
            # Create log probabilities
            out = model(images)
            # Clears the gradients from previous iteration
            optimizer.zero_grad()
            # Computes loss: how far is the prediction from the actual?
            loss = criterion(out, labels)
            # Computes gradients for neurons
            loss.backward()
            # Updates the weights
            optimizer.step()
            
            # Save Loss & Accuracy after each iteration
            train_loss += loss.item()
            train_acc += get_accuracy(out, labels, batchSize)
            
        
        # Print Average Train Loss & Accuracy after each epoch
        print('TRAIN | Epoch: {}/{} | Loss: {:.2f} | Accuracy: {:.2f}'.format(epoch+1, num_epochs, train_loss/k, train_acc/k))
            
            
    print('Testing Started...')
    # Save Test Accuracy
    test_acc = 0
    # Evaluation mode
    model.eval()
    
    for k, (images, labels) in enumerate(test_loader):
        # Get rid of the channel
        images = images.view(-1, 28, 28)
        
        # Create logit predictions
        out = model(images)
        # Add Accuracy of this batch
        test_acc += get_accuracy(out, labels, batchSize)
        
    # Print Final Test Accuracy
    print('TEST | Average Accuracy per {} Loaders: {:.5f}'.format(k, test_acc/k) )

In [82]:
# ==== STATICS ====
batch_size = 64
input_size = 28
hidden_size = 100  
layer_size = 2         
output_size = 10

# Instantiate the model
# We'll use TANH as our activation function
multilayer_rnn = MultilayerRNN_MNIST(input_size, hidden_size, layer_size, output_size, pooling= "mean")

# ==== TRAIN ====
train_network(multilayer_rnn, mnist_train, mnist_test, num_epochs=3)

Get data ready...
Training started...
TRAIN | Epoch: 1/3 | Loss: 0.77 | Accuracy: 0.75
TRAIN | Epoch: 2/3 | Loss: 0.20 | Accuracy: 0.94
TRAIN | Epoch: 3/3 | Loss: 0.14 | Accuracy: 0.96
Testing Started...
TEST | Average Accuracy per 155 Loaders: 0.97409
