# Deep Learning Course: Lab Exercises

In this lab exercise you will learn about RNNs and LSTMs on toy sequential tasks:

Part 1: RNN and LSTM based 1D signal classifier & predictor

Part 2: RNN and LSTM based names classification with a character-level model

In [None]:
### Connect to Google Drive if you are using Drive.
from google.colab import drive

# Mount Drive under /content/drive. The result of `drive.mount` is deliberately
# not assigned: rebinding `drive` to it would shadow the imported module.
drive.mount('/content/drive')

In [None]:
# Working folder: relative paths used later in this notebook
# (e.g. 'data/names/*.txt') are resolved against this directory.
import os
# Replace '...' with your own folder. The path is relative, so it is resolved
# against whatever the current working directory is when this cell runs.
working_dir = 'drive/MyDrive/...'
os.chdir(working_dir)

In [None]:
### Import all your libraries
import random 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch.utils.data as data

import numpy as np 
from scipy import signal

from sklearn.metrics import f1_score

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

## Part 1: RNN and LSTM based 1D signal classifier & predictor

### 1.A. Data

In [None]:
## Sine, sawtooth, triangle and square wave generators
def get_noise():
    """Draw 500 samples of zero-mean Gaussian noise.

    The standard deviation is itself drawn uniformly from [0, 0.1] on every
    call, so each call produces a different noise level.
    """
    sigma = random.uniform(0, 0.1)
    return np.random.normal(loc=0, scale=sigma, size=500)

def get_sine():
    """Return ``(time, wave)`` for a noisy sine wave sampled at 500 points.

    ``time`` spans [0, 1]. The wave is ``sin(k * pi * time)`` with ``k`` drawn
    from 5..9, plus the output of ``get_noise``.
    """
    jitter = get_noise()
    t = np.linspace(0, 1, num=500)
    half_periods = random.randrange(5, 10)
    clean = np.sin(half_periods * np.pi * t)
    return t, clean + jitter

def get_triangle():
    """Return ``(time, wave)`` for a noisy triangle wave sampled at 500 points.

    ``time`` spans [0, 1]. The wave is a symmetric sawtooth (width 0.5) of
    phase ``k * pi * time`` with ``k`` drawn from 5..9, plus the output of
    ``get_noise``.
    """
    jitter = get_noise()
    t = np.linspace(0, 1, num=500)
    half_periods = random.randrange(5, 10)
    clean = signal.sawtooth(half_periods * np.pi * t, width=0.5)
    return t, clean + jitter

def get_square():
    """Return ``(time, wave)`` for a noisy square wave sampled at 500 points.

    ``time`` spans [0, 1]. The wave is a square wave (duty cycle 0.5) of phase
    ``k * pi * time`` with ``k`` drawn from 5..9, plus the output of
    ``get_noise``.
    """
    jitter = get_noise()
    t = np.linspace(0, 1, num=500)
    half_periods = random.randrange(5, 10)
    clean = signal.square(half_periods * np.pi * t, duty=0.5)
    return t, clean + jitter

def get_sawtooth():
    """Return ``(time, wave)`` for a noisy sawtooth wave sampled at 500 points.

    ``time`` spans [0, 1]. The wave is a rising sawtooth of phase
    ``k * pi * time`` with ``k`` drawn from 5..9, plus the output of
    ``get_noise``.
    """
    jitter = get_noise()
    t = np.linspace(0, 1, num=500)
    half_periods = random.randrange(5, 10)
    clean = signal.sawtooth(half_periods * np.pi * t)
    return t, clean + jitter
        
# Preview one random example of each wave type, one figure per wave.
for make_wave in (get_sine, get_triangle, get_square, get_sawtooth):
    plt.plot(*make_wave())
    plt.show()

### 1.B. RNN Wave Classification

In [None]:
## Dataloader 

class WavePreloader(data.Dataset):
    """Synthetic dataset of noisy 1D waves labelled by wave type.

    Samples are generated on the fly. The requested index is only
    bounds-checked and does not select a particular wave, so the same index
    yields a different random sample on every access.
    """

    def __init__(self, samples=1000):
        self.samples = samples  # Total samples reported by __len__
        # Functions that generate a sample; the position in this list is the class label
        self.funcs = [get_sine, get_triangle, get_sawtooth, get_square]

    def __getitem__(self, index):
        """Return ``(wave, label)`` for a freshly generated random wave.

        ``wave`` is the generated signal reshaped to a single column and
        ``label`` is the index of the generator in ``self.funcs``.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
                Without this, plain iteration over the dataset (``list(ds)``,
                ``for x in ds``) would never terminate.
        """
        if not -self.samples <= index < self.samples:
            raise IndexError(
                f"index {index} is out of range for a dataset of {self.samples} samples"
            )
        # Draw the label from the actual number of generators instead of a
        # separately hard-coded upper bound.
        cls_lbl = random.randrange(len(self.funcs))
        _, inp = self.funcs[cls_lbl]()  # generate wave (the time axis is discarded)

        return inp.reshape(-1, 1), cls_lbl

    def __len__(self):
        return self.samples


In [None]:
## RNN classifier

# *****START CODE
class WaveClassifier(nn.Module):
    # Exercise skeleton: as written, no layers are defined and forward()
    # returns its input unchanged.
    def __init__(self, n_classes=4):
        super(WaveClassifier, self).__init__()
        # Define a 1-layer RNN which gives an 8-dimensional feature map output
        # Define a linear layer that takes a vector of size 8 as input and gives n_classes outputs
        
    def forward(self, x):
         # create the initial hidden state h0
         # NOTE(review): the original hint said h0 "has same size as input x";
         # nn.RNN presumably expects (num_layers, batch, hidden_size) instead - confirm against the PyTorch docs
         # forward pass through rnn
         # apply linear layer to last time step of rnn output
        return x
# *****END CODE

In [None]:
## Instantiate dataloader, optimizer, loss, and network

lr = 0.01  # learning rate, intended for the optimizer to be defined below
batch_size = 100  # samples per mini-batch, intended for the data loaders to be defined below
epochs = 100  # number of iterations of the `for epoch in range(epochs)` training loop

# *****START CODE
# define training dataset
# define test dataset

# define train loader (the training cell iterates over the name `train_loader`)
# define test loader (the training cell iterates over the name `test_loader`)

# define a multi-class loss
# instantiate your network (the training cell calls it `model`)
# instantiate your optimizer
# *****END CODE

In [None]:
## Train your network

# *****START CODE
for epoch in range(epochs):
    running_train_loss = []   # per-batch training losses of this epoch (averaged below)
    running_test_loss = []    # per-batch test losses of this epoch (averaged below)
    
    train_predictions = []    # predicted class labels of this epoch (passed to f1_score below)
    train_ground_truths = []  # matching ground-truth class labels
    
    test_predictions = []
    test_ground_truths = []
    
    # train loop
    
    model.train()
    for inp_batch, lbl_batch in train_loader:
        # TODO: loop body to be written - forward pass, loss, backward pass, optimizer step;
        # append to running_train_loss, train_predictions and train_ground_truths
    
    # test loop
    model.eval()
    for inp_batch, lbl_batch in test_loader:
        # TODO: loop body to be written - forward pass and loss only;
        # append to running_test_loss, test_predictions and test_ground_truths
      
    # print mean epoch loss and macro-averaged f1 score using accumulated loss and accumulated labels
    print (f"\n###### Epoch {epoch} ######")
    print ("Train Loss : ", np.mean(running_train_loss))
    print ("Train F1 Score : ", f1_score(train_ground_truths, train_predictions, average="macro"))
    
    print ("Test Loss : ", np.mean(running_test_loss))
    print ("Test F1 Score : ", f1_score(test_ground_truths, test_predictions, average="macro"))
# *****END CODE

### 1.C. LSTM Wave prediction


In [None]:
## Dataloader 

class WavePredPreloader(data.Dataset):
    """Synthetic dataset for wave continuation: first half in, second half out.

    Samples are generated on the fly. The requested index is only
    bounds-checked and does not select a particular wave, so the same index
    yields a different random sample on every access.
    """

    def __init__(self, samples=1000):
        self.samples = samples  # Total samples reported by __len__
        # Functions that generate a sample
        self.funcs = [get_sine, get_triangle, get_sawtooth, get_square]

    def __getitem__(self, index):
        """Return ``(first_half, second_half)`` of a freshly generated wave.

        Both halves are reshaped to a single column; the first is the model
        input and the second is the regression target.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
                Without this, plain iteration over the dataset (``list(ds)``,
                ``for x in ds``) would never terminate.
        """
        if not -self.samples <= index < self.samples:
            raise IndexError(
                f"index {index} is out of range for a dataset of {self.samples} samples"
            )
        # Pick the generator from the actual number of entries instead of a
        # separately hard-coded upper bound.
        cls_lbl = random.randrange(len(self.funcs))
        _, inp = self.funcs[cls_lbl]()  # generate wave (the time axis is discarded)
        # Split at the midpoint of the generated signal (250 for the 500-sample generators)
        half = len(inp) // 2
        first_half = inp[:half]
        second_half = inp[half:]
        return first_half.reshape(-1, 1), second_half.reshape(-1, 1)

    def __len__(self):
        return self.samples

In [None]:
## LSTM regressor

# *****START CODE
class WavePredictor(nn.Module):
    # Exercise skeleton: the layers and both LSTM calls in forward() are still to be written.
    def __init__(self, n_classes=4):
        super(WavePredictor, self).__init__()
        # Define a 1-layer LSTM which gives an 8-dimensional feature map output
        # Define a second 1-layer LSTM which takes the 8-dimensional features as input and gives a 1-dimensional output
        
    def forward(self, x):
        # create the initial hidden state h0 for the first LSTM
        # create the initial cell state c0 for the first LSTM
        # NOTE(review): the original hints said h0/c0 "has same size as input x";
        # nn.LSTM presumably expects (num_layers, batch, hidden_size) instead - confirm against the PyTorch docs
        x, (hn, cn) = 
        
        # create the initial hidden state h0 for the second LSTM
        # create the initial cell state c0 for the second LSTM
        x, (hn, cn) = 
        return x
# *****END CODE

In [None]:
## Instantiate dataloader, optimizer, loss, and network

lr = 0.0001  # learning rate, intended for the optimizer to be defined below
batch_size = 100  # samples per mini-batch, intended for the data loaders to be defined below
epochs = 10  # number of iterations of the `for epoch in range(epochs)` training loop

# *****START CODE
# define training dataset
# define test dataset

# define train loader (the training cell iterates over the name `train_loader`)
# define test loader (the training and plotting cells use the name `test_loader`)

# define an MSE loss
# instantiate your network (the training and plotting cells call it `model`)
# instantiate your optimizer
# *****END CODE

In [None]:
## Train your network

# *****START CODE
for epoch in range(epochs):
    running_train_loss = []   # per-batch training losses of this epoch (averaged below)
    running_test_loss = []    # per-batch test losses of this epoch (averaged below)
    
    # train loop
    
    model.train()
    for inp_batch, lbl_batch in train_loader:
        model.zero_grad()  # clear gradients left over from the previous batch
        # TODO: rest of the loop body to be written - forward pass, loss, backward pass,
        # optimizer step; append the loss to running_train_loss
    
    # test loop
    model.eval()
    for inp_batch, lbl_batch in test_loader:
        # TODO: loop body to be written - forward pass and loss only;
        # append the loss to running_test_loss
      
    # print mean epoch loss using the accumulated losses (no F1 score: this is a regression task)
    print (f"\n###### Epoch {epoch} ######")
    print ("Train Loss : ", np.mean(running_train_loss))
    
    print ("Test Loss : ", np.mean(running_test_loss))
# *****END CODE

In [None]:
# Plot some evaluation examples

model.eval()
with torch.no_grad():  # inference only, no gradients needed
    inp_batch, lbl_batch = next(iter(test_loader))
    # Plain tensors are used directly; the deprecated `Variable` wrapper is not needed.
    inp_batch = inp_batch.float()
    lbl_batch = lbl_batch.float()
    pred = model(inp_batch)

# Convert to NumPy once, outside the plotting loop.
inp_np = inp_batch.detach().cpu().numpy()
lbl_np = lbl_batch.detach().cpu().numpy()
pred_np = pred.detach().cpu().numpy()

# Never ask for more examples than the batch actually contains.
examples_to_show = min(8, len(inp_np))
# Round the row count up so an odd number of examples still fits, and keep
# `axs` two-dimensional even when there is a single row.
n_rows = (examples_to_show + 1) // 2
fig, axs = plt.subplots(n_rows, 2, figsize=(10,20), squeeze=False)

# x positions: the input covers the first half of the wave, the target and
# the prediction cover the steps that follow it.
input_steps = np.arange(inp_np.shape[1])
target_steps = np.arange(inp_np.shape[1], inp_np.shape[1] + lbl_np.shape[1])

for i in range(examples_to_show):
    ax = axs[i//2, i%2]
    ax.plot(input_steps, inp_np[i])
    ax.plot(target_steps, lbl_np[i])
    ax.plot(target_steps, pred_np[i])
    ax.legend(['input', 'ground truth', 'prediction'])

# Hide the unused panel left over when the number of examples is odd.
for ax in axs.flat[examples_to_show:]:
    ax.axis('off')

## Part 2: RNN and LSTM based names classification with a character-level model

### 2.A. Data

In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os

def findFiles(path):
    """Return the list of file paths matching the glob pattern *path*."""
    matches = glob.glob(path)
    return matches

# Sanity check: an empty list here means no name files were found.
print(findFiles('data/names/*.txt'))

import unicodedata
import string

# Alphabet of the character-level model: ASCII letters plus a few punctuation marks
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Strip accents from *s* and drop every character not in ``all_letters``.

    The string is NFD-decomposed so that accents become separate combining
    marks (category 'Mn'), which are then discarded along with any other
    character outside the model's alphabet.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [
        ch for ch in decomposed
        if ch in all_letters and unicodedata.category(ch) != 'Mn'
    ]
    return ''.join(kept)

print(unicodeToAscii('Ślusàrski'))

In [None]:
# Build the category_lines dictionary: maps each category (language) name to
# the list of names read from that category's file
category_lines = {}
# Category names in the order their files are visited; a name's position in
# this list is used as its integer class label by NamesDataset
all_categories = []

# Read a file and split into lines
def readLines(filename):
    """Return the lines of a UTF-8 text file, each converted with unicodeToAscii.

    Leading and trailing whitespace of the whole file is stripped before
    splitting on newlines. The file is opened in a ``with`` block so the
    handle is closed as soon as the content has been read.
    """
    with open(filename, encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

# glob returns matches in arbitrary order; sorting keeps the
# category -> class-index mapping identical across runs and machines.
for filename in sorted(findFiles('data/names/*.txt')):
    # The category name is the file name without its directory and extension
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

In [None]:
# Sanity check: show the first five names loaded for one category
# (raises KeyError if no 'Italian' category was loaded)
print(category_lines['Italian'][:5])

In [None]:
# Flatten category_lines into [name, category] pairs and record the length of
# the longest name (used to size the padded name tensors).
names_data = []
max_length = 0
for category, names in category_lines.items():
    # The builtin max keeps max_length a plain int, scans each list once, and
    # `default=0` tolerates a category with no names.
    longest = max((len(name) for name in names), default=0)
    max_length = max(max_length, longest)
    names_data.extend([name, category] for name in names)

print(names_data[:5])
print("Names' maximum length =", max_length)

In [None]:
# Find letter index from all_letters, e.g. "a" = 0
def letterToIndex(letter):
    """Return the position of *letter* in ``all_letters``, or -1 if it is absent."""
    position = all_letters.find(letter)
    return position

# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letterToTensor(letter):
    """Return a ``1 x n_letters`` one-hot tensor for *letter*.

    A letter that is not part of ``all_letters`` yields an all-zero tensor.
    """
    tensor = torch.zeros(1, n_letters)
    index = letterToIndex(letter)
    # letterToIndex returns -1 for unknown letters; using that as an index
    # would silently mark the LAST letter of the alphabet instead.
    if index >= 0:
        tensor[0, index] = 1
    return tensor

# Turn a line into a <max_length x n_letters>,
# or an array of one-hot letter vectors
def lineToTensor(line, max_length=100):
    """Return a ``max_length x n_letters`` tensor of one-hot letter rows.

    Row ``i`` encodes ``line[i]``; rows past the end of the line stay zero
    (padding). A letter that is not part of ``all_letters`` also leaves its
    row all-zero.

    Raises:
        IndexError: if *line* is longer than *max_length*.
    """
    if len(line) > max_length:
        raise IndexError(
            f"line of length {len(line)} does not fit in max_length={max_length}"
        )
    tensor = torch.zeros(max_length, n_letters)
    for li, letter in enumerate(line):
        index = letterToIndex(letter)
        # letterToIndex returns -1 for unknown letters; using that as an index
        # would silently mark the LAST letter of the alphabet instead.
        if index >= 0:
            tensor[li, index] = 1
    return tensor

# Demo: the <1 x n_letters> tensor for a single letter
print(letterToTensor('J'))

# Demo: the size of a padded name tensor, i.e. (max_length, n_letters)
print(lineToTensor('Jones', max_length=max_length).size())

In [None]:
class NamesDataset(data.Dataset):
    """Dataset over ``[name, category]`` pairs for character-level classification.

    Each item is ``(line_tensor, category_tensor)``: the name encoded by
    ``lineToTensor`` with this dataset's ``max_length``, and a one-element
    long tensor holding the category's position in ``all_categories``.
    """

    def __init__(self, names_data, max_length=100):
        self.max_length = max_length
        self.data = names_data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        name, language = self.data[index]
        label = all_categories.index(language)
        target = torch.tensor([label], dtype=torch.long)
        encoded = lineToTensor(name, max_length=self.max_length)
        return encoded, target

### 2.B. Name Classification 

In [None]:
# Define a RNN or LSTM model of your choice for names classification

# *****START CODE
class NameClassifier(nn.Module):
    # Exercise skeleton: as written, no layers are defined and forward()
    # returns its input unchanged.
    def __init__(self, input_size, hidden_size, output_size):
        super(NameClassifier, self).__init__()
        # TODO: define the recurrent layer (RNN or LSTM) and the output layer here
        
    def forward(self, x): 
        # TODO: run x through the recurrent layer and map the result to class scores
        return x
# *****END CODE

In [None]:
## Instantiate dataloader, optimizer, loss, and network

n_hidden = 32  # hidden size, intended for the network to be instantiated below
lr = 0.01  # learning rate, intended for the optimizer to be defined below
batch_size = 128  # samples per mini-batch, intended for the data loaders to be defined below
epochs = 30  # number of iterations of the `for epoch in range(epochs)` training loop

# *****START CODE
# Shuffle in place before splitting: names_data was built one category after another
random.shuffle(names_data)
# define training dataset
# define test dataset

# define train loader (the training cell iterates over the name `train_loader`)
# define test loader (the training cell iterates over the name `test_loader`)

# instantiate your network (the training cell calls it `model`)
# instantiate your optimizer
# instantiate your loss
# *****END CODE

In [None]:
!pip install torchnet
import torchnet as tnt

# define confusion matrix using tnt package, sized by the number of name categories
# (the training cell resets it every epoch and reads its `.conf` array)
confusion_matrix = tnt.meter.ConfusionMeter(n_categories)

In [None]:
## Train your network

# *****START CODE
train_loss = []  # mean training loss of every epoch (plotted in a later cell)
test_loss = []   # mean test loss of every epoch (plotted in a later cell)
for epoch in range(epochs):
    running_train_loss = []   # per-batch training losses of this epoch (averaged below)
    running_test_loss = []    # per-batch test losses of this epoch (averaged below)
    
    # train loop
    model.train()
    for line_tensor_btch, category_tensor_batch in train_loader:
        # TODO: loop body to be written - forward pass, loss, backward pass, optimizer step;
        # append the loss to running_train_loss
    
    # test loop
    confusion_matrix.reset()  # start every epoch with an empty confusion matrix
    model.eval()
    for line_tensor_btch, category_tensor_batch in test_loader:
        # TODO: loop body to be written - forward pass and loss only; append the loss to
        # running_test_loss and add the batch predictions to confusion_matrix
    
    # print mean epoch losses and the validation accuracy derived from the confusion matrix
    print (f"\n###### Epoch {epoch} ######")
    train_loss.append(np.mean(running_train_loss))
    print ("Train Loss : ", np.mean(running_train_loss))
    test_loss.append(np.mean(running_test_loss))    
    print ("Val Loss : ", np.mean(running_test_loss))
    #print("Confusion Matrix:")
    #print(confusion_matrix.conf)
    # accuracy in percent: sum of the diagonal (presumably the correctly classified samples) over the sum of all entries
    val_acc=(np.trace(confusion_matrix.conf)/float(np.ndarray.sum(confusion_matrix.conf))) *100
    print ("Val Acc : ", val_acc)
# *****END CODE

In [None]:
# Compare the per-epoch mean training and validation losses.
plt.figure()
# Label the curves so they can be told apart in the figure.
plt.plot(train_loss, label='train')
plt.plot(test_loss, label='validation')
plt.xlabel('epoch')
plt.ylabel('mean loss')
plt.legend()
plt.show()

In [None]:
confusion = confusion_matrix.conf

# Set up plot
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
cax = ax.matshow(confusion, cmap=plt.get_cmap('cividis'), vmax=50)
fig.colorbar(cax)

# Set up axes: fix one tick per row/column first, then attach the labels.
# Setting tick labels without fixed tick positions (and shifting them with a
# leading '' entry) can misalign the labels with the matrix cells.
tick_positions = list(range(len(all_categories)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(all_categories, rotation=90)
ax.set_yticks(tick_positions)
ax.set_yticklabels(all_categories)

plt.show()