### RNN, LSTM, GRU: Number sequence

In [1]:
import time
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data import random_split


In [2]:
# Training hyperparameters shared by every model trained in this notebook.
batch_size = 64
num_epochs = 15
learning_rate = 0.001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the global NumPy and PyTorch generators so that the generated
# sequence, the train/validation split, the weight initialisation and the
# batch order are the same after every "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Digit vocabulary; its length is passed to the models both as the embedding
# dictionary size and as the number of output classes.
numbers ='0123456789'

In [3]:
def generate_sequence():
    """Build a random digit sequence together with its shifted targets.

    A length is drawn from [1000, 1100) and then that many input digits
    ``x_i`` are drawn from 0..9, one scalar draw per position.  The targets
    are

        y_1 = x_1
        y_i = (x_i + x_1) mod 10    for i > 1

    Returns:
        (X, Y): two 1-D ``torch.long`` tensors of equal length holding the
        inputs and the targets.
    """
    length = np.random.randint(1000, 1100)
    pairs = np.zeros(shape=(length, 2), dtype=int)

    # Column 0 holds the inputs: one scalar draw per row, in row order.
    for row in pairs:
        row[0] = np.random.randint(0, 10)

    offset = pairs[0, 0]
    # Column 1 holds the targets.  A sum of two digits is at most 18, so the
    # modulo is the same as subtracting 10 once when the sum reaches 10.
    pairs[1:, 1] = (pairs[1:, 0] + offset) % 10
    # The first target is the first input itself, not the shifted value.
    pairs[0, 1] = offset

    pairs = torch.tensor(pairs).long()
    return pairs[:, 0], pairs[:, 1]

In [4]:
# Draw the one sequence that the later cells split, train on and evaluate.
X, Y = generate_sequence()
print('Sequence shape: {}'.format(X.shape))

Sequence shape: torch.Size([1051])


### Data preparation

In [5]:
# Fraction of the (input, target) pairs used for training; the remainder is
# held out for validation.
ratio = .8

dataset = TensorDataset(X, Y)
n_total = len(dataset)
n_train = int(ratio * n_total)
n_val = n_total - n_train

# Split once at random, then wrap each part in a batch loader.  Only the
# training loader (first of the pair) reshuffles its samples.
train_data, val_data = (
    DataLoader(subset, batch_size, shuffle=reshuffle)
    for subset, reshuffle in zip(random_split(dataset, [n_train, n_val]),
                                 (True, False))
)

#### Model

In [6]:
class FlexRNN(nn.Module):
    """Embedding -> recurrent layer -> linear classifier on every time step.

    Args:
        rnnClass: recurrent layer constructor, called as
            ``rnnClass(embedding_size, num_hiddens, batch_first=True)``.
        dictionary_size: number of rows in the embedding table.
        embedding_size: width of each embedding vector.
        num_hiddens: width of the recurrent layer's output features.
        num_classes: number of scores produced by the final linear layer.
    """

    def __init__(self, rnnClass, dictionary_size, embedding_size,
                 num_hiddens, num_classes):
        super().__init__()
        self.num_hiddens = num_hiddens
        # Sub-modules are created in the order embedding, recurrent, linear.
        self.embedding = nn.Embedding(num_embeddings=dictionary_size,
                                      embedding_dim=embedding_size)
        self.hidden = rnnClass(embedding_size, num_hiddens, batch_first=True)
        self.output = nn.Linear(in_features=num_hiddens,
                                out_features=num_classes)

    def forward(self, X):
        """Return class scores for every position of the index tensor ``X``."""
        # The recurrent layer returns (per-step outputs, final state); only
        # the per-step outputs are passed on to the classifier.
        step_features, _ = self.hidden(self.embedding(X))
        return self.output(step_features)

In [7]:
def train_model(model, num_epochs, train_data, val_data,
                criterion, optimizer):
    """Train ``model`` and print loss and accuracy after every epoch.

    Args:
        model: module mapping an input batch to class scores with the
            classes along dim 1.
        num_epochs: number of passes over ``train_data``.
        train_data: iterable of ``(inputs, targets)`` training batches.
        val_data: iterable of ``(inputs, targets)`` validation batches.
        criterion: loss called as ``criterion(scores, targets)``.
        optimizer: optimizer that updates the parameters of ``model``.

    Returns:
        None.  Metrics are printed; each is the mean of the per-batch values.
    """
    # The batches may live on a different device than the model (the notebook
    # builds CPU tensors and moves the models to `device`), so every batch is
    # sent to wherever the model's parameters are.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    def _place(batch):
        """Move ``batch`` to the model's device, if the model has one."""
        return batch if model_device is None else batch.to(model_device)

    for epoch in range(num_epochs):
        train_acc = []
        train_loss = []
        val_acc = []
        val_loss = []

        model.train()
        for x_b, y_b in train_data:
            x_b, y_b = _place(x_b), _place(y_b)
            optimizer.zero_grad()
            answers = model(x_b)  # class scores, one row per target
            loss = criterion(answers, y_b)
            train_loss.append(loss.item())
            batch_acc = (answers.argmax(dim=1) == y_b)
            train_acc.append(batch_acc.sum().item() / batch_acc.shape[0])

            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            for x_b, y_b in val_data:
                x_b, y_b = _place(x_b), _place(y_b)
                answers = model(x_b)
                loss = criterion(answers, y_b)
                val_loss.append(loss.item())
                batch_acc = (answers.argmax(dim=1) == y_b)
                val_acc.append(batch_acc.sum().item() / batch_acc.shape[0])

        print("Epoch {}".format(epoch))
        print("Train loss: {:.3f} Train accuracy: {:.3f}".format(np.mean(train_loss),
                                                                 (np.mean(train_acc))),
                                                                  end=" |  ")
        print("Validation loss: {:.3f} Validation accuracy: {:.3f}".format(np.mean(val_loss),
                                                             (np.mean(val_acc))))
    return None


#### RNN

In [8]:
# Plain RNN variant, placed on `device`, with its own loss and Adam optimizer.
model_rnn = FlexRNN(
    rnnClass=nn.RNN,
    dictionary_size=len(numbers),
    embedding_size=28,
    num_hiddens=128,
    num_classes=len(numbers),
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_rnn.parameters(), lr=learning_rate)

In [9]:
# Train the RNN; per-epoch metrics are printed by train_model.
train_model(model=model_rnn, num_epochs=num_epochs,
            train_data=train_data, val_data=val_data,
            criterion=criterion, optimizer=optimizer)

Epoch 0
Train loss: 1.925 Train accuracy: 0.641 |  Validation loss: 1.531 Validation accuracy: 0.996
Epoch 1
Train loss: 1.229 Train accuracy: 0.998 |  Validation loss: 0.911 Validation accuracy: 1.000
Epoch 2
Train loss: 0.696 Train accuracy: 0.999 |  Validation loss: 0.474 Validation accuracy: 1.000
Epoch 3
Train loss: 0.345 Train accuracy: 0.999 |  Validation loss: 0.236 Validation accuracy: 1.000
Epoch 4
Train loss: 0.181 Train accuracy: 0.999 |  Validation loss: 0.126 Validation accuracy: 1.000
Epoch 5
Train loss: 0.106 Train accuracy: 0.999 |  Validation loss: 0.078 Validation accuracy: 1.000
Epoch 6
Train loss: 0.071 Train accuracy: 0.999 |  Validation loss: 0.053 Validation accuracy: 1.000
Epoch 7
Train loss: 0.053 Train accuracy: 0.999 |  Validation loss: 0.040 Validation accuracy: 1.000
Epoch 8
Train loss: 0.042 Train accuracy: 0.999 |  Validation loss: 0.031 Validation accuracy: 1.000
Epoch 9
Train loss: 0.035 Train accuracy: 0.999 |  Validation loss: 0.026 Validation accura

In [10]:
# Score the trained RNN on the whole generated sequence (training and
# validation samples alike) and show its first few predictions next to the
# targets.
idx = np.random.randint(5, 15)  # number of leading elements to print

model_rnn.eval()
with torch.no_grad():
    # The model was moved to `device`, so the inputs have to follow it; the
    # predictions come back to the CPU to be compared with the CPU targets.
    results = model_rnn(X.to(device)).argmax(dim=1).cpu()
acc = (results == Y).flatten()
acc = (acc.sum() / acc.shape[0])
output = results[:idx]
true_input = Y[:idx]

print(f"Accuracy: {acc:.3f}")
print(f"Validation number: '{output}'")
print(f"True number: '{true_input}'")

Accuracy: 0.999
Validation number: 'tensor([6, 2, 5, 6, 4, 8, 5, 8, 6, 0, 6, 0, 2, 2])'
True number: 'tensor([8, 2, 5, 6, 4, 8, 5, 8, 6, 0, 6, 0, 2, 2])'


#### LSTM

In [11]:
# LSTM variant, placed on `device`, with its own loss and Adam optimizer.
model_lstm = FlexRNN(
    rnnClass=nn.LSTM,
    dictionary_size=len(numbers),
    embedding_size=28,
    num_hiddens=128,
    num_classes=len(numbers),
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_lstm.parameters(), lr=learning_rate)

In [12]:
# Train the LSTM; per-epoch metrics are printed by train_model.
train_model(model=model_lstm, num_epochs=num_epochs,
            train_data=train_data, val_data=val_data,
            criterion=criterion, optimizer=optimizer)

Epoch 0
Train loss: 2.177 Train accuracy: 0.564 |  Validation loss: 2.024 Validation accuracy: 0.795
Epoch 1
Train loss: 1.867 Train accuracy: 0.882 |  Validation loss: 1.623 Validation accuracy: 0.872
Epoch 2
Train loss: 1.331 Train accuracy: 0.949 |  Validation loss: 0.982 Validation accuracy: 1.000
Epoch 3
Train loss: 0.718 Train accuracy: 0.999 |  Validation loss: 0.471 Validation accuracy: 1.000
Epoch 4
Train loss: 0.332 Train accuracy: 0.999 |  Validation loss: 0.218 Validation accuracy: 1.000
Epoch 5
Train loss: 0.167 Train accuracy: 0.999 |  Validation loss: 0.120 Validation accuracy: 1.000
Epoch 6
Train loss: 0.095 Train accuracy: 0.999 |  Validation loss: 0.079 Validation accuracy: 1.000
Epoch 7
Train loss: 0.070 Train accuracy: 0.999 |  Validation loss: 0.059 Validation accuracy: 1.000
Epoch 8
Train loss: 0.053 Train accuracy: 0.999 |  Validation loss: 0.047 Validation accuracy: 1.000
Epoch 9
Train loss: 0.048 Train accuracy: 0.999 |  Validation loss: 0.038 Validation accura

In [13]:
# Score the trained LSTM on the whole generated sequence (training and
# validation samples alike) and show its first few predictions next to the
# targets.
idx = np.random.randint(5, 15)  # number of leading elements to print

model_lstm.eval()
with torch.no_grad():
    # Evaluate the LSTM itself (this cell previously queried model_rnn).
    # The model was moved to `device`, so the inputs have to follow it; the
    # predictions come back to the CPU to be compared with the CPU targets.
    results = model_lstm(X.to(device)).argmax(dim=1).cpu()
acc = (results == Y).flatten()
acc = (acc.sum() / acc.shape[0])
output = results[:idx]
true_input = Y[:idx]

print(f"Accuracy: {acc:.3f}")
print(f"Validation number: '{output}'")
print(f"True number: '{true_input}'")

Accuracy: 0.999
Validation number: 'tensor([6, 2, 5, 6, 4, 8, 5, 8])'
True number: 'tensor([8, 2, 5, 6, 4, 8, 5, 8])'


#### GRU

In [14]:
# GRU variant, placed on `device`, with its own loss and Adam optimizer.
model_gru = FlexRNN(
    rnnClass=nn.GRU,
    dictionary_size=len(numbers),
    embedding_size=28,
    num_hiddens=128,
    num_classes=len(numbers),
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_gru.parameters(), lr=learning_rate)

In [15]:
# Train the GRU; per-epoch metrics are printed by train_model.
train_model(model=model_gru, num_epochs=num_epochs,
            train_data=train_data, val_data=val_data,
            criterion=criterion, optimizer=optimizer)

Epoch 0
Train loss: 2.059 Train accuracy: 0.602 |  Validation loss: 1.813 Validation accuracy: 0.901
Epoch 1
Train loss: 1.586 Train accuracy: 0.946 |  Validation loss: 1.286 Validation accuracy: 0.996
Epoch 2
Train loss: 0.994 Train accuracy: 0.998 |  Validation loss: 0.672 Validation accuracy: 1.000
Epoch 3
Train loss: 0.469 Train accuracy: 0.999 |  Validation loss: 0.280 Validation accuracy: 1.000
Epoch 4
Train loss: 0.203 Train accuracy: 0.999 |  Validation loss: 0.127 Validation accuracy: 1.000
Epoch 5
Train loss: 0.104 Train accuracy: 0.999 |  Validation loss: 0.071 Validation accuracy: 1.000
Epoch 6
Train loss: 0.064 Train accuracy: 0.999 |  Validation loss: 0.047 Validation accuracy: 1.000
Epoch 7
Train loss: 0.048 Train accuracy: 0.999 |  Validation loss: 0.035 Validation accuracy: 1.000
Epoch 8
Train loss: 0.037 Train accuracy: 0.999 |  Validation loss: 0.028 Validation accuracy: 1.000
Epoch 9
Train loss: 0.033 Train accuracy: 0.999 |  Validation loss: 0.023 Validation accura

In [16]:
# Score the trained GRU on the whole generated sequence (training and
# validation samples alike) and show its first few predictions next to the
# targets.
idx = np.random.randint(5, 15)  # number of leading elements to print

model_gru.eval()
with torch.no_grad():
    # Evaluate the GRU itself (this cell previously queried model_rnn).
    # The model was moved to `device`, so the inputs have to follow it; the
    # predictions come back to the CPU to be compared with the CPU targets.
    results = model_gru(X.to(device)).argmax(dim=1).cpu()
acc = (results == Y).flatten()
acc = (acc.sum() / acc.shape[0])
output = results[:idx]
true_input = Y[:idx]

print(f"Accuracy: {acc:.3f}")
print(f"Validation number: '{output}'")
print(f"True number: '{true_input}'")

Accuracy: 0.999
Validation number: 'tensor([6, 2, 5, 6, 4, 8, 5, 8, 6, 0, 6])'
True number: 'tensor([8, 2, 5, 6, 4, 8, 5, 8, 6, 0, 6])'
