# Simple RNN Seq2Seq

In [1]:
import os
import numpy as np
import datetime
import time

import torch
import torch.nn as nn

import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.cuda.amp import GradScaler, autocast
from torch.autograd import Variable

import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots
import seaborn as sns
# Apply a custom Matplotlib style sheet shipped as a Kaggle input dataset.
plt.style.use('/kaggle/input/sonia-mplstyle/sonia.mplstyle')
# IPython magics: request 'retina' figures and render plots inline in the notebook.
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Directory holding the data files. File paths are built by plain string
# concatenation (PATH + filename), so the trailing slash is required.
PATH = '/kaggle/input/thunderstorm-data/data/'

In [2]:
# Report whether CUDA is usable, then pick the matching device for the notebook.
cuda_ok = torch.cuda.is_available()
print(cuda_ok)
device = torch.device('cuda') if cuda_ok else torch.device('cpu')

## 1. Get the Data

In [3]:
class ThunderstormDataset(Dataset):
    """Map-style dataset that exposes several ``.npy`` files as one long sequence.

    Every file is opened with ``np.load(..., mmap_mode='r')`` and contributes
    ``shape[0]`` samples. A sample is one row along the first axis of a file;
    its first six entries become the inputs and the remaining entries the
    targets, each reshaped to six flattened rows.
    """

    def __init__(self, file_paths):
        """Record how many samples each file holds.

        Args:
            file_paths: Sequence of paths to ``.npy`` files. A single path
                (``str`` or ``os.PathLike``) is accepted and treated as a
                one-element list.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(file_paths, (str, os.PathLike)):
            file_paths = [file_paths]
        self.file_paths = list(file_paths)
        self.dataset_sizes = [
            np.load(file_path, mmap_mode='r').shape[0] for file_path in self.file_paths
        ]
        # cumulative_sizes[i] is the number of samples in files 0..i inclusive.
        self.cumulative_sizes = np.cumsum(self.dataset_sizes)
        # Plain int so len() never depends on NumPy scalar semantics; an empty
        # path list yields an empty dataset instead of an IndexError here.
        self.total_size = int(self.cumulative_sizes[-1]) if self.dataset_sizes else 0

    def __len__(self):
        """Return the total number of samples across all files."""
        return self.total_size

    def __getitem__(self, idx):
        """Return ``(inputs, targets)`` float32 tensors for global index ``idx``.

        Negative indices count from the end, as for Python sequences.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        idx = int(idx)
        if idx < 0:
            # Without this, a negative index would silently address the first file.
            idx += self.total_size
        if not 0 <= idx < self.total_size:
            raise IndexError(
                f"index {idx} is out of range for dataset of size {self.total_size}"
            )

        # Find which file this idx belongs to: the first file whose cumulative
        # count is strictly greater than idx.
        file_idx = int(np.searchsorted(self.cumulative_sizes, idx, side='right'))
        samples_before = int(self.cumulative_sizes[file_idx - 1]) if file_idx > 0 else 0
        within_file_idx = idx - samples_before

        # The memmap is reopened per call, so only the requested row is read.
        data = np.load(self.file_paths[file_idx], mmap_mode='r')
        item = data[within_file_idx]

        inputs = torch.tensor(item[:6].reshape(6, -1), dtype=torch.float32)  # Flatten the first 6 images
        targets = torch.tensor(item[6:].reshape(6, -1), dtype=torch.float32)  # Flatten the last 6 images

        return inputs, targets

In [4]:
# List the data directory once, in sorted order, and split it by position:
# the first 40 files are used for training, the file at index 40 for testing.
# Note that test_file_paths is a single path string, not a list.
data_files = sorted(os.listdir(PATH))
train_file_paths = [PATH + name for name in data_files[:40]]
test_file_paths = PATH + data_files[40]

In [5]:
# Wrap the file lists in datasets; the single test path is put in a list
# because the dataset expects a sequence of paths.
train_dataset = ThunderstormDataset(train_file_paths)
test_dataset = ThunderstormDataset([test_file_paths])

# Both loaders share batch size and worker count; only training is shuffled.
loader_options = dict(batch_size=32, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

## 2. Create models

### 2.1. Encoders

In [6]:
class RNNEncoder(nn.Module):
    """Batch-first ``nn.RNN`` encoder started from an all-zero hidden state."""

    def __init__(self, input_size, hidden_size, num_layers, dropout=0.5):
        """Build the recurrent stack.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the hidden state.
            num_layers: Number of stacked recurrent layers.
            dropout: Dropout probability between stacked layers.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # PyTorch applies dropout only between stacked layers and warns when a
        # non-zero value is given to a single-layer RNN, so drop it in that case.
        self.rnn = nn.RNN(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )

    def forward(self, x):
        """Encode ``x`` of shape ``[b, t, input_size]``.

        Returns:
            Tuple ``(out, hn)``: ``out`` is ``[b, t, hidden_size]`` (top-layer
            output at every step) and ``hn`` is ``[num_layers, b, hidden_size]``
            (final hidden state of every layer).
        """
        # Allocate directly on the input's device and in its dtype, so there is
        # no host-to-device copy and no dtype mismatch for non-float32 inputs.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, hn = self.rnn(x, h0)
        return out, hn

In [7]:
class LSTMEncoder(nn.Module):
    """Batch-first ``nn.LSTM`` encoder started from all-zero hidden and cell states."""

    def __init__(self, input_size, hidden_size, num_layers, dropout=0.5):
        """Build the recurrent stack.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the hidden and cell states.
            num_layers: Number of stacked recurrent layers.
            dropout: Dropout probability between stacked layers.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # PyTorch applies dropout only between stacked layers and warns when a
        # non-zero value is given to a single-layer LSTM, so drop it in that case.
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )

    def forward(self, x):
        """Encode ``x`` of shape ``[b, t, input_size]``.

        Returns:
            Tuple ``(out, (hn, cn))``: ``out`` is ``[b, t, hidden_size]``;
            ``hn`` and ``cn`` are ``[num_layers, b, hidden_size]`` (final
            hidden and cell states of every layer).
        """
        # Allocate directly on the input's device and in its dtype, so there is
        # no host-to-device copy and no dtype mismatch for non-float32 inputs.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, (hn, cn) = self.lstm(x, (h0, c0))
        return out, (hn, cn)

In [8]:
class GRUEncoder(nn.Module):
    """Batch-first ``nn.GRU`` encoder started from an all-zero hidden state."""

    def __init__(self, input_size, hidden_size, num_layers, dropout=0.5):
        """Build the recurrent stack.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the hidden state.
            num_layers: Number of stacked recurrent layers.
            dropout: Dropout probability between stacked layers.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # PyTorch applies dropout only between stacked layers and warns when a
        # non-zero value is given to a single-layer GRU, so drop it in that case.
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )

    def forward(self, x):
        """Encode ``x`` of shape ``[b, t, input_size]``.

        Returns:
            Tuple ``(out, hn)``: ``out`` is ``[b, t, hidden_size]`` and ``hn``
            is ``[num_layers, b, hidden_size]`` (final hidden state of every
            layer).
        """
        # Allocate directly on the input's device and in its dtype, so there is
        # no host-to-device copy and no dtype mismatch for non-float32 inputs.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, hn = self.gru(x, h0)
        return out, hn

### 2.2. Decoders

In [9]:
class RNNDecoder(nn.Module):
    """``nn.RNN`` decoder step that also prepares its own next input.

    The recurrent output is projected to ``output_size`` by ``fc``; that
    prediction is projected back to ``hidden_size`` by ``output_to_hidden`` so
    it can be fed to the decoder again on the following step.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout=0.5):
        """Build the recurrent stack and the two projection layers.

        Args:
            hidden_size: Width of the decoder input and of the hidden state.
            output_size: Number of features in each prediction.
            num_layers: Number of stacked recurrent layers.
            dropout: Dropout probability between stacked layers.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # PyTorch applies dropout only between stacked layers and warns when a
        # non-zero value is given to a single-layer RNN, so drop it in that case.
        self.rnn = nn.RNN(
            hidden_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_size, output_size)
        self.output_to_hidden = nn.Linear(output_size, hidden_size)

    def forward(self, x, hn):
        """Run the decoder on ``x`` starting from hidden state ``hn``.

        Args:
            x: Decoder input whose last dimension is ``hidden_size``.
            hn: Initial hidden state (the encoder's final state on the first step).

        Returns:
            Tuple ``(out, hn, x)``: the prediction in output space, the updated
            hidden state, and the prediction mapped back to ``hidden_size``.
        """
        out, hn = self.rnn(x, hn)
        out = self.fc(out)  # Map to output space
        x = self.output_to_hidden(out)  # Transform output to match the hidden_size
        return out, hn, x

In [10]:
class LSTMDecoder(nn.Module):
    """``nn.LSTM`` decoder step that also prepares its own next input.

    The recurrent output is projected to ``output_size`` by ``fc``; that
    prediction is projected back to ``hidden_size`` by ``output_to_hidden`` so
    it can be fed to the decoder again on the following step.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout=0.5):
        """Build the recurrent stack and the two projection layers.

        Args:
            hidden_size: Width of the decoder input and of the hidden/cell states.
            output_size: Number of features in each prediction.
            num_layers: Number of stacked recurrent layers.
            dropout: Dropout probability between stacked layers.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # PyTorch applies dropout only between stacked layers and warns when a
        # non-zero value is given to a single-layer LSTM, so drop it in that case.
        self.lstm = nn.LSTM(
            hidden_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_size, output_size)
        self.output_to_hidden = nn.Linear(output_size, hidden_size)

    def forward(self, x, hidden):
        """Run the decoder on ``x`` starting from ``hidden``.

        Args:
            x: Decoder input whose last dimension is ``hidden_size``.
            hidden: Tuple ``(h, c)`` of initial hidden and cell states.

        Returns:
            Tuple ``(out, hidden, x)``: the prediction in output space, the
            updated ``(h, c)`` tuple, and the prediction mapped back to
            ``hidden_size``.
        """
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out)  # Map to output space
        x = self.output_to_hidden(out)  # Transform output to match the hidden_size
        return out, hidden, x

In [11]:
class GRUDecoder(nn.Module):
    """``nn.GRU`` decoder step that also prepares its own next input.

    The recurrent output is projected to ``output_size`` by ``fc``; that
    prediction is projected back to ``hidden_size`` by ``output_to_hidden`` so
    it can be fed to the decoder again on the following step.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout=0.5):
        """Build the recurrent stack and the two projection layers.

        Args:
            hidden_size: Width of the decoder input and of the hidden state.
            output_size: Number of features in each prediction.
            num_layers: Number of stacked recurrent layers.
            dropout: Dropout probability between stacked layers.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # PyTorch applies dropout only between stacked layers and warns when a
        # non-zero value is given to a single-layer GRU, so drop it in that case.
        self.gru = nn.GRU(
            hidden_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_size, output_size)
        self.output_to_hidden = nn.Linear(output_size, hidden_size)

    def forward(self, x, hn):
        """Run the decoder on ``x`` starting from hidden state ``hn``.

        Args:
            x: Decoder input whose last dimension is ``hidden_size``.
            hn: Initial hidden state (the encoder's final state on the first step).

        Returns:
            Tuple ``(out, hn, x)``: the prediction in output space, the updated
            hidden state, and the prediction mapped back to ``hidden_size``.
        """
        out, hn = self.gru(x, hn)
        out = self.fc(out)  # Map to output space
        x = self.output_to_hidden(out)  # Transform output to match the hidden_size
        return out, hn, x

### 2.3. Seq2Seq Model

In [12]:
class Seq2SeqModel(nn.Module):
    """Encoder/decoder wrapper that rolls the decoder out step by step.

    The encoder must return ``(output_sequence, state)``. The decoder is called
    as ``decoder(step_input, state)`` and must return
    ``(prediction, new_state, next_step_input)``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x, target_seq_length):
        """Encode ``x`` and generate ``target_seq_length`` decoder predictions.

        The first decoder input is the encoder output at the last time step,
        and the decoder starts from the encoder's final state. Every later
        input is whatever the decoder returned as its next input, so the
        rollout never sees ground-truth targets.

        Returns:
            The per-step predictions concatenated along dimension 1.
        """
        enc_seq, state = self.encoder(x)
        step_input = enc_seq[:, -1:, :]  # keep the time axis (length 1)

        predictions = []
        for _step in range(target_seq_length):
            step_pred, state, step_input = self.decoder(step_input, state)
            predictions.append(step_pred)

        return torch.cat(predictions, dim=1)

## 3. Specify training loop

In [13]:
class EarlyStopping:
    """Checkpoint the model when the loss improves and flag when to stop.

    Call the instance once per epoch with the current loss and the model.
    After ``patience`` consecutive calls without improvement, ``early_stop``
    is set to ``True``.
    """

    def __init__(self, patience=5, verbose=False, delta=0, path='checkpoint.pt'):
        """Initialise the tracker.

        Args:
            patience: Number of consecutive non-improving calls tolerated
                before ``early_stop`` is set.
            verbose: If true, print the counter on every non-improving call.
            delta: Margin added to the best score; a new score below
                ``best_score + delta`` counts as no improvement.
            path: File the best model's ``state_dict`` is written to.
        """
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.Inf was removed in NumPy 2.0; np.inf exists in every NumPy release.
        self.best_loss = np.inf

    def __call__(self, test_loss, model):
        """Record ``test_loss``; checkpoint ``model`` or advance the counter."""
        score = -test_loss  # negate so that a larger score means a better model

        improved = self.best_score is None or score >= self.best_score + self.delta
        if improved:
            self.best_score = score
            self.save_checkpoint(test_loss, model)
            self.counter = 0
            return

        self.counter += 1
        if self.verbose:
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, test_loss, model):
        """
        Saves model when test loss decreases
        """
        self.best_loss = test_loss
        torch.save(model.state_dict(), self.path)

In [14]:
def evaluate(model, data_loader, criterion, seq_len, device=None):
    """Return the mean per-batch loss of ``model`` over ``data_loader``.

    The model is switched to eval mode and gradients are disabled. Each batch
    contributes its ``criterion`` value with equal weight, whatever its size.

    Args:
        model: Module called as ``model(inputs, seq_len)``.
        data_loader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable applied as ``criterion(outputs, targets)``.
        seq_len: Number of steps to predict; passed through to the model.
        device: Device the batches are moved to. When ``None`` it is taken
            from the model's first parameter, so the function does not depend
            on a module-level ``device`` variable. A model without parameters
            leaves the batches where they are.

    Returns:
        The sum of per-batch losses divided by the number of batches.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    model.eval()
    total_loss = 0.0
    # Counted during iteration so iterables without __len__ are supported too.
    num_batches = 0
    with torch.no_grad():
        for inputs, targets in data_loader:
            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs, seq_len)
            total_loss += criterion(outputs, targets).item()
            num_batches += 1

    if num_batches == 0:
        raise ValueError("data_loader yielded no batches; cannot compute a mean loss")
    return total_loss / num_batches

In [15]:
def train(model, train_loader, test_loader, seq_len, num_epochs, learning_rate, device):
    """Fit ``model`` with AdamW on an MSE loss and return per-epoch loss histories.

    After each epoch the model is scored on both loaders with ``evaluate``.
    The test loss drives the ``ReduceLROnPlateau`` schedule and the
    ``EarlyStopping`` tracker. Before returning, the weights stored in
    ``checkpoint.pt`` are loaded back into ``model``.

    Args:
        model: Module called as ``model(inputs, seq_len)``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` batches used for the
            scheduler, early stopping and the reported test loss.
        seq_len: Number of steps to predict; passed through to the model.
        num_epochs: Maximum number of epochs to run.
        learning_rate: Initial AdamW learning rate.
        device: Device the training batches are moved to.

    Returns:
        Tuple ``(train_losses, test_losses)`` with one float per completed epoch.
    """
    loss_fn = nn.MSELoss()
    opt = optim.AdamW(model.parameters(), lr=learning_rate)
    lr_sched = ReduceLROnPlateau(opt, 'min', patience=3, factor=0.5)
    stopper = EarlyStopping(patience=5, verbose=True)

    train_losses, test_losses = [], []

    for epoch_idx in range(num_epochs):
        started = time.perf_counter()

        # ---- one optimisation pass over the training data ----
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            opt.zero_grad()
            batch_loss = loss_fn(model(batch_x, seq_len), batch_y)
            batch_loss.backward()
            # Clip the global gradient norm to 1.0 before the update.
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            opt.step()

        # ---- score both splits in eval mode ----
        epoch_train = evaluate(model, train_loader, loss_fn, seq_len)
        epoch_test = evaluate(model, test_loader, loss_fn, seq_len)
        lr_sched.step(epoch_test)

        train_losses.append(epoch_train)
        test_losses.append(epoch_test)

        elapsed = datetime.timedelta(seconds=round(time.perf_counter() - started))
        print(f"Epoch {epoch_idx + 1:>2}/{num_epochs}, Train Loss: {epoch_train:.2f}, Test Loss: {epoch_test:.2f}, Current LR: {lr_sched.get_last_lr()[0]}, Time: {str(elapsed)}")

        stopper(epoch_test, model)
        if stopper.early_stop:
            print("Early stopping")
            break

    # Restore the weights from the best checkpoint written by EarlyStopping.
    best_state = torch.load("checkpoint.pt")
    model.load_state_dict(best_state)

    return train_losses, test_losses

## 4. Initialize and train models

In [16]:
# Model dimensions. Each time step is one flattened frame, so the encoder
# input width and the decoder output width are both 256 * 256.
input_dim = output_dim = 256 * 256
hidden_dim, num_layers = 128, 2

# Training setup: number of steps to predict, initial learning rate, max epochs.
seq_len = 6
learning_rate, num_epochs = 0.001, 30

### 4.1. RNN

In [17]:
# Build the vanilla-RNN encoder/decoder pair, then move it to the chosen device.
rnn = Seq2SeqModel(
    encoder=RNNEncoder(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers),
    decoder=RNNDecoder(hidden_size=hidden_dim, output_size=output_dim, num_layers=num_layers),
)
rnn = rnn.to(device)

In [18]:
# Train the RNN model; the returned lists hold one loss value per completed epoch.
train_losses, test_losses = train(
    model=rnn,
    train_loader=train_loader,
    test_loader=test_loader,
    seq_len=seq_len,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    device=device,
)

In [19]:
# Loss curves of the RNN run, one point per completed epoch.
fig, ax = subplots(figsize=(10, 4))
ax.plot(train_losses, label='Train Loss')
ax.plot(test_losses, label='Test Loss')
ax.set(xlabel='Epoch', ylabel='MSE Loss', title='RNN')
plt.legend();

In [20]:
# Final score of the RNN on the test loader. The printed value is the square
# root of the mean per-batch MSE returned by evaluate().
criterion = nn.MSELoss()
test_loss = evaluate(model=rnn, data_loader=test_loader, criterion=criterion, seq_len=seq_len)
print(f'Test RMSE: {test_loss ** 0.5:.4f}')

In [21]:
# Persist the RNN weights and both loss histories under a common file prefix.
model_name = "RNN"
torch.save(rnn.state_dict(), f"{model_name}.pt")
np.save(f"{model_name}_train_losses.npy", np.asarray(train_losses))
np.save(f"{model_name}_test_losses.npy", np.asarray(test_losses))

### 4.2. LSTM

In [22]:
# Build the LSTM encoder/decoder pair, then move it to the chosen device.
lstm = Seq2SeqModel(
    encoder=LSTMEncoder(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers),
    decoder=LSTMDecoder(hidden_size=hidden_dim, output_size=output_dim, num_layers=num_layers),
)
lstm = lstm.to(device)

In [23]:
# Train the LSTM model; the returned lists hold one loss value per completed epoch.
train_losses, test_losses = train(
    model=lstm,
    train_loader=train_loader,
    test_loader=test_loader,
    seq_len=seq_len,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    device=device,
)

In [24]:
# Loss curves of the LSTM run, one point per completed epoch.
fig, ax = subplots(figsize=(10, 4))
ax.plot(train_losses, label='Train Loss')
ax.plot(test_losses, label='Test Loss')
ax.set(xlabel='Epoch', ylabel='MSE Loss', title='LSTM')
plt.legend();

In [25]:
# Final score of the LSTM on the test loader. The printed value is the square
# root of the mean per-batch MSE returned by evaluate().
criterion = nn.MSELoss()
test_loss = evaluate(model=lstm, data_loader=test_loader, criterion=criterion, seq_len=seq_len)
print(f'Test RMSE: {test_loss ** 0.5:.4f}')

In [26]:
# Persist the LSTM weights and both loss histories under a common file prefix.
model_name = "LSTM"
torch.save(lstm.state_dict(), f"{model_name}.pt")
np.save(f"{model_name}_train_losses.npy", np.asarray(train_losses))
np.save(f"{model_name}_test_losses.npy", np.asarray(test_losses))

### 4.3. GRU

In [27]:
# Build the GRU encoder/decoder pair, then move it to the chosen device.
gru = Seq2SeqModel(
    encoder=GRUEncoder(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers),
    decoder=GRUDecoder(hidden_size=hidden_dim, output_size=output_dim, num_layers=num_layers),
)
gru = gru.to(device)

In [28]:
# Train the GRU model; the returned lists hold one loss value per completed epoch.
train_losses, test_losses = train(
    model=gru,
    train_loader=train_loader,
    test_loader=test_loader,
    seq_len=seq_len,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    device=device,
)

In [29]:
# Loss curves of the GRU run, one point per completed epoch.
fig, ax = subplots(figsize=(10, 4))
ax.plot(train_losses, label='Train Loss')
ax.plot(test_losses, label='Test Loss')
ax.set(xlabel='Epoch', ylabel='MSE Loss', title='GRU')
plt.legend();

In [30]:
# Final score of the GRU on the test loader. The printed value is the square
# root of the mean per-batch MSE returned by evaluate().
criterion = nn.MSELoss()
test_loss = evaluate(model=gru, data_loader=test_loader, criterion=criterion, seq_len=seq_len)
print(f'Test RMSE: {test_loss ** 0.5:.4f}')

In [31]:
# Persist the GRU weights and both loss histories under a common file prefix.
model_name = "GRU"
torch.save(gru.state_dict(), f"{model_name}.pt")
np.save(f"{model_name}_train_losses.npy", np.asarray(train_losses))
np.save(f"{model_name}_test_losses.npy", np.asarray(test_losses))