In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

from joblib import load
from tqdm import tqdm

from utils import create_data_loaders, train_model, test_model

#### Hyperparameters

In [2]:
RANDOM_STATE = 42
BATCH_SIZE = 32
NUM_EPOCHS = 40
LEARNING_RATE = 1e-3
PATIENCE = 10
MIN_DELTA = 1e-4

### Baseline-2: LSTM (pooled) + ff

In [4]:
class DualLSTMModel2(nn.Module):
    def __init__(self,
                 run_size = 20,
                 incoming_run_size = 45,
                 run_hidden_size = 128,
                 incoming_run_hidden_size = 128,
                 num_layers = 1,
                 dropout = 0.2,
                 ff_hidden_sizes=None,
                 ff_output_size=49):
        super().__init__()
        self.run_size = run_size
        self.incoming_run_size = incoming_run_size
        self.run_hidden_size = run_hidden_size
        self.incoming_run_hidden_size = incoming_run_hidden_size

        # self.flag = True

        if ff_hidden_sizes is None:
            ff_hidden_sizes = [128, 64]
        self.lstm_run = nn.LSTM(
            input_size=run_size,
            hidden_size=run_hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            batch_first=True,
        )

        self.lstm_incoming_run = nn.LSTM(
            input_size=incoming_run_size,
            hidden_size=incoming_run_hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            batch_first=True
        )

        last_output_size = run_hidden_size + incoming_run_hidden_size
        ff_layers = []
        prev_hidden_size = last_output_size

        for hidden_size in ff_hidden_sizes:
            ff_layers.extend([
                nn.Linear(prev_hidden_size, hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout)
            ])
            prev_hidden_size = hidden_size

        ff_layers.append(nn.Linear(prev_hidden_size, ff_output_size))
        self.fead_forward = nn.Sequential(*ff_layers)

    def forward(self, x1, x2, lengths1, lengths2):
        if lengths1 is not None:
            x1_packed = nn.utils.rnn.pack_padded_sequence(
                x1, lengths1.cpu(), batch_first=True, enforce_sorted=False
            )
            lstm1_out_packed, (h1_n, c1_n) = self.lstm_run(x1_packed)
            lstm1_out, _ = nn.utils.rnn.pad_packed_sequence(lstm1_out_packed, batch_first=True)
            # if self.flag:
            #     print(lstm1_out.shape)
            #     print(lengths1.shape)
            #     print(lstm1_out.sum(dim=1).shape)

            lengths1 = lengths1.unsqueeze(1)
            out_run = lstm1_out.sum(dim=1) / lengths1
            # if self.flag:
            #     print(out_run.shape)
            #     self.flag = False
        else:
            lstm1_out, (h1_n, c1_n) = self.lstm_run(x1)
            out_run = lstm1_out.mean(dim=1)

        if lengths2 is not None:
            x2_packed = nn.utils.rnn.pack_padded_sequence(
                x2, lengths2.cpu(), batch_first=True, enforce_sorted=False
            )
            lstm2_out_packed, (h2_n, c2_n) = self.lstm_incoming_run(x2_packed)
            lstm2_out, _ = nn.utils.rnn.pad_packed_sequence(lstm2_out_packed, batch_first=True)
            lengths2 = lengths2.unsqueeze(1)
            out_incoming_run = lstm2_out.sum(dim=1) / lengths2
        else:
            lstm2_out, (h2_n, c2_n) = self.lstm_incoming_run(x2)
            out_incoming_run = lstm2_out.mean(dim=1)

        return self.fead_forward(torch.concat([out_run, out_incoming_run], dim=1))

In [5]:
# Model summary
from torchinfo import summary

model = DualLSTMModel2()

summary(
    model,
    input_data=(
        torch.randn(32, 755, 20),  # x1: batch_size=8, seq_len=10, feature_dim=20
        torch.randn(32, 755, 45),  # x2
        torch.full((32,), 700),    # lengths1
        torch.full((32,), 700)     # lengths2
    )
)

torch.Size([32, 700, 128])
torch.Size([32])
torch.Size([32, 128])
torch.Size([32, 128])


Layer (type:depth-idx)                   Output Shape              Param #
DualLSTMModel2                           [32, 49]                  --
├─LSTM: 1-1                              [22400, 128]              76,800
├─LSTM: 1-2                              [22400, 128]              89,600
├─Sequential: 1-3                        [32, 49]                  --
│    └─Linear: 2-1                       [32, 128]                 32,896
│    └─ReLU: 2-2                         [32, 128]                 --
│    └─Dropout: 2-3                      [32, 128]                 --
│    └─Linear: 2-4                       [32, 64]                  8,256
│    └─ReLU: 2-5                         [32, 64]                  --
│    └─Dropout: 2-6                      [32, 64]                  --
│    └─Linear: 2-7                       [32, 49]                  3,185
Total params: 210,737
Trainable params: 210,737
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 477.10
Input size (MB): 6.28
F

In [6]:
run_matrices = load('run_matrices.joblib')
incoming_run_matrices = load('incoming_run_matrices.joblib')
metrology_matrix = load('metrology_matrix.joblib')

X_run = torch.from_numpy(run_matrices).float()
X_incoming_run = torch.from_numpy(incoming_run_matrices).float()
y = torch.from_numpy(metrology_matrix).float()
print(X_run.shape, X_incoming_run.shape, y.shape)

torch.Size([4140, 755, 20]) torch.Size([4140, 755, 45]) torch.Size([4140, 49])


In [8]:
baseline_2_model = DualLSTMModel2(
    run_hidden_size=128,
    incoming_run_hidden_size=128,
    num_layers=1,
    dropout=0.2,
    ff_hidden_sizes=[128, 64]
)

train_loader, val_loader, test_loader = create_data_loaders(X_run, X_incoming_run, y, train_ratio=0.7, val_ratio=0.1, batch_size=BATCH_SIZE, random_state=RANDOM_STATE)

train_losses, val_losses = train_model(baseline_2_model, train_loader, val_loader, num_epochs=1, learning_rate=LEARNING_RATE, patience=PATIENCE, min_delta=MIN_DELTA, model_save_path='baseline-2-best-model.pth')

torch.Size([32, 729, 128])
torch.Size([32])
torch.Size([32, 128])
torch.Size([32, 128])
Epoch 1/1
Train Loss: 28.093435, Val Loss: 0.100016
Learning Rate: 1.00e-03
--------------------


In [9]:
test_results = test_model(baseline_2_model, test_loader)

Testing model on 26 batches...


In [10]:
print({k: test_results[k] for k in ['test_loss', 'mse', 'mae', 'r2_score']})

{'test_loss': 0.10404751808024369, 'mse': 0.10392114520072937, 'mae': 0.2574879825115204, 'r2_score': -2.444319486618042}
