## RNN

In [106]:
import numpy as np
import optuna
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from optuna import Trial, Study
from torch.utils.data import DataLoader, TensorDataset
from tqdm.auto import tqdm
from utils.training import train, best_torch_device
from torchinfo import summary
from copy import copy, deepcopy

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
device = best_torch_device()
# device = torch.device('cpu')
print(best_torch_device())

mps


In [87]:
X_train_valid = np.load("data/X_train_valid.npy")
y_train_valid = np.load("data/y_train_valid.npy")
person_train_valid = np.load("data/person_train_valid.npy")

X_train_aug = np.load("data/generated/frequency/X_train_augmented.npy")
y_train_aug = np.load("data/generated/frequency/y_train_augmented.npy")
person_train_aug = np.load("data/generated/frequency/person_train_augmented.npy")

X_test = np.load("data/X_test.npy")
y_test = np.load("data/y_test.npy")
person_test = np.load("data/person_test.npy")

y_train_valid -= 769
y_train_aug -= 769
y_test -= 769

X_train = X_train_valid[:1777]
y_train = y_train_valid[:1777]
X_valid = X_train_valid[1777:]
y_valid = y_train_valid[1777:]


In [5]:
X_train_aug.shape

(8885, 22, 1000)

In [6]:
class View(nn.Module):
    def __init__(self, *shape):
        super(View, self).__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(*self.shape)

class Permute(nn.Module):
    def __init__(self, *dims):
        super(Permute, self).__init__()
        self.dims = dims

    def forward(self, x):
        return x.permute(*self.dims)
    
def train(model, optimizer, loader, val_loader, cel_loss, num_epochs):
    train_acc_hist = []
    val_acc_hist = []
    for epoch_idx in tqdm(range(num_epochs)):
        # Set model to train mode - useful for layers such as BatchNorm or Dropout whose behaviors change between train/eval
        model.train()
        train_count = 0
        train_correct_count = 0
        for batch_idx, (train_x, train_y) in enumerate(loader):
            train_x = train_x.float().to(device)
            train_y = train_y.long().to(device)
            optimizer.zero_grad()
            logits = model(train_x)
            loss = cel_loss(logits, train_y)
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                y_hat = torch.argmax(logits, dim=-1)
                train_correct_count += torch.sum(y_hat == train_y, axis=-1)
                train_count += train_x.size(0)

        train_acc = train_correct_count / train_count
        train_acc_hist.append(train_acc)

        model.eval()
        val_count = 0
        val_correct_count = 0
        with torch.no_grad():
            for idx, (val_x, val_y) in enumerate(val_loader):
                val_x = val_x.float().to(device)
                val_y = val_y.long().to(device)
                logits = model(val_x).detach()
                y_hat = torch.argmax(logits, dim=-1)
                val_correct_count += torch.sum(y_hat == val_y, axis=-1)
                val_count += val_x.size(0)
        val_acc = val_correct_count / val_count
        val_acc_hist.append(val_acc)
        print('Train acc: {:.3f}, Val acc: {:.3f}'.format(train_acc, val_acc))

In [7]:
class RNN(nn.Module):

    def __init__(self):
        super(RNN, self).__init__()

        self.tdd = nn.Sequential(
            nn.Linear(22, 40),
            nn.ReLU(),
            nn.Dropout(0.5),
        )

        self.bn = nn.BatchNorm1d(40)

        self.num_layers = 4 # 3, 4
        self.hidden_size = 20 # 20
        self.lstm = nn.LSTM(
            input_size=40,
            hidden_size=self.hidden_size,
            dropout=0.5,
            num_layers=self.num_layers,
            batch_first=True,
        )

        self.linear2 = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(20),
            nn.ReLU(),
            nn.BatchNorm1d(20),
            nn.Dropout(0.5),
            nn.LazyLinear(out_features=4),
        )

        self.softmax = nn.Softmax(dim=1)  # the dim corresponds to num_output_classes=4

    def forward(self, x):
        """
        x should have shape (N, H, L) = (N, 22, 1000), where
            L = sequence length
            N = batch size
            H = input size
        """
        x = x.permute(0, 2, 1) # (N, 1000, 22)
        x = self.tdd(x) # (N, 1000, 40)

        x = x.permute(0, 2, 1)
        x = self.bn(x)
        x = x.permute(0, 2, 1)

        # device = torch.device('mps')
        # h0 = torch.randn(self.num_layers, self.hidden_size).to(device)
        # c0 = torch.randn(self.num_layers, self.hidden_size).to(device)
        x, _ = self.lstm(x)

        x = self.linear2(x[:, 9::10, :])

        x = self.softmax(x)
        return x

In [80]:
class RNN(nn.Module):

    def __init__(self):
        super(RNN, self).__init__()

        self.tdd = nn.Sequential(
            nn.Linear(22, 40),
            nn.ReLU(),
            nn.Dropout(0.5),
        )

        self.bn = nn.BatchNorm1d(40)

        self.num_layers = 3 # 3, 4
        self.hidden_size = 20 # 20
        self.lstm = nn.LSTM(
            input_size=40,
            hidden_size=self.hidden_size,
            dropout=0.5,
            num_layers=self.num_layers,
            batch_first=True,
        )

        self.linear2 = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(20),
            nn.ReLU(),
            nn.BatchNorm1d(20),
            nn.Dropout(0.5),
            nn.LazyLinear(out_features=4),
        )

        self.softmax = nn.Softmax(dim=1)  # the dim corresponds to num_output_classes=4

    def forward(self, x):
        """
        x should have shape (N, H, L) = (N, 22, 1000), where
            L = sequence length
            N = batch size
            H = input size
        """
        x = x.permute(0, 2, 1) # (N, 1000, 22)
        x = self.tdd(x) # (N, 1000, 40)

        x = x.permute(0, 2, 1)
        x = self.bn(x)
        x = x.permute(0, 2, 1)

        # device = torch.device('mps')
        # h0 = torch.randn(self.num_layers, self.hidden_size).to(device)
        # c0 = torch.randn(self.num_layers, self.hidden_size).to(device)
        x, _ = self.lstm(x)

        x = self.linear2(x[:, 9::10, :])

        x = self.softmax(x)
        return x

In [81]:
bsz = 128
sample_device = torch.device('mps')
sample_model = RNN().to(sample_device)
test_input = torch.randn(128, 22, 1000).to(sample_device)
print(sample_model(test_input).shape)
summary(sample_model, (bsz, 22, 1000), device=device)



torch.Size([128, 4])


Layer (type:depth-idx)                   Output Shape              Param #
RNN                                      [128, 4]                  --
├─Sequential: 1-1                        [128, 1000, 40]           --
│    └─Linear: 2-1                       [128, 1000, 40]           920
│    └─ReLU: 2-2                         [128, 1000, 40]           --
│    └─Dropout: 2-3                      [128, 1000, 40]           --
├─BatchNorm1d: 1-2                       [128, 40, 1000]           80
├─LSTM: 1-3                              [128, 1000, 20]           11,680
├─Sequential: 1-4                        [128, 4]                  --
│    └─Flatten: 2-4                      [128, 2000]               --
│    └─Linear: 2-5                       [128, 20]                 40,020
│    └─ReLU: 2-6                         [128, 20]                 --
│    └─BatchNorm1d: 2-7                  [128, 20]                 40
│    └─Dropout: 2-8                      [128, 20]                 --
│    └

## Training and Predicting on All Subjects

In [103]:
bsz = 128

train_loader = DataLoader(TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train)), shuffle=True, batch_size=bsz)
train_aug_loader = DataLoader(TensorDataset(torch.from_numpy(X_train_aug), torch.from_numpy(y_train_aug)), shuffle=True, batch_size=bsz)
val_loader = DataLoader(TensorDataset(torch.from_numpy(X_valid), torch.from_numpy(y_valid)), shuffle=False)
test_loader = DataLoader(TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test)), shuffle=False)

print(len(train_loader), len(train_aug_loader), len(val_loader), len(test_loader))

print(X_train_aug.shape)

14 70 338 443
(8885, 22, 1000)


In [104]:
model = RNN().to(device)
print(model)
num_chans, sequence_length = 22, 1000  # Image dimensions
test_input = torch.randn(bsz, num_chans, sequence_length).to(device)
print(model(test_input).shape)
# pred = model(test_input)[0]
# print(nn.CrossEntropyLoss()(pred, torch.from_numpy(y_train[0:1])))

RNN(
  (tdd): Sequential(
    (0): Linear(in_features=22, out_features=40, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (bn): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(40, 20, num_layers=3, batch_first=True, dropout=0.5)
  (linear2): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): LazyLinear(in_features=0, out_features=20, bias=True)
    (2): ReLU()
    (3): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): Dropout(p=0.5, inplace=False)
    (5): LazyLinear(in_features=0, out_features=4, bias=True)
  )
  (softmax): Softmax(dim=1)
)
torch.Size([128, 4])


In [107]:
cel_loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.000005)
num_epochs = 100
loader = train_aug_loader

best_model = model
best_val = 0
train_acc_hist = []
val_acc_hist = []
for epoch_idx in tqdm(range(num_epochs)):
    # Set model to train mode - useful for layers such as BatchNorm or Dropout whose behaviors change between train/eval
    model.train()
    train_count = 0
    train_correct_count = 0
    for batch_idx, (train_x, train_y) in enumerate(loader):
        train_x = train_x.float().to(device)
        train_y = train_y.long().to(device)
        optimizer.zero_grad()
        logits = model(train_x)
        loss = cel_loss(logits, train_y)
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            y_hat = torch.argmax(logits, dim=-1)
            train_correct_count += torch.sum(y_hat == train_y, axis=-1)
            train_count += train_x.size(0)

    train_acc = train_correct_count / train_count
    train_acc_hist.append(train_acc)

    model.eval()
    val_count = 0
    val_correct_count = 0
    with torch.no_grad():
        for idx, (val_x, val_y) in enumerate(val_loader):
            val_x = val_x.float().to(device)
            val_y = val_y.long().to(device)
            logits = model(val_x).detach()
            y_hat = torch.argmax(logits, dim=-1)
            val_correct_count += torch.sum(y_hat == val_y, axis=-1)
            val_count += val_x.size(0)
    val_acc = val_correct_count / val_count
    val_acc_hist.append(val_acc)
    print('Train acc: {:.3f}, Val acc: {:.3f}'.format(train_acc, val_acc))

    if val_acc > best_val:
        best_val = val_acc
        best_model = deepcopy(model)

  0%|          | 0/100 [00:00<?, ?it/s]

Train acc: 0.377, Val acc: 0.346
Train acc: 0.413, Val acc: 0.382
Train acc: 0.444, Val acc: 0.402
Train acc: 0.468, Val acc: 0.426
Train acc: 0.480, Val acc: 0.432
Train acc: 0.497, Val acc: 0.444
Train acc: 0.499, Val acc: 0.456
Train acc: 0.513, Val acc: 0.453
Train acc: 0.513, Val acc: 0.426
Train acc: 0.521, Val acc: 0.470
Train acc: 0.524, Val acc: 0.453
Train acc: 0.528, Val acc: 0.503
Train acc: 0.541, Val acc: 0.506
Train acc: 0.541, Val acc: 0.476
Train acc: 0.548, Val acc: 0.497
Train acc: 0.543, Val acc: 0.447
Train acc: 0.551, Val acc: 0.470
Train acc: 0.541, Val acc: 0.438
Train acc: 0.550, Val acc: 0.479
Train acc: 0.566, Val acc: 0.470
Train acc: 0.566, Val acc: 0.494
Train acc: 0.570, Val acc: 0.435
Train acc: 0.559, Val acc: 0.447
Train acc: 0.566, Val acc: 0.417
Train acc: 0.573, Val acc: 0.429


KeyboardInterrupt: 

In [108]:
best_model.eval()
val_count = 0
val_correct_count = 0
with torch.no_grad():
    for idx, (val_x, val_y) in enumerate(test_loader):
        val_x = val_x.float().to(device)
        val_y = val_y.long().to(device)
        logits = best_model(val_x).detach()
        y_hat = torch.argmax(logits, dim=-1)
        val_correct_count += torch.sum(y_hat == val_y, axis=-1)
        val_count += val_x.size(0)
val_acc = val_correct_count / val_count

print("Test Accuracy:", val_acc.item())

Test Accuracy: 0.4920993447303772


In [109]:
print(best_val)

tensor(0.5059, device='mps:0')


## Training and Predicting on Subject 0

In [110]:
bsz = 128

person_train = person_train_valid[:1777]
person_valid = person_train_valid[1777:]

train_index = np.where(person_train==0)[0]
train_aug_index = np.where(person_train_aug==0)[0]
val_index = np.where(person_valid==0)[0]
test_index = np.where(person_test==0)[0]

X_train_sub = X_train[train_index]
y_train_sub = y_train[train_index]
X_train_aug_sub = X_train_aug[train_aug_index]
y_train_aug_sub = y_train_aug[train_aug_index]
X_val_sub = X_valid[val_index]
y_val_sub = y_valid[val_index]
X_test_sub = X_test[test_index]
y_test_sub = y_test[test_index]

train_sub_loader = DataLoader(TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train)), shuffle=True, batch_size=bsz)
train_sub_aug_loader = DataLoader(TensorDataset(torch.from_numpy(X_train_aug), torch.from_numpy(y_train_aug)), shuffle=True, batch_size=bsz)
val_sub_loader = DataLoader(TensorDataset(torch.from_numpy(X_val_sub), torch.from_numpy(y_val_sub)), shuffle=False)
test_sub_loader = DataLoader(TensorDataset(torch.from_numpy(X_test_sub), torch.from_numpy(y_test_sub)), shuffle=False)

print(len(train_sub_loader), len(train_sub_aug_loader), len(val_sub_loader), len(test_sub_loader))

print(X_train_aug.shape)

14 70 38 50
(8885, 22, 1000)


In [111]:
model = RNN().to(device)
print(model)
num_chans, sequence_length = 22, 1000  # Image dimensions
test_input = torch.randn(bsz, num_chans, sequence_length).to(device)
print(model(test_input).shape)



RNN(
  (tdd): Sequential(
    (0): Linear(in_features=22, out_features=40, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (bn): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(40, 20, num_layers=3, batch_first=True, dropout=0.5)
  (linear2): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): LazyLinear(in_features=0, out_features=20, bias=True)
    (2): ReLU()
    (3): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): Dropout(p=0.5, inplace=False)
    (5): LazyLinear(in_features=0, out_features=4, bias=True)
  )
  (softmax): Softmax(dim=1)
)
torch.Size([128, 4])


In [112]:
cel_loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.000005)
num_epochs = 100
loader = train_sub_aug_loader

best_sub_model = model
best_sub_val = 0
train_acc_hist = []
val_acc_hist = []
for epoch_idx in tqdm(range(num_epochs)):
    # Set model to train mode - useful for layers such as BatchNorm or Dropout whose behaviors change between train/eval
    model.train()
    train_count = 0
    train_correct_count = 0
    for batch_idx, (train_x, train_y) in enumerate(loader):
        train_x = train_x.float().to(device)
        train_y = train_y.long().to(device)
        optimizer.zero_grad()
        logits = model(train_x)
        loss = cel_loss(logits, train_y)
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            y_hat = torch.argmax(logits, dim=-1)
            train_correct_count += torch.sum(y_hat == train_y, axis=-1)
            train_count += train_x.size(0)

    train_acc = train_correct_count / train_count
    train_acc_hist.append(train_acc)

    model.eval()
    val_count = 0
    val_correct_count = 0
    with torch.no_grad():
        for idx, (val_x, val_y) in enumerate(val_sub_loader):
            val_x = val_x.float().to(device)
            val_y = val_y.long().to(device)
            logits = model(val_x).detach()
            y_hat = torch.argmax(logits, dim=-1)
            val_correct_count += torch.sum(y_hat == val_y, axis=-1)
            val_count += val_x.size(0)
    val_acc = val_correct_count / val_count
    val_acc_hist.append(val_acc)
    print('Train acc: {:.3f}, Val acc: {:.3f}'.format(train_acc, val_acc))

    if val_acc > best_sub_val:
        best_sub_val = val_acc
        best_sub_model = deepcopy(model)

  0%|          | 0/100 [00:00<?, ?it/s]

Train acc: 0.291, Val acc: 0.447
Train acc: 0.379, Val acc: 0.526
Train acc: 0.410, Val acc: 0.632
Train acc: 0.432, Val acc: 0.500
Train acc: 0.445, Val acc: 0.421
Train acc: 0.464, Val acc: 0.500
Train acc: 0.467, Val acc: 0.553
Train acc: 0.475, Val acc: 0.553
Train acc: 0.486, Val acc: 0.447
Train acc: 0.497, Val acc: 0.474
Train acc: 0.500, Val acc: 0.553
Train acc: 0.512, Val acc: 0.632
Train acc: 0.505, Val acc: 0.421
Train acc: 0.516, Val acc: 0.553
Train acc: 0.525, Val acc: 0.447
Train acc: 0.521, Val acc: 0.526
Train acc: 0.521, Val acc: 0.500
Train acc: 0.532, Val acc: 0.500
Train acc: 0.542, Val acc: 0.526
Train acc: 0.542, Val acc: 0.579
Train acc: 0.542, Val acc: 0.316
Train acc: 0.552, Val acc: 0.579
Train acc: 0.559, Val acc: 0.474
Train acc: 0.554, Val acc: 0.474
Train acc: 0.557, Val acc: 0.500
Train acc: 0.558, Val acc: 0.474
Train acc: 0.559, Val acc: 0.526
Train acc: 0.554, Val acc: 0.395
Train acc: 0.559, Val acc: 0.395
Train acc: 0.567, Val acc: 0.553
Train acc:

In [1]:
best_sub_model.eval()
val_count = 0
val_correct_count = 0
with torch.no_grad():
    for idx, (val_x, val_y) in enumerate(test_sub_loader):
        val_x = val_x.float().to(device)
        val_y = val_y.long().to(device)
        logits = best_sub_model(val_x).detach()
        y_hat = torch.argmax(logits, dim=-1)
        val_correct_count += torch.sum(y_hat == val_y, axis=-1)
        val_count += val_x.size(0)
val_acc = val_correct_count / val_count

print("Test Accuracy:", val_acc.item())

NameError: name 'best_sub_model' is not defined

In [116]:
print(best_sub_val)

tensor(0.6316, device='mps:0')
