## Load PyTorch Dataset

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader

KeyboardInterrupt: 

In [None]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS,
# then plain CPU. The MPS check is only evaluated when CUDA is unavailable.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f'Using device {device}')

In [None]:
# Load the serialized train/test datasets from disk.
# NOTE(review): torch.load unpickles its input, so only point it at files
# that come from a trusted source.
train_dataset, test_dataset = (
    torch.load(path) for path in ('data/train.data', 'data/test.data')
)

# Fixed-size batches of 16; the trailing incomplete batch is dropped so every
# batch has exactly the same number of rows.
train_dataloader, test_dataloader = (
    DataLoader(dataset, batch_size=16, drop_last=True)
    for dataset in (train_dataset, test_dataset)
)

# Peek at the first training batch only: print the batch shapes and one
# sample (row 14) with its target, then stop iterating.
for X, y in train_dataloader:
    print(X.shape, y.shape, X[14], y[14])
    break

In [None]:
# Drop the notebook-level names for the preview batch and the raw datasets.
# The DataLoader objects built from the datasets are not deleted here.
del X, y
del train_dataset, test_dataset

In [None]:
# Vocabulary size; passed as `vocab_size` to the models built in this notebook.
len_vocab = 24_426

# Neural network

## Make architecture

In [None]:
class FeatureExtractor(nn.Module):
    """Embed token ids and run them through three 1-D convolution stages.

    The embedding produces a tensor of shape
    ``(batch_size, seq_len, embedding_dim)``. The convolutions use
    ``seq_len`` as their input-channel count, so they slide along the
    embedding dimension. Each stage is Conv1d(kernel 3) -> BatchNorm1d ->
    AvgPool1d(kernel 1, stride 2) -> ReLU, with channel widths
    ``seq_len -> 1024 -> 512 -> 256``.

    With ``embedding_dim=100`` the result has shape ``(batch_size, 256, 11)``.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector.
        seq_len: Number of tokens per sample; must match the second
            dimension of the input batch.
    """

    def __init__(self, vocab_size: int,
                 embedding_dim: int,
                 seq_len: int) -> None:
        super().__init__()
        # output shape: (batch_size, seq_len, embedding_dim)
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embedding_dim)
        # Channel widths of consecutive stages; each adjacent pair is one stage.
        widths = (seq_len, 1024, 512, 256)
        self.conv_blocks = nn.Sequential(*(
            self._conv_stage(n_in, n_out)
            for n_in, n_out in zip(widths, widths[1:])
        ))

    @staticmethod
    def _conv_stage(n_in: int, n_out: int) -> nn.Sequential:
        """Build one Conv1d -> BatchNorm1d -> AvgPool1d -> ReLU stage."""
        return nn.Sequential(
            nn.Conv1d(in_channels=n_in, out_channels=n_out, kernel_size=3),
            nn.BatchNorm1d(num_features=n_out),
            # kernel_size=1 with stride=2 keeps every second position.
            nn.AvgPool1d(kernel_size=1, stride=2),
            nn.ReLU(),
        )

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Return convolutional features for a batch of token ids."""
        embedded = self.embedding(X)
        return self.conv_blocks(embedded)
    

# Smoke test: push one test batch through a throwaway FeatureExtractor and
# print the resulting feature shape, then discard the temporaries.
test = FeatureExtractor(vocab_size=len_vocab,
                        embedding_dim=100,
                        seq_len=2969).to(device)
X, _ = next(iter(test_dataloader))
# The module lives on `device`, so the batch has to be moved there as well;
# otherwise the forward pass fails with a device mismatch on CUDA/MPS.
# No gradients are needed for a shape check.
with torch.no_grad():
    print(test(X.to(device)).size())
del X, test


class EmotionalClassifier(nn.Module):
    """Text classifier: FeatureExtractor -> LSTM -> fully connected head.

    The feature extractor output ``(batch_size, 256, 11)`` is fed to a
    batch-first LSTM that treats the 256 channels as time steps and the 11
    values per channel as input features. The LSTM input size is hard-coded
    to 11, which matches the extractor output for ``embedding_dim=100``.
    The LSTM output is flattened and passed through the linear head.

    ``forward`` returns raw, unnormalized class scores (logits) of shape
    ``(batch_size, output_size)``. No final activation is applied because
    ``nn.CrossEntropyLoss`` expects logits and applies log-softmax itself.

    The LSTM hidden/cell states are kept in ``self.states`` and carried
    (detached) from one forward call to the next.
    NOTE(review): carrying state across batches also carries it from
    training into evaluation; confirm this is intended.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector.
        seq_len: Number of tokens per sample.
        output_size: Number of classes.
        hidden_layers: Number of stacked LSTM layers.
    """

    def __init__(self,
                 vocab_size: int,
                 embedding_dim: int,
                 seq_len: int,
                 output_size: int,
                 hidden_layers: int):
        super().__init__()
        self.input_layer = FeatureExtractor(vocab_size,
                                            embedding_dim,
                                            seq_len)  # output shape: (batch_size, 256, 11)
        self.hidden_layers = nn.LSTM(input_size=11,
                                     hidden_size=128,
                                     num_layers=hidden_layers,
                                     batch_first=True)  # output shape: (batch_size, 256, 128)
        self.flatten = nn.Flatten()
        # 256 LSTM time steps, each with a 128-wide hidden vector.
        hidden_size = 256 * 128
        self.linear_layers = nn.Sequential(
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout(0.5),
            nn.Linear(hidden_size, 64),
            nn.Tanh(),
            # Final layer emits logits; squashing them with a sigmoid would
            # confine the scores to (0, 1) before the loss's softmax.
            nn.Linear(64, output_size)
        )
        self.states = self.init_states(hidden_layers, 128)

    def init_states(self, num_layers: int, hidden_size: int,
                    batch_size: int = 16,
                    device: 'torch.device | str | None' = None
                    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Return zero-filled ``(h_0, c_0)`` LSTM states.

        Args:
            num_layers: Number of stacked LSTM layers.
            hidden_size: LSTM hidden size.
            batch_size: Batch dimension of the states (default 16).
            device: Device for the tensors. Defaults to the device of this
                module's parameters, or CPU if it has none.

        Returns:
            Two tensors of shape ``(num_layers, batch_size, hidden_size)``.
        """
        if device is None:
            first_param = next(self.parameters(), None)
            device = (first_param.device if first_param is not None
                      else torch.device('cpu'))
        shape = (num_layers, batch_size, hidden_size)
        return (torch.zeros(shape, device=device),
                torch.zeros(shape, device=device))

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(batch_size, output_size)``."""
        X = self.input_layer(X)
        # Stored states only fit a batch of the same size on the same device;
        # start again from zeros when either differs.
        batch_size = X.size(0)
        stored = self.states[0]
        if stored.size(1) != batch_size or stored.device != X.device:
            self.states = self.init_states(self.hidden_layers.num_layers,
                                           self.hidden_layers.hidden_size,
                                           batch_size,
                                           X.device)
        X, (hidden, cell) = self.hidden_layers(X, self.states)
        # Detach so the next backward pass does not reach into this batch's graph.
        self.states = (hidden.detach(), cell.detach())
        X = self.flatten(X)
        return self.linear_layers(X)
    

# Build the classifier: 100-dimensional embeddings over the full vocabulary,
# 2969 tokens per sample, 5 output classes and a single LSTM layer.
model = EmotionalClassifier(
    vocab_size=len_vocab,
    embedding_dim=100,
    seq_len=2969,
    output_size=5,
    hidden_layers=1,
)
# Move the parameters to the selected device (nn.Module.to returns the module).
model = model.to(device)
# Bare expression so the notebook displays the module summary.
model

In [None]:
# Training configuration: number of passes over the training data, the loss
# function, and an Adam optimizer over all model parameters.
epochs = 1
critetion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.005)

In [None]:
from IPython.display import clear_output
import numpy as np
from sklearn.metrics import classification_report


def batch_stat(x: torch.Tensor) -> str:
    """Summarise which classes a batch of scores predicts.

    Takes the argmax along dimension 1 of ``x`` and returns the string form
    of a dict mapping each predicted class index to the number of rows that
    predicted it.
    """
    labels = torch.argmax(x, dim=1).tolist()
    counts = {}
    for label in set(labels):
        counts[label] = labels.count(label)
    return str(counts)


# Train for `epochs` epochs and evaluate on the test set after each one.
# Per-epoch mean losses are collected for plotting. The classification report
# printed at the end covers the predictions of the last epoch only.
mean_train_losses = []
mean_test_losses = []
for epoch in range(epochs):
    train_losses = []
    test_losses = []
    model.train(True)
    for batch, (inputs, targets) in enumerate(train_dataloader):
        # The model lives on `device`; the batch must be there too.
        inputs, targets = inputs.to(device), targets.to(device)
        # Targets carry one score per class; the loss takes class indices.
        labels = targets.argmax(dim=1)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = critetion(outputs, labels)

        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

        if (batch + 1) % 5 == 0:
            print(f'Epoch: {epoch + 1}, Train batch {batch + 1}, loss: {train_losses[-1]}\noutputs {batch_stat(outputs)}')
        # Periodically wipe the cell output so the log does not grow unbounded.
        if (batch + 1) % 50 == 0:
            clear_output()

    train_losses = np.array(train_losses)
    mean_train_losses.append(train_losses.mean())
    # Reset each epoch: predictions and ground truth for this epoch's test pass.
    predicted = np.zeros(0)
    true = np.zeros(0)

    clear_output()
    model.eval()
    with torch.no_grad():
        for batch, (inputs, targets) in enumerate(test_dataloader):
            inputs, targets = inputs.to(device), targets.to(device)
            labels = targets.argmax(dim=1)

            outputs = model(inputs)
            # Same target form as in training, so both losses are comparable.
            loss = critetion(outputs, labels)

            predicted = np.concatenate((predicted, outputs.argmax(dim=1).tolist()))
            true = np.concatenate((true, labels.tolist()))

            test_losses.append(loss.item())

            print(f'Epoch: {epoch + 1}, Test batch {batch + 1}, loss: {test_losses[-1]}\noutputs {batch_stat(outputs)}')
        clear_output()

    test_losses = np.array(test_losses)
    mean_test_losses.append(test_losses.mean())
print(classification_report(true, predicted))

In [None]:
import matplotlib.pyplot as plt


# Plot the mean train loss (left) and mean test loss (right) per epoch.
fig = plt.figure(figsize=(12,6))
axes = fig.subplots(1, 2)
epoch_axis = np.arange(1, epochs + 1)
# Markers make single-epoch runs visible: a line through one point draws nothing.
axes[0].plot(epoch_axis, np.array(mean_train_losses),
             color="red", label="Train", marker="o")
axes[1].plot(epoch_axis, np.array(mean_test_losses),
             color="blue", label="Test", marker="o")
for ax in axes:
    ax.set_xlabel("Epoch")
    ax.set_ylabel("CrossEntropy")
    ax.grid()

fig.suptitle("Losses")
fig.legend()
# pyplot.show() works with notebook backends, whereas Figure.show() warns
# when the active backend has no GUI event loop.
plt.show()