# Training the model

## Imports

In [1]:
import sys
import os

sys.path.append(os.path.abspath('../scripts'))

from collections import Counter

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from scripts.FER_CNN import FERCNN

## Vars

In [2]:
# Locations of the serialized datasets, the TensorBoard logs and the saved model.
training_dataset_path = '../.data/output/balanced_train_dataset.pth'
test_dataset_path = '../.data/output/test_dataset.pth'
log_dir = '../.data/output/logs'
model_output_path = '../scripts/model/model.tar'

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f'Using {device} for this notebook')

Using cuda for this notebook


## Datasets

In [3]:
# The files are presumably pickled dataset objects rather than plain tensors
# (they are iterated and handed to DataLoader later on), so they need the full
# unpickler. Passing `weights_only=False` explicitly keeps the load working on
# PyTorch releases where the default is `True` and avoids the FutureWarning.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
training_dataset = torch.load(training_dataset_path, weights_only=False)
test_dataset = torch.load(test_dataset_path, weights_only=False)

  training_dataset = torch.load(training_dataset_path)
  test_dataset = torch.load(test_dataset_path)


## Preparation
Before we can train the model, we need to complete a few preparation steps.

### Model
We will create an instance of the FER CNN model

In [4]:
# Build the network, then move it to the selected device.
model = FERCNN()
model = model.to(device)

### Class weights
For the loss function we need class weights to compensate for any class imbalance. We will use the following formula to calculate the weights:
$$weight_i = \frac{\text{total samples}}{\text{num classes} \cdot \text{class count}_i}$$

In [5]:
# Count the samples per class. Labels are normalised with int() because 0-d
# tensors hash by identity, not by value: without the conversion every sample
# would be counted as its own class.
labels = [int(label) for _, label in training_dataset]
class_counts = Counter(labels)
total_samples = len(training_dataset)
num_classes = len(class_counts)

# The weight tensor is indexed by class id, so the labels must be exactly the
# consecutive integers 0..num_classes-1. Fail with a clear message instead of
# an opaque ZeroDivisionError when a class id is missing.
if set(class_counts) != set(range(num_classes)):
    raise ValueError(
        f"Expected labels 0..{num_classes - 1}, found {sorted(class_counts)}"
    )

# weight_i = total_samples / (num_classes * class_count_i)
class_weights = torch.tensor(
    [total_samples / (num_classes * class_counts[i]) for i in range(num_classes)],
    dtype=torch.float32,
    device=device
)

In [6]:
# Display the computed per-class weights as the cell output.
class_weights

tensor([1., 1., 1., 1., 1., 1., 1.], device='cuda:0')

Above we can see the weights we created. We can use these in the loss function to make the classes more balanced. Because we balanced the dataset beforehand, the weights are all equal to 1. We still pass these weights to the loss function so that the code keeps working if the class distribution changes in the future.

### Loss functions
The loss function will be used in the training loop to determine how far off the model's predictions are from the actual values. We will be using CrossEntropyLoss, which uses the following formula:
$$L = - \sum_i{y_i \log(\hat{y}_i)}$$
Because we are using our own weights, this formula becomes:
$$L = - \sum_i{w_i \cdot y_i \log(\hat{y}_i)}$$


In [7]:
# Cross-entropy loss that scales each class's contribution by its weight.
criterion = nn.CrossEntropyLoss(
    weight=class_weights,
)

### Optimizer

In [8]:
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-3,
)

### Dataloader
Below we will create the dataloaders for the model to use

In [9]:
# Batch size shared by the training and evaluation loaders.
batch_size = 512

# Pinned host memory only speeds up host-to-GPU copies; on a CPU-only machine
# it is useless and makes DataLoader emit a warning, so enable it for CUDA only.
use_pinned_memory = device == 'cuda'

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    dataset=training_dataset,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=use_pinned_memory,
)

# Evaluation batches keep the dataset's original order.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=use_pinned_memory,
)

## Training
Now that all the preparation steps are completed we can start training the model.

This function trains the model for a specified number of epochs. First, it makes sure the log directory exists and creates a TensorBoard writer for a new run directory, named after the next available run number. During each epoch, the model is set to training mode, the loss is computed on each training batch, and the optimizer updates the model's parameters based on the gradients calculated from that loss. After each epoch's training pass, the model is evaluated without gradient tracking on the test set, which serves as the validation set here, and both the average training and validation losses are logged to TensorBoard. This helps monitor overfitting and model convergence.

In [10]:
def train_model(epochs):
    """Train the notebook-level ``model`` and log per-epoch losses to TensorBoard.

    Uses the notebook globals ``model``, ``optimizer``, ``criterion``,
    ``train_loader``, ``test_loader``, ``device`` and ``log_dir``. Each call
    writes to a fresh ``run<N>`` sub-directory of ``log_dir``, where ``N`` is
    one higher than the largest existing run number (or 0 if there is none).

    Args:
        epochs: Number of full passes over ``train_loader``.
    """
    os.makedirs(log_dir, exist_ok=True)
    existing_runs = [
        int(entry[3:])
        for entry in os.listdir(log_dir)
        if entry.startswith("run") and entry[3:].isdigit()
    ]
    next_run = max(existing_runs) + 1 if existing_runs else 0
    log_dir_run = os.path.join(log_dir, f"run{next_run}")

    writer = SummaryWriter(log_dir_run)
    # try/finally so the event file is flushed and closed even if training is
    # interrupted or raises part-way through.
    try:
        for epoch in range(epochs):
            model.train()
            running_loss, val_loss = 0.0, 0.0

            for images, labels in train_loader:
                images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)

                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            # Average of the per-batch losses for this epoch.
            running_loss /= len(train_loader)

            # Switch to evaluation mode so train-only layers (e.g. dropout or
            # batch norm, if the model has any) behave deterministically while
            # the validation loss is measured. model.train() at the top of the
            # loop restores training mode for the next epoch.
            model.eval()
            with torch.no_grad():
                for images, labels in test_loader:
                    images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
                    preds = model(images)
                    loss = criterion(preds, labels)
                    val_loss += loss.item()

            val_loss /= len(test_loader)

            print(
                f"Epoch [{epoch + 1}/{epochs}],\n\tTraining loss: {running_loss:.4f}\n\tValidation loss: {val_loss:.4f}")

            writer.add_scalar('Training Loss', running_loss, global_step=epoch + 1)
            writer.add_scalar('Validation Loss', val_loss, global_step=epoch + 1)
    finally:
        writer.close()


In [11]:
# Run the full training schedule.
train_model(epochs=200)

Epoch [1/200],
	Training loss: 1.9087
	Validation loss: 1.8838
Epoch [2/200],
	Training loss: 1.8343
	Validation loss: 1.8380
Epoch [3/200],
	Training loss: 1.7398
	Validation loss: 1.7061
Epoch [4/200],
	Training loss: 1.6569
	Validation loss: 1.6665
Epoch [5/200],
	Training loss: 1.6028
	Validation loss: 1.5764
Epoch [6/200],
	Training loss: 1.5465
	Validation loss: 1.6214
Epoch [7/200],
	Training loss: 1.5132
	Validation loss: 1.5688
Epoch [8/200],
	Training loss: 1.4866
	Validation loss: 1.5019
Epoch [9/200],
	Training loss: 1.4553
	Validation loss: 1.5051
Epoch [10/200],
	Training loss: 1.4359
	Validation loss: 1.5464
Epoch [11/200],
	Training loss: 1.4157
	Validation loss: 1.4831
Epoch [12/200],
	Training loss: 1.4003
	Validation loss: 1.4618
Epoch [13/200],
	Training loss: 1.3843
	Validation loss: 1.4610
Epoch [14/200],
	Training loss: 1.3678
	Validation loss: 1.4313
Epoch [15/200],
	Training loss: 1.3559
	Validation loss: 1.4147
Epoch [16/200],
	Training loss: 1.3495
	Validatio

## Evaluating the model

In [12]:
def evaluate():
    """Print the test accuracy and a per-class classification report.

    Runs the notebook-level ``model`` in evaluation mode over ``test_loader``
    without gradient tracking, takes the highest-scoring output per sample as
    the prediction and compares it with the true labels. Prints the results
    and returns nothing.
    """
    model.eval()
    correct, total = 0, 0
    all_preds = []
    all_labels = []
    # Display names, listed in class-index order (index 0 -> 'Angry', ...).
    emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Avoid a ZeroDivisionError when the test loader yields no samples.
    if total == 0:
        print("Test set is empty; nothing to evaluate.")
        return

    print(f"Test Accuracy: {(correct / total) * 100:.2f}%")
    # Pass the label ids explicitly: otherwise the report raises when a class
    # appears in neither the labels nor the predictions, because the number of
    # observed classes no longer matches len(target_names).
    print(classification_report(
        all_labels,
        all_preds,
        labels=list(range(len(emotions))),
        target_names=emotions,
    ))

In [15]:
# Report accuracy and per-class metrics on the test set.
evaluate()

Test Accuracy: 56.44%
              precision    recall  f1-score   support

       Angry       0.47      0.47      0.47       958
     Disgust       0.28      0.77      0.41       111
        Fear       0.42      0.25      0.31      1024
       Happy       0.81      0.77      0.79      1774
         Sad       0.42      0.54      0.47      1247
    Surprise       0.70      0.73      0.71       831
     Neutral       0.54      0.51      0.52      1233

    accuracy                           0.56      7178
   macro avg       0.52      0.57      0.53      7178
weighted avg       0.57      0.56      0.56      7178



## Save the model

In [14]:
# torch.save does not create missing parent directories, so make sure the
# target directory exists before writing the weights.
output_dir = os.path.dirname(model_output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Only the state dict (parameters and buffers) is saved, not the model object.
torch.save(model.state_dict(), model_output_path)