## Week 3
### Importing dependencies

In [1]:
import torch
import torchvision
from torchvision import transforms

from torch import nn
import torch.nn.functional as F
from torch.optim import AdamW

from torch.utils.tensorboard import SummaryWriter

### Preparing the data

In [2]:
def train_pl(root='./', batch_size=32, num_workers=3):
    """Build the shuffled DataLoader for the FER2013 training split.

    Args:
        root: Dataset root directory handed to ``torchvision.datasets.FER2013``.
        batch_size: Number of images per batch.
        num_workers: Number of DataLoader worker processes. Battery saving mode
            may limit the number of workers (possibly causing an error); pass 0
            to load the data in the main process instead.

    Returns:
        A ``torch.utils.data.DataLoader`` over the transformed training split;
        each batch unpacks as ``(images, labels)``.
    """
    # A set of transformations to apply to the images
    transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.ToTensor(),
        # Single-channel statistics, given in the sequence form the Normalize API documents.
        # NOTE(review): 0.485 / 0.229 look like ImageNet statistics; the alternative below
        # is presumably measured on FER2013 itself -- confirm which one is intended.
        transforms.Normalize(mean=(0.485,), std=(0.229,))
        #transforms.Normalize(0.508, 0.323)
    ])

    # Load the training dataset (the data is only "readied" here; it is not actually loaded into memory yet)
    train_data = torchvision.datasets.FER2013(root=root, split="train", transform=transform)

    # Create a data loader that (actually) loads the data in shuffled batches
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )

    return train_loader

## Week 4
### Define the model architecture

In [3]:
class EmotionModel(nn.Module):
    """Small CNN that maps single-channel 48x48 images to emotion class logits.

    Three conv -> ReLU -> max-pool stages halve the spatial size each time
    (48 -> 24 -> 12 -> 6), followed by a two-layer classifier head.

    Args:
        num_classes: Number of output logits (defaults to 7).
        dropout: Dropout probability used in the classifier head.

    The layer order and the attribute names ``conv_layers`` / ``fc_layers`` are
    kept stable so that previously saved checkpoints remain loadable.
    """

    def __init__(self, num_classes=7, dropout=0.5):
        super().__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),     # padding=1 keeps the image size the same before and after convolution
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),          # [batch_size, 32, 48, 48] => [batch_size, 32, 24, 24]
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),          # [batch_size, 64, 24, 24] => [batch_size, 64, 12, 12]
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)           # [batch_size, 128, 12, 12] => [batch_size, 128, 6, 6]
        )

        self.fc_layers = nn.Sequential(
            nn.Linear(128 * 6 * 6, 128),                    # 128 * 6 * 6 assumes 48x48 input images
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return raw (un-normalised) class logits of shape [batch_size, num_classes]."""
        x = self.conv_layers(x)
        # flatten() instead of view(x.size(0), -1): it also handles an empty batch and
        # non-contiguous activations, where view() raises.
        x = x.flatten(start_dim=1)   # [batch_size, flattened_size]
        x = self.fc_layers(x)
        return x
            

In [4]:
# Optional sanity check (requires a CUDA device): run one batch through an untrained model.
# model = EmotionModel()
# model.cuda()
# emotion_labels = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]

# train_loader = train_pl()
# batch = next(iter(train_loader))
# x, y = batch
# prediction = model(x.cuda()).cpu()
# print([emotion_labels[index] for index in prediction.argmax(dim=1)])

## Week 5
### Training function

In [5]:
def train(num_epochs=50, lr=1e-3, device=None):
    """Train a fresh EmotionModel and log per-epoch metrics to TensorBoard.

    Args:
        num_epochs: Number of passes over the training loader.
        lr: Learning rate for the AdamW optimizer.
        device: Device (``torch.device`` or string) to train on. When ``None``,
            CUDA is used if available, otherwise the CPU.

    Returns:
        The trained model, left on ``device``.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = EmotionModel().to(device)
    optimizer = AdamW(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    train_set = train_pl()
    writer = SummaryWriter()

    try:
        model.train()   # make sure Dropout is active during training
        for epoch in range(num_epochs):
            num_samples = 0
            train_correct_count = 0
            train_loss_sum = 0.0
            for X, y in train_set:
                batch_size = X.size(0)
                X, y = X.to(device), y.to(device)
                prediction = model(X)
                loss = loss_fn(prediction, y)

                train_correct_count += (prediction.argmax(dim=1) == y).sum().item()
                # CrossEntropyLoss returns the batch *mean*; weight it by the batch size so
                # that dividing by num_samples below yields the true per-sample average.
                train_loss_sum += loss.item() * batch_size
                num_samples += batch_size

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Skip logging for an empty loader instead of dividing by zero
            if num_samples:
                writer.add_scalar('train_loss', train_loss_sum / num_samples, epoch)
                writer.add_scalar('train_accuracy', train_correct_count / num_samples, epoch)
    finally:
        # Always release the event file, even if training is interrupted or fails
        writer.flush()
        writer.close()

    return model

### Train the model and save its parameters

In [6]:
### Uncomment the lines below and then execute to perform the training & saving
# trained_model = train()
# torch.save(trained_model, "model_MK1")

## Week 6
### Load the saved parameters
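Note: `torch.save(model, ...)` pickles the whole model object, but the pickle only refers to the `EmotionModel` class by name; the class's source code is not saved. When loading the saved model, the model class must therefore already be defined (or imported).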

In [7]:
# SECURITY: torch.load unpickles arbitrary Python objects -- only load checkpoint files you trust.
# "model_MK1" holds the whole pickled EmotionModel (not just a state_dict), so:
#   - weights_only=False is required on PyTorch >= 2.6, whose default (True) rejects pickled modules;
#   - map_location lets a checkpoint saved from a CUDA model load on a CPU-only machine.
model = torch.load(
    "model_MK1",
    map_location="cuda" if torch.cuda.is_available() else "cpu",
    weights_only=False,
)
model

EmotionModel(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): Sequential(
    (0): Linear(in_features=4608, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=128, out_features=7, bias=True)
  )
)

From here on out, there are several things that can be done:

    1. Test the model on the test set

    2. Perform further training to potentially improve the model

    3. Get more (better) data for training

    4. Modularize the code into organized scripts
    
    5. Deploy the model for use