In [1]:
import utils.utils as utils
import utils.datasets as datasets

import os
import random
import itertools
from typing import Callable, List, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import torchvision

pd.set_option("display.max_columns", 50)
%load_ext autoreload
%autoreload 2

In [2]:
df, input_data = datasets.casme2(resize=64, color=False)

In [3]:
pr_frames = [video for video in tqdm(input_data)]

100%|█████████████████████████████████████████| 256/256 [01:49<00:00,  2.34it/s]


In [4]:
device = torch.device("cuda:1")

In [5]:
class MEGC(Dataset):
    def __init__(self, frames, labels, transform=None):
        self.frames = frames
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.frames)
    
    def __getitem__(self, idx):
        sample = self.frames[idx]
        if self.transform:
            sample = self.transform(sample)
        label = self.labels[idx]
        return sample, label

In [6]:
def calculate_W(T, alpha=20, r1=0.4, r2=0.05):
    W = torch.zeros(T, T, dtype=torch.float).to(device)
    #construct W
    for i in range(T):
        for j in range(T):
            a = j - i
            b = min(1, i)
            if j > i:
                W[i, j] = alpha * (1 - r1) ** a * r1 ** b - alpha * (1 - r2) ** a * r2 ** b
            elif j == i:
                W[i, j] = alpha * (r1 - r2)
    return W

In [7]:
def led(x, W):
    s, f, c, h, w = x.shape
    out = torch.einsum("sfchw,fx->sxchw", x, W)
    div = torch.einsum("sfchw,fx->sxchw", x, torch.abs(W))
    out /= div + 1
    out[:, 0] = x[:, 0]
    return out

In [8]:
class Net(nn.Module):
    def __init__(self, task_num, dropout=0.5):
        super().__init__()
        self.task_num = task_num
        h1 = 32
        h2 = 64
        h3 = 256
        self.conv1 = nn.Conv3d(in_channels=1, out_channels=h1, kernel_size=(1, 5, 5), stride=1)
        self.pool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 3, 3))
        self.bn1 = nn.BatchNorm3d(h1)
        self.drop1 = nn.Dropout3d(dropout)
        
        self.conv2 = nn.Conv3d(in_channels=h1, out_channels=h2, kernel_size=(2, 3, 3), stride=1)
        self.bn2 = nn.BatchNorm3d(h2)
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.drop2 = nn.Dropout3d(dropout)

        self.fc1 = nn.Linear(9 ** 2 * 2 * h2, h3)
        self.fcs = nn.ModuleList([nn.Linear(h3, 2) for _ in range(self.task_num)])
        self.drop3 = nn.Dropout(dropout)

        self.alpha = nn.Parameter(torch.log(torch.tensor(10.0)))
        self.r1 = nn.Parameter(torch.log(torch.tensor(0.4)))
        self.r2 = nn.Parameter(torch.log(torch.tensor(0.05)))
        
    def forward(self, x):
        W = calculate_W(6, torch.exp(self.alpha), torch.exp(self.r1), torch.exp(self.r2)).to(device)
        x = led(x, W)
        x = x[:, 1:]
        x = x.permute(0, 2, 1, 3, 4)
     
        x = self.drop1(self.bn1(self.pool(F.relu(self.conv1(x)))))
        x = self.drop2(self.bn2(self.pool2(F.relu(self.conv2(x)))))

        x = x.view(x.shape[0], -1)

        x = F.relu(self.fc1(x))
        x = self.drop3(x)
        xs = [fc(x) for fc in self.fcs]
        return xs

In [9]:
class MultiTaskF1(nn.Module):
    def __init__(self, task_num):
        super(MultiTaskF1, self).__init__()
        self.task_num = task_num
        
    def calc_f1(self, label, prediction):
        _, predicted = torch.max(prediction, 1)
        f1 = f1_score(label.cpu(), predicted.detach().cpu(), average="macro")
        return f1
                        
    def forward(self, preds, labels):
        f1s = [self.calc_f1(labels[:, i], preds[i]) for i in range(self.task_num)]
        return f1s

In [10]:
def train_transform(video):
    n_frames = 6
    max_f = np.random.randint(n_frames, video.shape[0] - 1)
    idx = np.round(np.linspace(0, max_f, n_frames)).astype("int")
    video = video[idx]
    video = np.expand_dims(video, 1)
    return video

def test_transform(video):
    n_frames = 6
    idx = np.round(np.linspace(0, video.shape[0] - 1, n_frames)).astype("int")
    video = video[idx]
    video = np.expand_dims(video, 1)
    return video

In [11]:
def LOSO(features, df, action_units, epochs=200, lr=0.01, batch_size=128, dropout=0.05, weight_decay=0.001):
    random.seed(1)
    torch.manual_seed(1)
    np.random.seed(1)
    torch.cuda.manual_seed(1)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    labels = np.concatenate([np.expand_dims(df[au], 1) for au in action_units], axis=1)
    outputs_list = []
    labels_list = []
    for group in df.groupby("subject"):
        subject = group[0]
        #Split data
        train_index = np.array(df[df["subject"] != subject].index)
        X_train = [features[i] for i in train_index]
        y_train = labels[train_index]
        
        test_index = np.array(df[df["subject"] == subject].index)
        X_test = [features[i] for i in test_index]
        y_test = labels[test_index]
        
        megc_dataset_train = MEGC(X_train, y_train, train_transform)
        dataset_loader_train = torch.utils.data.DataLoader(megc_dataset_train,
                                                             batch_size=batch_size, shuffle=True,
                                                             num_workers=0)

        megc_dataset_test = MEGC(X_test, y_test, test_transform)
        dataset_loader_test = torch.utils.data.DataLoader(megc_dataset_test,
                                                         batch_size=100, shuffle=False,
                                                         num_workers=0)
        
        net = Net(labels.shape[1], dropout=dropout).float().to(device)
        criterion = utils.MultiTaskLoss(labels.shape[1])
        evaluation = MultiTaskF1(labels.shape[1])
        optimizer = optim.Adam([{"params": list(net.parameters())[:3], "lr": 0.1},
                                {"params": list(net.parameters())[3:]}
                               ], lr=lr, weight_decay=weight_decay)
        
        for epoch in range(epochs):  # loop over the dataset multiple times
            vals.append([net.alpha.item(), net.r1.item(), net.r2.item()])
            if epoch == 50:# or epoch == 100:# or epoch == 200:
                optimizer.param_groups[0]["lr"] *= 0.1
                pass
            for batch in dataset_loader_train:
                data_batch, labels_batch = batch[0].to(device), batch[1].to(device)
                #data_batch = transform(data_batch)
                optimizer.zero_grad()

                outputs = net(data_batch.float())
                loss = criterion(outputs, labels_batch.long())
                loss.backward()
                optimizer.step()

        #eval
        net.eval()
        data_batch_test, labels_batch_test = dataset_loader_test.__iter__().__next__()
        data_batch_test = data_batch_test.to(device)
        outputs = net(data_batch_test.float())
        outputs_list.append([output.cpu().detach() for output in outputs])
        labels_list.append(labels_batch_test)
        train_outputs = net(data_batch.float())
        net.train()
        f1_train = evaluation(train_outputs, labels_batch.long())
        f1 = evaluation(outputs, labels_batch_test.long())
        print(f"Alpha: {torch.exp(net.alpha)}, r1: {torch.exp(net.r1)}, r2: {torch.exp(net.r2)}")
        print("Subject: {}, n={} | train_f1: {:.5} | test_f1: {:.5}".format(
            subject, str(data_batch_test.__len__()).zfill(2), np.mean(f1_train), np.mean(f1)))
    #Calculate total f1-scores
    predictions = torch.cat([torch.tensor([torch.max(i, 1)[1].tolist() for i in outputs_list[i]]).T
                   for i in range(outputs_list.__len__())])
    labels = torch.cat(labels_list)
    f1_aus = [f1_score(predictions[:, i].cpu(), labels[:, i].cpu().data.numpy(), average="macro")
              for i in range(labels.shape[1])]
    f1_aus_binary = [f1_score(predictions[:, i].cpu(), labels[:, i].cpu().data.numpy(), average="binary")
                     for i in range(labels.shape[1])]
    print("All AUs: ",list(zip(action_units, f1_aus)))
    print("Mean f1: ", np.mean(f1_aus))
    print("Binary f1: ", np.mean(f1_aus_binary))
    return predictions

In [12]:
vals = []
action_units = ["AU1", "AU2", "AU4", "AU7", "AU12", "AU14", "AU15", "AU17"]
predictions = LOSO(pr_frames, df, action_units, epochs=400, lr=0.0001, weight_decay=0.001,
     dropout=0.5, batch_size=128)

Alpha: 0.0997099056839943, r1: 0.23313838243484497, r2: 0.20474375784397125
Subject: 01, n=09 | train_f1: 0.9908 | test_f1: 0.86351
Alpha: 0.642595648765564, r1: 0.05875437334179878, r2: 0.4094427824020386
Subject: 02, n=13 | train_f1: 0.98708 | test_f1: 0.83969
Alpha: 0.9148291945457458, r1: 0.09491778165102005, r2: 0.37089040875434875
Subject: 03, n=07 | train_f1: 0.98308 | test_f1: 0.80379
Alpha: 0.4587725102901459, r1: 0.10528364032506943, r2: 0.3440375328063965
Subject: 04, n=05 | train_f1: 1.0 | test_f1: 0.97024
Alpha: 0.5086752772331238, r1: 0.09204389899969101, r2: 0.3171556890010834
Subject: 05, n=19 | train_f1: 1.0 | test_f1: 0.79549
Alpha: 1.0054630041122437, r1: 0.09224669635295868, r2: 0.39841228723526
Subject: 06, n=05 | train_f1: 0.99644 | test_f1: 0.80159
Alpha: 1.3133655786514282, r1: 0.06624402105808258, r2: 0.4578610062599182
Subject: 07, n=09 | train_f1: 0.9978 | test_f1: 0.79072
Alpha: 0.8313130736351013, r1: 0.08014599978923798, r2: 0.37342938780784607
Subject: 08