In [1]:
import meb
from meb import utils
from meb import datasets
from meb import core
from meb import models

from functools import partial
from typing import List, Tuple

import numpy as np
import pandas as pd
from numba import jit, njit
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
import timm
from tqdm import tqdm
import torch.nn.functional as F

# Let pandas display up to 50 columns before eliding them.
pd.set_option("display.max_columns", 50)
# Enable the autoreload extension in mode 2 so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Build the cross-dataset object: resize=64, color=False (grayscale) and
# preload=True. The progress bars printed below come from this call.
c = datasets.CrossDataset(resize=64, color=False, preload=True)
# `df` and `data` are the two objects handed to the validator at the end of
# the notebook; `data` is indexed and reassigned per video in the next cells.
df = c.data_frame
data = c.data

Loading data: 100%|███████████████████████████| 189/189 [00:21<00:00,  8.79it/s]
Loading data: 100%|███████████████████████████| 256/256 [01:40<00:00,  2.54it/s]
Loading data: 100%|███████████████████████████| 159/159 [01:13<00:00,  2.18it/s]
Loading data: 100%|███████████████████████████| 267/267 [00:19<00:00, 13.85it/s]
Loading data: 100%|███████████████████████████| 300/300 [00:52<00:00,  5.77it/s]
Loading data: 100%|███████████████████████████| 860/860 [02:00<00:00,  7.14it/s]




In [3]:
# Append a trailing channel axis of size 1 to every video so that the later
# cells can treat the last axis as channels (they permute axis 3 to the front).
# The rank guard makes this cell idempotent: re-running it on already expanded
# videos leaves them untouched instead of stacking another singleton axis.
for i, video in enumerate(data):
    if np.ndim(video) == 3:
        data[i] = np.expand_dims(video, -1)

In [4]:
# Temporally upsample every video that has fewer than `n_frames` frames.
n_frames = 8
for i, video in enumerate(data):
    if video.shape[0] >= n_frames:
        # Long enough already; leave the video as it is.
        continue
    # Target size: new frame count, spatial size unchanged.
    target_size = (n_frames, *video.shape[1:-1])
    # Channels-last video -> batched channels-first float tensor,
    # which is the layout F.interpolate expects for trilinear mode.
    clip = torch.tensor(video).permute(3, 0, 1, 2)
    clip = clip.unsqueeze(0).float()
    stretched = F.interpolate(clip, size=target_size, mode="trilinear")
    # Back to channels-last; .byte() casts to uint8 (fractions are truncated).
    stretched = stretched.squeeze(0).permute(1, 2, 3, 0)
    data[i] = stretched.byte().numpy()

In [5]:
class LED(nn.Module):
    """The Learnable Eulerian Dynamics module

    The LED module constructs the LED matrix based on Eulerian video
    magnification in a linear fashion. A tensor contraction is then
    performed between that matrix and the frame axis of the input. The
    output is normalized to account for lighting changes.

    The three scalars are stored in log-space and exponentiated before use,
    so their effective values are always positive. The initial values must
    therefore be strictly positive.

    Args:
        alpha: Initial value of ``alpha``.
        r1: Initial value of ``r1``.
        r2: Initial value of ``r2``.
    """
    def __init__(
        self,
        alpha: float = 10.0,
        r1: float = 0.4,
        r2: float = 0.05
    ) -> None:
        super().__init__()
        self.alpha = nn.Parameter(torch.log(torch.tensor(alpha)))
        self.r1 = nn.Parameter(torch.log(torch.tensor(r1)))
        self.r2 = nn.Parameter(torch.log(torch.tensor(r2)))

    def _calculate_W(self, x: torch.Tensor) -> torch.Tensor:
        """Build the ``(T, T)`` LED matrix, where ``T = x.shape[2]``.

        Only the diagonal and the upper triangle are populated; every entry
        below the diagonal is exactly zero.

        Args:
            x: Input tensor; only its dtype, device and size along
                dimension 2 are used.

        Returns:
            The LED matrix, created with the dtype and device of ``x``.
        """
        T = x.shape[2]
        # Zero-initialize: the loops below never write the lower triangle, so
        # an uninitialized buffer (x.new) would leak arbitrary memory contents
        # into the contraction in forward().
        W = x.new_zeros(T, T)
        alpha = torch.exp(self.alpha)
        r1 = torch.exp(self.r1)
        r2 = torch.exp(self.r2)
        #construct W
        for i in range(T):
            # Exponent of r1/r2: 0 for the first row, 1 for all other rows.
            b = min(1, i)
            W[i, i] = alpha * (r1 - r2)
            for j in range(i + 1, T):
                a = j - i
                W[i, j] = alpha * (1 - r1) ** a * r1 ** b \
                          - alpha * (1 - r2) ** a * r2 ** b
        return W

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the LED matrix along the frame axis and normalize.

        Args:
            x: 5-D tensor whose dimension 2 is the frame axis
                (the einsum below labels the axes ``s, c, f, h, w``).

        Returns:
            Tensor of the same shape as ``x``. Frame 0 is a copy of the
            first input frame; the remaining frames are the normalized
            contraction of ``x`` with the LED matrix.
        """
        W = self._calculate_W(x)
        out = torch.einsum("scfhw,fx->scxhw", x, W)
        div = torch.einsum("scfhw,fx->scxhw", x, torch.abs(W))
        # For stability: with non-negative inputs div >= 0, so the
        # denominator is at least 1.
        out /= div + 1
        # Keep first frame as original
        out[:, :, 0] = x[:, :, 0]
        return out

In [6]:
class Net(nn.Module):
    """Two-stage 3D CNN classifier operating on LED-processed clips.

    The input first passes through an ``LED`` layer, the first output frame
    is discarded, and the rest goes through two conv/pool/batch-norm/dropout
    stages followed by two fully connected layers.

    The input width of ``fc1`` is fixed at ``9 ** 2 * 2 * 64``, which is what
    the convolutional stack yields for e.g. single-channel clips of 6 frames
    at 64x64 pixels.

    Args:
        class_num: Number of output logits.
        dropout: Dropout probability shared by all dropout layers.
    """
    def __init__(self, class_num, dropout=0.5):
        super().__init__()
        self.class_num = class_num
        conv1_out, conv2_out, hidden = 32, 64, 256

        # Stage 1: per-frame spatial convolution (temporal kernel size 1).
        self.conv1 = nn.Conv3d(1, conv1_out, kernel_size=(1, 5, 5), stride=1)
        self.pool = nn.MaxPool3d((1, 3, 3), stride=(1, 3, 3))
        self.bn1 = nn.BatchNorm3d(conv1_out)
        self.drop1 = nn.Dropout3d(dropout)

        # Stage 2: spatio-temporal convolution across pairs of frames.
        self.conv2 = nn.Conv3d(conv1_out, conv2_out, kernel_size=(2, 3, 3), stride=1)
        self.bn2 = nn.BatchNorm3d(conv2_out)
        self.pool2 = nn.MaxPool3d((2, 2, 2), stride=(2, 2, 2))
        self.drop2 = nn.Dropout3d(dropout)

        # Classifier head.
        self.fc1 = nn.Linear(9 ** 2 * 2 * conv2_out, hidden)
        self.fc = nn.Linear(hidden, self.class_num)
        self.drop3 = nn.Dropout(dropout)

        # Registered last, so its parameters come last in parameters().
        self.led = LED()

    def forward(self, x):
        """Return the ``class_num`` logits for a batch of clips."""
        led_out = self.led(x)
        # Discard frame 0 of the LED output.
        feats = led_out[:, :, 1:]

        feats = F.relu(self.conv1(feats))
        feats = self.pool(feats)
        feats = self.bn1(feats)
        feats = self.drop1(feats)

        feats = F.relu(self.conv2(feats))
        feats = self.pool2(feats)
        feats = self.bn2(feats)
        feats = self.drop2(feats)

        # Flatten everything except the batch dimension.
        flat = feats.view(feats.shape[0], -1)

        hidden = self.drop3(F.relu(self.fc1(flat)))
        return self.fc(hidden)

In [7]:
class Config(core.Config):
    """Experiment settings for training ``Net`` on the cross-dataset task.

    Only the attributes listed here are overridden; anything else is
    inherited from ``core.Config``.
    """
    # NOTE: pinned to CUDA device index 1.
    device = torch.device("cuda", 1)
    action_units = utils.dataset_aus["cross"]
    epochs = 400
    batch_size = 64
    evaluation_fn = [partial(utils.MultiLabelF1Score, average="binary")]
    # No spatial transform; clips are temporally subsampled with argument 6
    # (noisy variant for training, plain variant for testing).
    train_transform = dict(
        spatial=None,
        temporal=datasets.NoisyUniformTemporalSubsample(6),
    )
    test_transform = dict(
        spatial=None,
        temporal=datasets.UniformTemporalSubsample(6),
    )
    # Model factory: one output per entry in `action_units`.
    model = partial(Net, class_num=len(action_units))

In [8]:
class LEDValidator(core.CrossDatasetValidator):
    """Cross-dataset validator with an LED-specific optimizer setup.

    The last three model parameters are put in their own optimizer group
    with learning rate 0.1 (for the ``Net`` in this notebook these are the
    LED's ``alpha``, ``r1`` and ``r2``, since ``led`` is registered last).
    That group's learning rate is multiplied by 0.1 at the start of epoch 50.
    """

    def __init__(self, config: Config, verbose: bool = True):
        super().__init__(config)
        self.verbose = verbose

    def train_model(
        self,
        train_loader: torch.utils.data.DataLoader,
        test_loader: torch.utils.data.DataLoader,
    ) -> None:
        """Main training loop. Can be overridden for custom training loops."""
        for epoch in tqdm(range(self.cf.epochs), disable=self.disable_tqdm):
            if epoch == 50:
                # One-off decay of the first parameter group's learning rate.
                first_group = self.optimizer.param_groups[0]
                first_group["lr"] = first_group["lr"] * 0.1

            self.train_one_epoch(epoch, train_loader)
            if self.scheduler:
                self.scheduler.step(epoch + 1)

            # Periodic evaluation; skipped when no interval is configured
            # or when this epoch is not on the interval.
            interval = self.cf.validation_interval
            if not interval or (epoch + 1) % interval != 0:
                continue
            train_metrics = self.evaluate_model(train_loader)
            test_metrics, _ = self.evaluate_model(test_loader, test=True)
            self.printer.print_train_test_validation(
                train_metrics, test_metrics, epoch
            )

    def setup_training(self) -> None:
        """
        Sets up the training modules, including model, criterion, optimizer, scheduler
        and mixup.
        """
        self.model = self.cf.model()
        self.criterion = self.cf.criterion()
        self.model.to(self.cf.device)

        # Two parameter groups: the last three parameters get an explicit
        # lr of 0.1, everything before them uses the optimizer's defaults.
        all_params = list(self.model.parameters())
        tail_group = {"params": all_params[-3:], "lr": 0.1}
        body_group = {"params": all_params[:-3]}
        self.optimizer = self.cf.optimizer([tail_group, body_group])

        if self.cf.scheduler:
            self.scheduler = self.cf.scheduler(self.optimizer)
        else:
            self.scheduler = None

        if self.cf.mixup_fn:
            self.mixup_fn = self.cf.mixup_fn()
        else:
            self.mixup_fn = None

In [20]:
# Run the validation with n_times=5 and print the metric tables below.
# The recorded run took roughly 14 h 20 min in total (see the progress bar).
LEDValidator(Config).validate_n_times(df, data, n_times=5)

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
100%|███████████████████████████████████████| 5/5 [14:20:50<00:00, 10330.07s/it]

MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
73.2 & 69.6 & 73.7 & 51.9 & 49.6 & 54.3 & 55.2 & 53.1 & 60.6 & 64.8 & 65.9 & 64.9 & 61.4

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
67.2 & 67.3 & 58.6 & 63.7 & 60.5 & 55.4 & 62.1
MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
52.7 & 45.7 & 63.7 & 7.9 & 0.7 & 19.3 & 13.6 & 8.5 & 26.5 & 36.7 & 33.0 & 31.7 & 28.3

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
37.5 & 38.6 & 22.3 & 33.9 & 26.8 & 16.7 & 29.3



