In [1]:
import meb
from meb import utils
from meb import datasets
from meb import core
from meb import models

from functools import partial
from typing import List, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
import timm
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier



# Show up to 50 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 50)
# IPython autoreload (mode 2): re-import edited modules before each cell runs,
# so changes to the meb package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

This notebook uses the `traditional_methods.py` file, which requires dlib and numba. You can install them by including the optional extras:

```
pip install .[optional]
```

The dlib model for landmarks `shape_predictor_68_face_landmarks.dat` should be located in the data folder

## Defining handcrafted validation

In [2]:
class HandCraftedCrossValidator(core.CrossDatasetValidator):
    """Cross-dataset validator for models that expose ``fit`` / ``predict``.

    Training is a single ``model.fit`` call and evaluation is a single
    ``model.predict`` call followed by ``self.evaluation_fn``. The helpers
    ``split_data``, ``split_column``, ``evaluation_fn`` and ``cf`` are not
    defined here -- presumably they come from ``core.CrossDatasetValidator``.
    """

    def __init__(self, config: core.Config, verbose: bool = True):
        """Store the configuration (via the base class) and the verbosity flag.

        Args:
            config: Configuration handed to the base validator.
            verbose: Stored on the instance; not read anywhere in this class.
        """
        super().__init__(config)
        self.verbose = verbose

    def train_model(self, train_data: np.ndarray, train_labels: np.ndarray) -> None:
        """Fit ``self.model`` on the training split."""
        self.model.fit(train_data, train_labels)

    def evaluate_model(
        self, data: np.ndarray, labels: np.ndarray, test: bool = False
    ) -> List[float] | Tuple[List[float], torch.Tensor]:
        """Predict on ``data`` and score the predictions against ``labels``.

        Args:
            data: Input features passed to ``self.model.predict``.
            labels: Ground-truth labels for ``data``.
            test: When True, the predictions are returned alongside the metrics.

        Returns:
            The output of ``self.evaluation_fn``; when ``test`` is True, a
            ``(metrics, predictions)`` tuple with the predictions as a tensor.
        """
        # Convert once and reuse the tensor for both scoring and the return value.
        predictions = torch.tensor(self.model.predict(data))
        results = self.evaluation_fn(torch.tensor(labels), predictions)
        if test:
            return results, predictions
        return results

    def validate_split(
        self, df: pd.DataFrame, input_data: np.ndarray, labels: np.ndarray, split_name: str
    ) -> Tuple[List[float], List[float], torch.Tensor]:
        """Main setup of each split. Should be called by the overridden validate method.

        Args:
            df: Metadata frame; ``df[self.split_column]`` decides the split.
            input_data: Features for every sample.
            labels: Labels for every sample.
            split_name: Identifier of the split held out for testing.

        Returns:
            ``(train_metrics, test_metrics, test_predictions)``.
        """
        train_data, train_labels, test_data, test_labels = self.split_data(
            df[self.split_column], input_data, labels, split_name
        )
        # A fresh model is built for every split so nothing carries over between splits.
        self.model = self.cf.model()

        self.train_model(train_data, train_labels)
        train_metrics = self.evaluate_model(train_data, train_labels)
        test_metrics, outputs_test = self.evaluate_model(test_data, test_labels, test=True)
        return train_metrics, test_metrics, outputs_test

## Constant

In [3]:
# Build the cross-dataset object; only its metadata frame is used in this section.
# NOTE(review): the effect of ignore_validation=True is defined in meb.datasets -- confirm there.
c = datasets.CrossDataset(ignore_validation=True)
df = c.data_frame
# Placeholder features with one row per sample: the Constant baseline below
# only reads the number of rows, so the values themselves are irrelevant.
data = np.zeros((df.shape[0], 2))

In [4]:
class Constant:
    """Baseline "model" that always predicts ones for every label.

    Exposes the minimal ``fit`` / ``predict`` pair used by the validator.
    """

    n_labels: int  # number of label columns, set by ``fit``

    def fit(self, X, y):
        """Remember how many labels there are; ``X`` is ignored.

        Args:
            X: Training features (unused).
            y: Training labels; the size of its last axis is taken as the
                number of labels.

        Returns:
            ``self``, following the scikit-learn estimator convention so that
            ``Constant().fit(X, y).predict(X)`` works.
        """
        self.n_labels = y.shape[-1]
        return self

    def predict(self, X):
        """Return an all-ones array of shape ``(X.shape[0], n_labels)``.

        Raises:
            AttributeError: If called before ``fit`` (``n_labels`` is unset).
        """
        return np.ones((X.shape[0], self.n_labels))

In [5]:
class Config(core.Config):
    """Configuration for the constant-prediction baseline."""
    # Action units of the cross-dataset setting.
    action_units = utils.dataset_aus["cross"]
    # Two F1 variants are reported, differing only in the `average` argument.
    evaluation_fn = [
        partial(utils.MultiLabelF1Score, average="macro"),
        partial(utils.MultiLabelF1Score, average="binary")
    ]
    # Model factory; HandCraftedCrossValidator.validate_split calls it with no
    # arguments to build a fresh model for each split.
    model = Constant

In [6]:
# Run the repeated cross-dataset validation for the constant baseline.
# The number of repetitions is left at validate_n_times' default.
HandCraftedCrossValidator(Config).validate_n_times(df, data)

  0%|                                                     | 0/5 [00:00<?, ?it/s]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.08937 | test_mean: 0.0804
Test per AU: [('AU1', 10.85), ('AU2', 8.25), ('AU4', 27.03), ('AU5', 0.0), ('AU6', 0.53), ('AU7', 2.07), ('AU9', 17.47), ('AU10', 1.56), ('AU12', 4.55), ('AU14', 10.85), ('AU15', 6.9), ('AU17', 6.44)]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.1787 | test_mean: 0.1608
Test per AU: [('AU1', 21.7), ('AU2', 16.5), ('AU4', 54.05), ('AU5', 0.0), ('AU6', 1.05), ('AU7', 4.15), ('AU9', 34.93), ('AU10', 3.12), ('AU12', 9.09), ('AU14', 21.7), ('AU15', 13.79), ('AU17', 12.87)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.08736 | test_mean: 0.097
Test per AU: [('AU1', 9.22), ('AU2', 7.91), ('AU4', 33.68), ('AU5', 0.78), ('AU6', 4.83), ('AU7', 13.22), ('AU9', 4.83), ('AU10', 5.88), ('AU12', 11.72), ('AU14', 9.54), ('AU15', 5.88), ('AU17', 8.9)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.1747 | test_mean: 0.194
Test per AU: [('AU1', 18.44), ('AU2', 15.83), ('AU4', 67.36), ('AU5

 20%|█████████                                    | 1/5 [00:00<00:03,  1.33it/s]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.08937 | test_mean: 0.0804
Test per AU: [('AU1', 10.85), ('AU2', 8.25), ('AU4', 27.03), ('AU5', 0.0), ('AU6', 0.53), ('AU7', 2.07), ('AU9', 17.47), ('AU10', 1.56), ('AU12', 4.55), ('AU14', 10.85), ('AU15', 6.9), ('AU17', 6.44)]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.1787 | test_mean: 0.1608
Test per AU: [('AU1', 21.7), ('AU2', 16.5), ('AU4', 54.05), ('AU5', 0.0), ('AU6', 1.05), ('AU7', 4.15), ('AU9', 34.93), ('AU10', 3.12), ('AU12', 9.09), ('AU14', 21.7), ('AU15', 13.79), ('AU17', 12.87)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.08736 | test_mean: 0.097
Test per AU: [('AU1', 9.22), ('AU2', 7.91), ('AU4', 33.68), ('AU5', 0.78), ('AU6', 4.83), ('AU7', 13.22), ('AU9', 4.83), ('AU10', 5.88), ('AU12', 11.72), ('AU14', 9.54), ('AU15', 5.88), ('AU17', 8.9)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.1747 | test_mean: 0.194
Test per AU: [('AU1', 18.44), ('AU2', 15.83), ('AU4', 67.36), ('AU5

 40%|██████████████████                           | 2/5 [00:01<00:02,  1.33it/s]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.08937 | test_mean: 0.0804
Test per AU: [('AU1', 10.85), ('AU2', 8.25), ('AU4', 27.03), ('AU5', 0.0), ('AU6', 0.53), ('AU7', 2.07), ('AU9', 17.47), ('AU10', 1.56), ('AU12', 4.55), ('AU14', 10.85), ('AU15', 6.9), ('AU17', 6.44)]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.1787 | test_mean: 0.1608
Test per AU: [('AU1', 21.7), ('AU2', 16.5), ('AU4', 54.05), ('AU5', 0.0), ('AU6', 1.05), ('AU7', 4.15), ('AU9', 34.93), ('AU10', 3.12), ('AU12', 9.09), ('AU14', 21.7), ('AU15', 13.79), ('AU17', 12.87)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.08736 | test_mean: 0.097
Test per AU: [('AU1', 9.22), ('AU2', 7.91), ('AU4', 33.68), ('AU5', 0.78), ('AU6', 4.83), ('AU7', 13.22), ('AU9', 4.83), ('AU10', 5.88), ('AU12', 11.72), ('AU14', 9.54), ('AU15', 5.88), ('AU17', 8.9)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.1747 | test_mean: 0.194
Test per AU: [('AU1', 18.44), ('AU2', 15.83), ('AU4', 67.36), ('AU5

 60%|███████████████████████████                  | 3/5 [00:02<00:01,  1.37it/s]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.08937 | test_mean: 0.0804
Test per AU: [('AU1', 10.85), ('AU2', 8.25), ('AU4', 27.03), ('AU5', 0.0), ('AU6', 0.53), ('AU7', 2.07), ('AU9', 17.47), ('AU10', 1.56), ('AU12', 4.55), ('AU14', 10.85), ('AU15', 6.9), ('AU17', 6.44)]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.1787 | test_mean: 0.1608
Test per AU: [('AU1', 21.7), ('AU2', 16.5), ('AU4', 54.05), ('AU5', 0.0), ('AU6', 1.05), ('AU7', 4.15), ('AU9', 34.93), ('AU10', 3.12), ('AU12', 9.09), ('AU14', 21.7), ('AU15', 13.79), ('AU17', 12.87)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.08736 | test_mean: 0.097
Test per AU: [('AU1', 9.22), ('AU2', 7.91), ('AU4', 33.68), ('AU5', 0.78), ('AU6', 4.83), ('AU7', 13.22), ('AU9', 4.83), ('AU10', 5.88), ('AU12', 11.72), ('AU14', 9.54), ('AU15', 5.88), ('AU17', 8.9)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.1747 | test_mean: 0.194
Test per AU: [('AU1', 18.44), ('AU2', 15.83), ('AU4', 67.36), ('AU5

 80%|████████████████████████████████████         | 4/5 [00:02<00:00,  1.40it/s]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.08937 | test_mean: 0.0804
Test per AU: [('AU1', 10.85), ('AU2', 8.25), ('AU4', 27.03), ('AU5', 0.0), ('AU6', 0.53), ('AU7', 2.07), ('AU9', 17.47), ('AU10', 1.56), ('AU12', 4.55), ('AU14', 10.85), ('AU15', 6.9), ('AU17', 6.44)]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 0.1787 | test_mean: 0.1608
Test per AU: [('AU1', 21.7), ('AU2', 16.5), ('AU4', 54.05), ('AU5', 0.0), ('AU6', 1.05), ('AU7', 4.15), ('AU9', 34.93), ('AU10', 3.12), ('AU12', 9.09), ('AU14', 21.7), ('AU15', 13.79), ('AU17', 12.87)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.08736 | test_mean: 0.097
Test per AU: [('AU1', 9.22), ('AU2', 7.91), ('AU4', 33.68), ('AU5', 0.78), ('AU6', 4.83), ('AU7', 13.22), ('AU9', 4.83), ('AU10', 5.88), ('AU12', 11.72), ('AU14', 9.54), ('AU15', 5.88), ('AU17', 8.9)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 0.1747 | test_mean: 0.194
Test per AU: [('AU1', 18.44), ('AU2', 15.83), ('AU4', 67.36), ('AU5

100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.39it/s]

MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
13.0 & 12.1 & 25.8 & 6.2 & 2.9 & 11.0 & 5.4 & 3.6 & 8.0 & 12.0 & 2.5 & 4.1 & 8.9

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
8.0 & 9.7 & 7.7 & 10.1 & 10.4 & 7.6 & 8.9
MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
26.0 & 24.2 & 51.7 & 12.4 & 5.7 & 22.1 & 10.9 & 7.1 & 15.9 & 24.0 & 5.0 & 8.1 & 17.8

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
16.1 & 19.4 & 15.3 & 20.2 & 20.7 & 15.1 & 17.8





# SVM + OF

In [14]:
# Load the cross-dataset with optical-flow inputs at resize=32.
# NOTE(review): the exact meaning of `resize` / `optical_flow` is defined in meb.datasets -- confirm there.
c = datasets.CrossDataset(resize=32, optical_flow=True)
df = c.data_frame
data = c.data

In [15]:
class Config(core.Config):
    """Configuration for the SVM on optical-flow features."""
    # Action units of the cross-dataset setting.
    action_units = utils.dataset_aus["cross"]
    # Two F1 variants are reported, differing only in the `average` argument.
    evaluation_fn = [
        partial(utils.MultiLabelF1Score, average="macro"),
        partial(utils.MultiLabelF1Score, average="binary")
    ]
    # Multi-output wrapper so a single-label SVC can be used for multi-label
    # prediction. `model` is a factory: calling it builds a new
    # MultiOutputClassifier around the SVC(C=28) instance created here.
    model = partial(MultiOutputClassifier, SVC(C=28))

In [16]:
# Flatten every sample to a 1-D feature vector for the SVM. The sample count
# is read from the data instead of being hard-coded, so the cell keeps working
# if the dataset size changes.
HandCraftedCrossValidator(Config).validate_n_times(df, data.reshape(data.shape[0], -1))

100%|████████████████████████████████████████████| 5/5 [22:14<00:00, 266.88s/it]

MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
83.3 & 80.5 & 88.0 & 54.7 & 54.3 & 68.8 & 56.8 & 54.1 & 60.5 & 65.9 & 58.1 & 65.0 & 65.8

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
63.1 & 69.5 & 67.3 & 63.1 & 63.2 & 64.4 & 65.1
MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
71.0 & 65.9 & 84.2 & 13.6 & 10.6 & 44.4 & 17.2 & 10.3 & 26.7 & 39.1 & 18.2 & 32.7 & 36.2

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
31.9 & 42.8 & 38.7 & 31.5 & 31.6 & 33.1 & 34.9





In [55]:
# Single validation run on the flattened optical-flow features; the sample
# count is read from the data instead of being hard-coded.
out = HandCraftedCrossValidator(Config).validate(df, data.reshape(data.shape[0], -1))

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 1.0 | test_mean: 0.601
Test per AU: [('AU1', 72.84), ('AU2', 81.74), ('AU4', 69.71), ('AU5', 43.75), ('AU6', 48.64), ('AU7', 46.65), ('AU9', 68.3), ('AU10', 47.06), ('AU12', 54.66), ('AU14', 58.97), ('AU15', 68.25), ('AU17', 60.65)]

MultiLabelF1Score
Dataset: casme, n=189 | train_mean: 1.0 | test_mean: 0.2907
Test per AU: [('AU1', 51.28), ('AU2', 66.67), ('AU4', 62.5), ('AU5', 0.0), ('AU6', 0.0), ('AU7', 4.88), ('AU9', 46.67), ('AU10', 0.0), ('AU12', 20.83), ('AU14', 28.57), ('AU15', 41.38), ('AU17', 26.09)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 1.0 | test_mean: 0.7018
Test per AU: [('AU1', 87.9), ('AU2', 81.95), ('AU4', 89.45), ('AU5', 49.31), ('AU6', 70.81), ('AU7', 70.52), ('AU9', 53.94), ('AU10', 53.54), ('AU12', 73.21), ('AU14', 67.43), ('AU15', 76.25), ('AU17', 67.8)]

MultiLabelF1Score
Dataset: casme2, n=256 | train_mean: 1.0 | test_mean: 0.4525
Test per AU: [('AU1', 78.69), ('AU2', 67.86), ('AU4', 89.33), 

# MDMO

In [4]:
from meb.utils import traditional_methods as tm

In [5]:
# Load the cross-dataset for MDMO with resize=112 and cropped=True.
# NOTE(review): the exact meaning of `resize` / `cropped` is defined in meb.datasets -- confirm there.
c = datasets.CrossDataset(resize=112, cropped=True)
df = c.data_frame
data = c.data



In [10]:
# Transpose data to fit the format of mdmo (axes reordered to 1, 2, 0 per video).
# `data` becomes a lazy, single-use generator: it is exhausted once consumed,
# and re-running this cell would wrap the already-transposed generator again.
data = (video.transpose(1, 2, 0) for video in data)

In [11]:
# Compute MDMO features for every video; this consumes the `data` generator.
mdmo_features = tm.calculate_mdmo(df, data)

0 / 2031
10 / 2031
20 / 2031
30 / 2031
40 / 2031
50 / 2031
60 / 2031
70 / 2031
80 / 2031
90 / 2031
100 / 2031
110 / 2031
120 / 2031
130 / 2031
140 / 2031
150 / 2031
160 / 2031
170 / 2031
180 / 2031
190 / 2031
200 / 2031
210 / 2031
220 / 2031
230 / 2031
240 / 2031
250 / 2031
260 / 2031
270 / 2031
280 / 2031
290 / 2031
300 / 2031
310 / 2031
320 / 2031
330 / 2031
340 / 2031
350 / 2031
360 / 2031
370 / 2031
380 / 2031
390 / 2031
400 / 2031
410 / 2031
420 / 2031
430 / 2031
440 / 2031
450 / 2031
460 / 2031
470 / 2031
480 / 2031
490 / 2031
500 / 2031
510 / 2031
520 / 2031
530 / 2031
540 / 2031
550 / 2031
560 / 2031
570 / 2031
580 / 2031
590 / 2031
600 / 2031
610 / 2031
620 / 2031
630 / 2031
640 / 2031
650 / 2031
660 / 2031
670 / 2031
680 / 2031
690 / 2031
700 / 2031
710 / 2031
720 / 2031
730 / 2031
740 / 2031
750 / 2031
760 / 2031
770 / 2031
780 / 2031
790 / 2031
800 / 2031
810 / 2031
820 / 2031
830 / 2031
840 / 2031
850 / 2031
860 / 2031
870 / 2031
880 / 2031
890 / 2031
900 / 2031
910 / 2031

In [12]:
# Weight the two parts of the MDMO feature vector: the first 36 columns are
# scaled by beta and the remaining columns by (1 - beta).
# NOTE(review): presumably the two parts are the magnitude and direction
# components returned by tm.calculate_mdmo -- confirm which is which.
# The scaling is applied in place, so re-running this cell scales the features again.
beta = 0.8
mdmo_features[:, :36] = beta * mdmo_features[:, :36]
mdmo_features[:, 36:] = (1 - beta) * mdmo_features[:, 36:]

In [11]:
# Load cached MDMO features from disk.
# NOTE(review): when the notebook is run top to bottom this replaces the
# features computed and beta-weighted in the cells above -- presumably the
# cached file already contains the weighted features; confirm.
mdmo_features = np.load("../../../data/mdmo_cross_dataset.npy")

In [12]:
class Config(core.Config):
    """Configuration for the SVM on MDMO features."""
    # Action units of the cross-dataset setting.
    action_units = utils.dataset_aus["cross"]
    # Two F1 variants are reported, differing only in the `average` argument.
    evaluation_fn = [
        partial(utils.MultiLabelF1Score, average="macro"),
        partial(utils.MultiLabelF1Score, average="binary")
    ]
    # Multi-output wrapper so a single-label SVC can be used for multi-label
    # prediction. `model` is a factory: calling it builds a new
    # MultiOutputClassifier around the SVC(C=28) instance created here.
    model = partial(MultiOutputClassifier, SVC(C=28))

In [13]:
# Repeated cross-dataset validation on the MDMO features; the result is kept
# in `out`, which also stops the return value from being echoed as cell output.
out = HandCraftedCrossValidator(Config).validate_n_times(df, mdmo_features)

100%|█████████████████████████████████████████████| 5/5 [01:33<00:00, 18.61s/it]

MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
76.6 & 75.6 & 79.4 & 55.8 & 50.1 & 53.9 & 53.6 & 48.6 & 60.7 & 58.5 & 55.7 & 62.5 & 60.9

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
62.0 & 63.2 & 57.6 & 60.5 & 61.6 & 58.8 & 60.6
MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
59.4 & 57.0 & 72.2 & 16.7 & 2.3 & 18.1 & 10.9 & 0.0 & 27.4 & 25.5 & 13.0 & 27.8 & 27.5

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
29.5 & 31.7 & 21.1 & 27.5 & 29.1 & 23.3 & 27.0





# LBP-TOP

In [3]:
# Load the cross-dataset for LBP-TOP with resize=112 and cropped=True
# (same arguments as the MDMO section).
c = datasets.CrossDataset(resize=112, cropped=True)
df = c.data_frame
data = c.data



In [39]:
# Transpose each video (axes reordered to 1, 2, 0) before LBP-TOP extraction.
# The length check below reads the last axis as the frame count.
data = (video.transpose(1, 2, 0) for video in data)
# Videos with fewer than 10 frames are brought up to 10 frames with tm.tim;
# longer videos pass through unchanged. `data` stays a lazy, single-use generator.
data = (video if video.shape[-1] >= 10 else tm.tim(video, 10) for video in data)

In [40]:
# Compute LBP-TOP features for every video; this consumes the `data` generator.
lbptop_features = tm.calculate_lbptop(df, data)

0 / 2031
30 / 2031
60 / 2031
90 / 2031
120 / 2031
150 / 2031
180 / 2031
210 / 2031
240 / 2031
270 / 2031
300 / 2031
330 / 2031
360 / 2031
390 / 2031
420 / 2031
450 / 2031
480 / 2031
510 / 2031
540 / 2031
570 / 2031
600 / 2031
630 / 2031
660 / 2031
690 / 2031
720 / 2031
750 / 2031
780 / 2031
810 / 2031
840 / 2031
870 / 2031
900 / 2031
930 / 2031
960 / 2031
990 / 2031
1020 / 2031
1050 / 2031
1080 / 2031
1110 / 2031
1140 / 2031
1170 / 2031
1200 / 2031
1230 / 2031
1260 / 2031
1290 / 2031
1320 / 2031
1350 / 2031
1380 / 2031
1410 / 2031
1440 / 2031
1470 / 2031
1500 / 2031
1530 / 2031
1560 / 2031
1590 / 2031
1620 / 2031
1650 / 2031
1680 / 2031
1710 / 2031
1740 / 2031
1770 / 2031
1800 / 2031
1830 / 2031
1860 / 2031
1890 / 2031
1920 / 2031
1950 / 2031
1980 / 2031
2010 / 2031


In [4]:
# Load cached LBP-TOP features from disk. The file name follows the same
# pattern as the cached MDMO features (`<method>_cross_dataset.npy`); the
# previous path carried a duplicated, garbled suffix.
lbptop_features = np.load("../../../data/lbptop_cross_dataset.npy")

In [9]:
class Config(core.Config):
    """Configuration for the polynomial-kernel SVM on LBP-TOP features."""
    # Action units of the cross-dataset setting.
    action_units = utils.dataset_aus["cross"]
    # Two F1 variants are reported, differing only in the `average` argument.
    evaluation_fn = [
        partial(utils.MultiLabelF1Score, average="macro"),
        partial(utils.MultiLabelF1Score, average="binary")
    ]
    # Multi-output wrapper so a single-label SVC can be used for multi-label
    # prediction. Unlike the other sections, this SVC uses a polynomial kernel
    # with gamma="scale" and the default C.
    model = partial(MultiOutputClassifier, SVC(kernel="poly", gamma="scale"))

In [10]:
# Repeated cross-dataset validation on the LBP-TOP features, with the number
# of repetitions given explicitly (n_times=5).
out = HandCraftedCrossValidator(Config).validate_n_times(df, lbptop_features, n_times=5)

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
100%|████████████████████████████████████████████| 5/5 [56:30<00:00, 678.05s/it]

MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
67.4 & 65.1 & 73.9 & 48.3 & 49.3 & 46.7 & 49.4 & 49.1 & 47.7 & 48.2 & 49.4 & 48.9 & 53.6

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
60.9 & 55.8 & 52.4 & 53.8 & 57.4 & 49.8 & 55.0
MultiLabelF1Score
AUS: ['AU1', 'AU2', 'AU4', 'AU5', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'Average']
41.6 & 36.7 & 62.0 & 0.0 & 0.0 & 0.0 & 1.7 & 0.0 & 0.0 & 3.5 & 0.0 & 0.0 & 12.1

Datasets:  ['casme', 'casme2', 'samm', 'fourd', 'mmew', 'casme3a', 'Average']
17.9 & 16.5 & 9.7 & 13.8 & 20.1 & 4.4 & 13.7



