In [1]:
import random
import itertools
import os

import utils.utils as utils
import utils.datasets as datasets

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import f1_score
from skimage.transform import resize
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

pd.set_option('display.max_columns', 50)
%load_ext autoreload
%autoreload 2

In [2]:
#make sure everything is deterministic
random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed(1)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
import torchvision.models as models

In [4]:
device = torch.device("cuda:0")

In [3]:
df, load_data = datasets.megc("cropped")

uv_frames = np.load("../data/megc_uv_frames_secrets_of_OF.npy")
uv_frames = resize(uv_frames, (uv_frames.shape[0], 3, 60, 60))

In [17]:
import utils.datasets as datasets
df, uv_frames = datasets.megc(resize=64, optical_flow=True)
#uv_frames = resize(uv_frames, (uv_frames.shape[0], 3, 60, 60))

In [6]:
le = LabelEncoder()
labels = le.fit_transform(df["emotion"])
dataset = le.fit_transform(df["dataset"])

In [7]:
class MEData(Dataset):
    def __init__(self, frames, labels, dataset, transform=None):
        self.frames = frames
        self.labels = labels
        self.dataset = dataset
        self.transform = transform
        
    def __len__(self):
        return self.frames.shape[0]
    
    def __getitem__(self, idx):
        sample = self.frames[idx, ...]
        if self.transform:
            sample = self.transform(sample)
        label = self.labels[idx]
        dataset = self.dataset[idx]
        return sample, label, dataset

In [26]:
#SSSNet
class Net(nn.Module):
    def __init__(self, output_size, dropout):
        super(Net, self).__init__()
        h1 = 32
        h2 = 64
        h3 = 256
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=h1, kernel_size=5, stride=1)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=3)
        self.bn1 = nn.BatchNorm2d(h1)
        self.drop1 = nn.Dropout2d(dropout)
        
        self.conv2 = nn.Conv2d(in_channels=h1, out_channels=h2, kernel_size=3, stride=1)
        self.bn2 = nn.BatchNorm2d(h2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop2 = nn.Dropout2d(dropout)

        self.fc1 = nn.Linear(9 ** 2 * h2, h3)
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(h3, 3)
        self.softmax = nn.Softmax(dim=1)
        
        
    def forward(self, x):
        x = self.drop1(self.bn1(self.pool(F.relu(self.conv1(x)))))
        x = self.drop2(self.bn2(self.pool2(F.relu(self.conv2(x)))))
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(self.drop(x))
        #x = self.softmax(x)
        return x

In [14]:
#RCN-A
class ConvBlock(nn.Module):
    """convolutional layer blocks for sequtial convolution operations"""
    def __init__(self, in_features, out_features, num_conv, pool=False):
        super(ConvBlock, self).__init__()
        features = [in_features] + [out_features for i in range(num_conv)]
        layers = []
        for i in range(len(features)-1):
            layers.append(nn.Conv2d(in_channels=features[i], out_channels=features[i+1], kernel_size=3, padding=1, bias=True))
            layers.append(nn.BatchNorm2d(num_features=features[i+1], affine=True, track_running_stats=True))
            layers.append(nn.ReLU())
            if pool:
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
        self.op = nn.Sequential(*layers)
    def forward(self, x):
        return self.op(x)

class RclBlock(nn.Module):
    """recurrent convolutional blocks"""
    def __init__(self, inplanes, planes):
        super(RclBlock, self).__init__()
        self.ffconv = nn.Sequential(
            nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True)
        )
        self.rrconv = nn.Sequential(
            nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True)
        )
        self.downsample = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Dropout()
        )

    def forward(self, x):
        y = self.ffconv(x)
        y = self.rrconv(x + y)
        y = self.rrconv(x + y)
        out = self.downsample (y)
        return out


class SpatialAttentionBlock_P(nn.Module):
    """linear attention block for any layers"""
    def __init__(self, normalize_attn=True):
        super(SpatialAttentionBlock_P, self).__init__()
        self.normalize_attn = normalize_attn

    def forward(self, l, w, classes):
        output_cam = []
        for idx in range(0,classes):
            weights = w[idx,:].reshape((l.shape[1], l.shape[2], l.shape[3]))
            cam = weights * l
            cam = cam.mean(dim=1,keepdim=True)
            cam = cam - torch.min(torch.min(cam,3,True)[0],2,True)[0]
            cam = cam / torch.max(torch.max(cam,3,True)[0],2,True)[0]
            output_cam.append(cam)
        output = torch.cat(output_cam, dim=1)
        output = output.mean(dim=1,keepdim=True)
        return output

def MakeLayer(block, planes, blocks):
    layers = []
    for _ in range(0, blocks):
        layers.append(block(planes, planes))
    return nn.Sequential(*layers)

class Net(nn.Module):
    """menet networks with adding attention unit
    """
    def __init__(self, num_classes=3, dropout=0.5, num_input=3, featuremaps=64, num_layers=1, pool_size=7, version=3):
        super(Net, self).__init__()
        self.version = version
        self.classes = num_classes
        self.conv1 = nn.Sequential(
            nn.Conv2d(num_input, featuremaps, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(featuremaps),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Dropout(),
        )
        self.rcls = MakeLayer(RclBlock, featuremaps, num_layers)
        self.attenmap = SpatialAttentionBlock_P(normalize_attn=True)
        self.downsampling = nn.AdaptiveAvgPool2d((pool_size, pool_size))
        self.avgpool = nn.AdaptiveAvgPool2d((pool_size, pool_size))
        self.classifier = nn.Linear(pool_size*pool_size*featuremaps, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        if self.version == 1:
            x = self.conv1(x)
            x = self.attenmap(x)
            x = self.rcls(x)
            x = self.avgpool(x)
        if self.version == 2:
            x = self.conv1(x)
            x = self.attenmap(x)
            x = self.rcls(x)
            x = self.avgpool(x)
        elif self.version == 3:
            x = self.conv1(x)
            y = self.attenmap(self.downsampling(x), self.classifier.weight, self.classes)
            x = self.rcls(x)
            x = self.avgpool(x)
            x = x * y
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [27]:
#resnet18
class Net(nn.Module):
    def __init__(self, output_size, dropout=0.5):
        super(Net, self).__init__()
        self.resnet18 = models.resnet18(pretrained=True)
        self.resnet18.fc = nn.Linear(512, output_size)
 
    def forward(self, x):
        x = self.resnet18(x)
        return x

sssnet
Total f1: 0.7617842853527486, SMIC: 0.7241661846925004, CASME2: 0.8568498168498168, SAMM: 0.6528043905093086
RCN-A
Total f1: 0.6821009635525764, SMIC: 0.7070837828119382, CASME2: 0.7392900050381739, SAMM: 0.5044458433748985
Resnet18
Total f1: 0.6978647449911385, SMIC: 0.6930187788311358, CASME2: 0.7663289429246877, SAMM: 0.5583756768708334

In [27]:
LOSO(uv_frames, df, epochs=200, lr=0.01, weight_decay=0.001,
     dropout=0.5, batch_size=128)

Subject: 006, n=11 | train_f1: 1.00000 | test_f1: 0.51852
Subject: 007, n=08 | train_f1: 1.00000 | test_f1: 0.27778
Subject: 009, n=04 | train_f1: 1.00000 | test_f1: 1.0
Subject: 01, n=03 | train_f1: 1.00000 | test_f1: 1.0
Subject: 010, n=04 | train_f1: 1.00000 | test_f1: 1.0
Subject: 011, n=20 | train_f1: 1.00000 | test_f1: 0.50476
Subject: 012, n=03 | train_f1: 1.00000 | test_f1: 0.22222
Subject: 013, n=06 | train_f1: 1.00000 | test_f1: 0.45455
Subject: 014, n=10 | train_f1: 1.00000 | test_f1: 0.61039
Subject: 015, n=03 | train_f1: 1.00000 | test_f1: 0.55556
Subject: 016, n=05 | train_f1: 1.00000 | test_f1: 0.82222
Subject: 017, n=04 | train_f1: 1.00000 | test_f1: 0.26667
Subject: 018, n=03 | train_f1: 1.00000 | test_f1: 0.4
Subject: 019, n=01 | train_f1: 1.00000 | test_f1: 1.0
Subject: 02, n=09 | train_f1: 1.00000 | test_f1: 0.53571
Subject: 020, n=04 | train_f1: 1.00000 | test_f1: 0.44444
Subject: 021, n=02 | train_f1: 1.00000 | test_f1: 1.0
Subject: 022, n=05 | train_f1: 1.00000 |

In [9]:
LOSO(uv_frames, df, epochs=200, lr=0.01, weight_decay=0.001,
     dropout=0.5, batch_size=128)

Subject: 006, n=11 | train_f1: 0.94551 | test_f1: 0.51852
Subject: 007, n=08 | train_f1: 0.97897 | test_f1: 0.52222
Subject: 009, n=04 | train_f1: 0.94963 | test_f1: 1.0
Subject: 01, n=03 | train_f1: 0.81737 | test_f1: 1.0
Subject: 010, n=04 | train_f1: 0.92511 | test_f1: 1.0
Subject: 011, n=20 | train_f1: 0.93299 | test_f1: 0.7619
Subject: 012, n=03 | train_f1: 0.93590 | test_f1: 0.25
Subject: 013, n=06 | train_f1: 0.98137 | test_f1: 0.45455
Subject: 014, n=10 | train_f1: 0.87460 | test_f1: 0.66667
Subject: 015, n=03 | train_f1: 0.80643 | test_f1: 0.22222
Subject: 016, n=05 | train_f1: 0.80492 | test_f1: 0.77778
Subject: 017, n=04 | train_f1: 0.90686 | test_f1: 0.26667
Subject: 018, n=03 | train_f1: 0.94222 | test_f1: 0.4
Subject: 019, n=01 | train_f1: 0.85317 | test_f1: 1.0
Subject: 02, n=09 | train_f1: 0.91249 | test_f1: 0.47619
Subject: 020, n=04 | train_f1: 0.91099 | test_f1: 0.55556
Subject: 021, n=02 | train_f1: 0.94200 | test_f1: 1.0
Subject: 022, n=05 | train_f1: 0.89923 | tes

In [22]:
def LOSO(features, df, epochs=200, lr=0.01, batch_size=128, dropout=0.5, weight_decay=0.001,
         verbose=True):
    random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    outputs_list = []
    #groupby reorders elements, now the labels are in same order as outputs
    df_groupby = pd.concat([i[1] for i in df.groupby("subject")])
    dataset_groupby = df_groupby["dataset"]
    
    le = LabelEncoder()
    labels = le.fit_transform(df["emotion"])
    labels_groupby = le.transform(df_groupby["emotion"])

    #loop over each subject
    for group in df.groupby("subject"):
        subject = group[0]
        #split data to train and test based on the subject index
        train_index = df[df["subject"] != subject].index
        X_train = features[train_index, :]
        y_train = labels[train_index]
        dataset_train = dataset[train_index]
        
        test_index = df[df["subject"] == subject].index
        X_test = features[test_index, :]
        y_test = labels[test_index]
        dataset_test = dataset[test_index]

        #create pytorch dataloaders from the split
        megc_dataset_train = MEData(X_train, y_train, dataset_train, None)
        dataset_loader_train = torch.utils.data.DataLoader(megc_dataset_train,
                                                             batch_size=batch_size, shuffle=True,
                                                             num_workers=0)

        megc_dataset_test = MEData(X_test, y_test, dataset_test, None)
        dataset_loader_test = torch.utils.data.DataLoader(megc_dataset_test,
                                                         batch_size=100, shuffle=False,
                                                         num_workers=0)

        
        net = Net(df["emotion"].nunique(), dropout).float().to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
        net.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for batch in dataset_loader_train:
                data_batch, labels_batch = batch[0].to(device), batch[1].to(device)

                optimizer.zero_grad()
                
                outputs = net(data_batch.float())
                loss = criterion(outputs, labels_batch.long())
                loss.backward()
                optimizer.step()

        #Test model
        net.eval()
        data_batch_test, labels_batch_test, _ = dataset_loader_test.__iter__().__next__()
        data_batch_test, labels_batch_test = data_batch_test.to(device), labels_batch_test.to(device)
        outputs = net(data_batch_test.float())
        _, prediction = outputs.max(1)
        prediction = prediction.cpu().data.numpy()
        outputs_list.append(prediction)
        
        train_outputs = net(data_batch.float())
        _, train_prediction = train_outputs.max(1)
        train_prediction = train_prediction.cpu().data.numpy()
        train_f1 = f1_score(labels_batch.cpu().data.numpy(), train_prediction, average="macro")
        test_f1 = f1_score(labels_batch_test.cpu().data.numpy(), prediction, average="macro")
        
        
        #Print statistics
        if verbose:
            print("Subject: {}, n={} | train_f1: {:.5f} | test_f1: {:.5}".format(
                subject, str(labels_batch_test.shape[0]).zfill(2), train_f1, test_f1))
            
    outputs = np.concatenate(outputs_list)
    f1_total = f1_score(labels_groupby, outputs, average="macro")
    idx = dataset_groupby == "smic"
    f1_smic = f1_score(labels_groupby[idx], outputs[idx], average="macro")
    idx = dataset_groupby == "casme2"
    f1_casme2 = f1_score(labels_groupby[idx], outputs[idx], average="macro")
    idx = dataset_groupby == "samm"
    f1_samm = f1_score(labels_groupby[idx], outputs[idx], average="macro")
    print("Total f1: {}, SMIC: {}, CASME2: {}, SAMM: {}".format(f1_total, f1_smic, f1_casme2, f1_samm))