In [15]:
import random
import itertools
import os
import math

import utils.utils as utils
import utils.datasets as datasets
import utils.traditional_methods as tm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import f1_score
from skimage.transform import resize
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

pd.set_option('display.max_columns', 50)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Seed every RNG the notebook draws from so that a fresh run is repeatable.
random.seed(1)
# NumPy supplies the random frame offset in temporal_train_transform, so it
# has to be seeded alongside Python's and PyTorch's generators.
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed(1)
# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Single source of truth for the compute device: use the first CUDA GPU when
# one is visible and fall back to the CPU otherwise. This replaces two
# conflicting cells that assigned "cuda:0" and then "cpu" to the same name.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [12]:
# Load the metadata table (`df`) and the videos (`load_data`) through
# datasets.cross_dataset; `load_data` is iterated one video at a time below.
df, load_data = datasets.cross_dataset(color=True)

In [4]:
# Load the metadata table (`df`) and the videos (`load_data`) through
# datasets.casme2.
# NOTE(review): this rebinds the same `df` / `load_data` names as the
# cross_dataset cell, so whichever loading cell ran last wins.
df, load_data = datasets.casme2(color=True)

In [25]:
n_frames = 10
pr_frames = []
for video in load_data:
    # Pick n_frames indices spread evenly over the last axis of the video.
    idx = np.round(np.linspace(0, video.shape[-1] - 1, n_frames)).astype("int")
    # Resize to (112, 112, 3, n_frames) and move the last two axes to the
    # front, giving (3, n_frames, 112, 112). The frame count follows n_frames
    # rather than a hard-coded 10, so changing n_frames no longer makes resize
    # interpolate along the frame axis.
    # NOTE(review): skimage.transform.resize rescales integer images to floats
    # in [0, 1] unless preserve_range=True, while MEGC later casts samples to
    # uint8 -- confirm the value range expected downstream.
    video_resized = resize(video[..., idx], (112, 112, 3, n_frames)).transpose(2, 3, 0, 1)
    pr_frames.append(video_resized)

In [None]:
# TODO: fix loading all datasets as color; SAMM currently does not work.

In [18]:
n_frames = 50
pr_frames = []
for video in tqdm(load_data, total=len(load_data)):
    # Move the leading axis to the end, interpolate to n_frames with tm.tim,
    # then bring the last two axes of the result to the front.
    video = tm.tim(video.transpose(1, 2, 3, 0), n_frames).transpose(2, 3, 0, 1)
    pr_frames.append(video)

 38%|███▊      | 445/1171 [14:44<43:42,  3.61s/it]  

ValueError: could not broadcast input array from shape (462,461) into shape (462,461,3)

In [5]:
from functools import partial

class BasicBlock(nn.Module):
    """Residual block made of two 3x3x3 convolutions, each followed by BatchNorm3d.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional callable applied to the input to produce the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """
    # Channel multiplier read by the network builder when sizing layers.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm3d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3x3(planes, planes)
        self.bn2 = nn.BatchNorm3d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))."""
        shortcut = x if self.downsample is None else self.downsample(x)

        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        main += shortcut

        return self.relu(main)
    
    
def conv3x3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3x3 ``nn.Conv3d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        The configured ``nn.Conv3d`` module.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv3d(in_planes, out_planes, **conv_options)


class Covpool(torch.autograd.Function):
    """Covariance pooling with a hand-written backward pass.

    For an input of shape (B, C, H, W) the forward pass flattens the last two
    axes into M = H * W positions and returns ``X @ I_hat @ X^T`` of shape
    (B, C, C), where ``I_hat = (1/M) * I - (1/M^2) * ones``.
    """

    @staticmethod
    def forward(ctx, input):
        n_batch, n_chan, height, width = (input.size(k) for k in range(4))
        n_pos = height * width
        flat = input.reshape(n_batch, n_chan, n_pos)

        ones = torch.ones(n_pos, n_pos, device=flat.device)
        eye = torch.eye(n_pos, n_pos, device=flat.device)
        centering = (-1. / n_pos / n_pos) * ones + (1. / n_pos) * eye
        # One copy of the matrix per batch item so it can be used with bmm.
        centering = centering.view(1, n_pos, n_pos).repeat(n_batch, 1, 1).type(flat.dtype)

        cov = flat.bmm(centering).bmm(flat.transpose(1, 2))
        ctx.save_for_backward(input, centering)
        return cov

    @staticmethod
    def backward(ctx, grad_output):
        input, centering = ctx.saved_tensors
        n_batch, n_chan, height, width = (input.size(k) for k in range(4))
        flat = input.reshape(n_batch, n_chan, height * width)

        # d(X I_hat X^T)/dX contracted with G is (G + G^T) X I_hat.
        symmetric_grad = grad_output + grad_output.transpose(1, 2)
        grad_input = symmetric_grad.bmm(flat).bmm(centering)
        return grad_input.reshape(n_batch, n_chan, height, width)

    
def cov_feature(x):
    """Position-by-position second-order statistics of a (B, C, H, W) tensor.

    The input is flattened to (B, C, M) with M = H * W and the function
    returns ``X^T @ I_hat @ X`` of shape (B, M, M), where ``I_hat`` is the
    C x C matrix ``(1/M) * I - (1/M^2) * ones``.

    NOTE(review): ``I_hat`` is C x C but is scaled with M rather than C, so it
    is not a centring matrix over the channel axis unless C == M -- confirm
    this is intended before changing it.
    """
    n_batch, n_chan, height, width = (x.size(k) for k in range(4))
    n_pos = height * width
    flat = x.reshape(n_batch, n_chan, n_pos)

    ones = torch.ones(n_chan, n_chan, device=flat.device)
    eye = torch.eye(n_chan, n_chan, device=flat.device)
    weight = (-1. / n_pos / n_pos) * ones + (1. / n_pos) * eye
    # One copy of the matrix per batch item so it can be used with bmm.
    weight = weight.view(1, n_chan, n_chan).repeat(n_batch, 1, 1).type(flat.dtype)

    return flat.transpose(1, 2).bmm(weight).bmm(flat)


def downsample_basic_block(x, planes, stride):
    """Parameter-free shortcut: strided subsampling plus zero channel padding.

    Args:
        x: 5-D input tensor (the result is indexed on dimensions 0-4).
        planes: number of channels the output must have.
        stride: stride of the kernel-size-1 average pooling used to subsample.

    Returns:
        A tensor with ``planes`` channels: the subsampled input followed by
        zero-filled channels. The result is detached from the autograd graph,
        exactly as the original ``.data`` / ``Variable`` construction was, so
        no gradient flows through this shortcut.
    """
    out = F.avg_pool3d(x, kernel_size=1, stride=stride)
    # new_zeros inherits dtype and device from `out`, so the padding always
    # matches the activations without consulting the notebook-global `device`
    # or special-casing torch.cuda.FloatTensor.
    zero_pads = out.new_zeros(
        out.size(0), planes - out.size(1), out.size(2), out.size(3), out.size(4))

    return torch.cat([out.detach(), zero_pads], dim=1)

    
def CovpoolLayer(var):
    """Run the custom ``Covpool`` autograd function on ``var`` and return its output."""
    pooled = Covpool.apply(var)
    return pooled


class ResNet_multiple(nn.Module):
    """3D ResNet backbone with covariance-based attention and per-task heads.

    The backbone output is squeezed along dimension 2, re-weighted by a
    channel attention branch and a position attention branch, average pooled
    and fed to ``task_num`` independent linear classifiers.

    Args:
        block: residual block class; must expose ``expansion`` and accept
            ``(inplanes, planes, stride, downsample)``.
        layers: number of blocks in each of the four residual stages.
        sample_size: input spatial size; only used to size the pooling
            windows (``ceil(sample_size / 28)``).
        sample_duration: input clip length; only used to size the 3D pooling
            window (``ceil(sample_duration / 10)``), which ``forward`` does
            not use.
        shortcut_type: ``'A'`` selects the parameter-free zero-padded shortcut
            (``downsample_basic_block``); any other value selects a 1x1x1
            convolution followed by BatchNorm3d.
        num_classes: output size of every task head.
        task_num: number of task heads.

    Several sub-modules (``tanh``, ``avgpool``, ``layer_reduce*``,
    ``relu_normal``, ``fc_adapt_channels``, ``conv_for_DR``, ``bn_for_DR``)
    are created but never used in ``forward``; they are kept so that the
    module layout, the state-dict keys and the order in which random
    initialisation is consumed stay exactly as before.
    """

    def __init__(self, block, layers, sample_size, sample_duration,
                 shortcut_type='A', num_classes=2, task_num=8):
        super(ResNet_multiple, self).__init__()
        # Running channel count, advanced by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv3d(3, 64, kernel_size=7, stride=(1, 2, 2),
            padding=(3, 3, 3), bias=False)
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
        self.layer2 = self._make_layer(
            block, 128, layers[1], shortcut_type, stride=2)
        self.layer3 = self._make_layer(
            block, 256, layers[2], shortcut_type, stride=2)
        self.layer4 = self._make_layer(
            block, 512, layers[3], shortcut_type, stride=2)

        # ---- pooling and classification heads ----
        last_duration = int(math.ceil(sample_duration / 10))
        last_size = int(math.ceil(sample_size / 28))
        self.tanh = nn.Tanh()
        # Unused by forward (the features are 4-D there); kept for layout
        # compatibility.
        self.avgpool = nn.AvgPool3d(
            (last_duration, last_size, last_size), stride=1)
        # Spatial pooling actually applied to the attended feature map.
        self.avgpool2d = nn.AvgPool2d(
            (last_size, last_size), stride=1)

        fc_num = 512
        self.task_num = task_num

        self.fcs = nn.ModuleList([nn.Linear(fc_num, num_classes) for _ in range(self.task_num)])
        self.isqrt_dim = 16

        self.layer_reduce = nn.Conv2d(512, self.isqrt_dim, kernel_size=1, stride=1, padding=0,
                                          bias=False)
        self.layer_reduce_bn = nn.BatchNorm2d(self.isqrt_dim)
        self.layer_reduce_relu = nn.ReLU(inplace=True)

        # ---- position (spatial) attention ----
        # The spatial side is hard-coded to 4, i.e. a 4x4 feature map.
        last_size = 4
        self.sp_reso = last_size * last_size
        self.row_bn_for_spatial = nn.BatchNorm2d(self.sp_reso)
        self.relu_normal = nn.ReLU(inplace=False)
        # Row-wise convolution realised as a grouped convolution.
        self.row_conv_group_for_spatial = nn.Conv2d(
                 self.sp_reso, self.sp_reso*4, kernel_size=(self.sp_reso, 1),
                 groups=self.sp_reso, bias=True)
        self.fc_adapt_channels_for_spatial = nn.Conv2d(
                 self.sp_reso * last_size, self.sp_reso, kernel_size=1, groups=1, bias=True)
        self.sigmoid = nn.Sigmoid()

        # ---- channel attention ----
        self.ch_dim = 512  #512 2048
        self.expansion = 1 #1
        planes = 512  #512 2048
        self.row_bn = nn.BatchNorm2d(self.expansion * self.ch_dim)
        # Row-wise convolution realised as a grouped convolution.
        self.row_conv_group = nn.Conv2d(
             self.ch_dim, self.ch_dim,
             kernel_size=(self.ch_dim, 1),
             groups = self.ch_dim, bias=True)
        self.fc_adapt_channels = nn.Conv2d(
             self.ch_dim, self.expansion*self.ch_dim,
             kernel_size=1, groups=1, bias=True)

        self.conv_for_DR = nn.Conv2d(
             planes * self.expansion, self.ch_dim,
             kernel_size=1,stride=2, bias=True)
        self.bn_for_DR = nn.BatchNorm2d(self.expansion*self.ch_dim)

        # ---- fusion of the two attended feature maps ----
        self.groups_base = 32
        self.groups = int(planes * self.expansion / 64)
        self.factor = int(math.log(self.groups_base / self.groups, 2))
        self.padding_num = self.factor + 2
        self.conv_kernel_size = self.factor * 2 + 5
        self.dilate_conv_for_concat1 = nn.Conv2d(planes * self.expansion,
                                                planes * self.expansion,
                                                kernel_size=(self.conv_kernel_size,1),
                                                stride=1, padding=(self.padding_num,0),
                                                groups=self.groups, bias=True)

        self.dilate_conv_for_concat2 = nn.Conv2d(planes * self.expansion,
                                                planes * self.expansion,
                                                kernel_size=(self.conv_kernel_size,1),
                                                stride=1, padding=(self.padding_num,0),
                                                groups=self.groups, bias=True)

        self.bn_for_concat = nn.BatchNorm2d(planes * self.expansion)

        # Only the 3D backbone layers are re-initialised here; the 2D
        # attention layers keep PyTorch's default initialisation.
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                m.weight = nn.init.kaiming_normal_(m.weight, mode='fan_out')
            elif isinstance(m, nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()


    def _make_layer(self, block, planes, blocks, shortcut_type, stride=1):
        """Stack ``blocks`` residual blocks; only the first may change stride/width."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            if shortcut_type == 'A':
                downsample = partial(
                    downsample_basic_block,
                    planes=planes * block.expansion,
                    stride=stride)
            else:
                downsample = nn.Sequential(
                    nn.Conv3d(
                        self.inplanes,
                        planes * block.expansion,
                        kernel_size=1,
                        stride=stride,
                        bias=False), nn.BatchNorm3d(planes * block.expansion))

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)


    def chan_att(self, out):
        """Channel attention weights of shape (N, C, 1, 1) from a (N, C, H, W) map."""
        out = CovpoolLayer(out) # NxCxC
        out = out.view(out.size(0), out.size(1), out.size(2), 1).contiguous() # NxCxCx1
        out = self.row_bn(out)
        out = self.row_conv_group(out) # NxCx1x1
        out = self.sigmoid(out) # NxCx1x1
        return out


    def pos_att(self, out):
        """Position attention weights of shape (N, 1, 4, 4); requires a 4x4 map."""
        out = cov_feature(out) # Nx16x16
        out = out.view(out.size(0), out.size(1), out.size(2), 1).contiguous()  # Nx16x16x1
        out = self.row_bn_for_spatial(out)

        out = self.row_conv_group_for_spatial(out) # Nx64x1x1
        out = self.relu(out)

        out = self.fc_adapt_channels_for_spatial(out) # Nx16x1x1
        out = self.sigmoid(out)
        out = out.view(out.size(0), 1, 4, 4).contiguous() # Nx1x4x4
        return out


    def downsample_long_block(self, x, planes, stride):
        """Zero-padded, strided shortcut (same contract as ``downsample_basic_block``).

        The result is detached from the autograd graph. The padding is created
        with ``new_zeros`` so it matches the dtype and device of the
        activations; the previous body referenced an undefined ``Variable``
        and the notebook-global ``device``.
        """
        out = F.avg_pool3d(x, kernel_size=1, stride=stride)
        zero_pads = out.new_zeros(
            out.size(0), planes - out.size(1), out.size(2), out.size(3),
            out.size(4))

        return torch.cat([out.detach(), zero_pads], dim=1)


    def forward(self, x):
        """Run the network.

        Returns:
            ``(xs, feature_map)`` where ``xs`` is a list with one logits
            tensor per task head and ``feature_map`` is the pooled feature
            tensor those heads were applied to (before flattening).
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Drop dimension 2; this is a no-op unless that dimension has size 1.
        x = x.squeeze(2)

        pre_att = x
        chan_att = self.chan_att(x)
        pos_att = self.pos_att(x)
        out1 = self.dilate_conv_for_concat1(pre_att * chan_att)
        out2 = self.dilate_conv_for_concat2(self.relu(pre_att * pos_att))
        out = (out1 * pre_att * chan_att) + out2 * (self.relu(pre_att * pos_att))
        x = self.bn_for_concat(out)
        # The features are 4-D (N, C, H, W) here, so pool with the 2D layer.
        # The 3D pool only produced the same result while its temporal window
        # was 1; otherwise it would pool across the channel axis.
        x = self.avgpool2d(x)
        feature_map = x

        x = x.view(x.size(0), -1)

        xs = [self.fcs[i](x) for i in range(self.task_num)]
        return xs, feature_map
    
    
def resnet_multiple18(**kwargs):
    """Build a ``ResNet_multiple`` with the ResNet-18 layout.

    Uses ``BasicBlock`` with two blocks in each of the four stages; all
    keyword arguments are forwarded to ``ResNet_multiple``.
    """
    return ResNet_multiple(BasicBlock, [2, 2, 2, 2], **kwargs)

In [6]:
class MultiScaleCornerCrop(object):
    """Crop a square at a randomly chosen scale and position, then resize it.

    ``randomize_parameters`` must be called before the transform is applied:
    it selects the ``scale`` and ``crop_position`` that ``__call__`` reads, so
    the same crop can be applied to every frame of a clip.

    Args:
        scales: candidate scale factors, relative to the shorter image side.
        size: side length of the square output image.
        interpolation: PIL resampling filter used for the final resize.
        crop_positions: candidate positions; 'c' (centre), 'tl', 'tr', 'bl',
            'br' (corners).
    """

    def __init__(self, scales, size, interpolation=Image.BILINEAR,
                 crop_positions=['c', 'tl', 'tr', 'bl', 'br']):
        self.scales = scales
        self.size = size
        self.interpolation = interpolation

        self.crop_positions = crop_positions

    def __call__(self, img):
        image_width, image_height = img.size[0], img.size[1]
        crop_size = int(min(image_width, image_height) * self.scale)
        position = self.crop_position

        if position == 'c':
            half = crop_size // 2
            centre_x, centre_y = image_width // 2, image_height // 2
            x1, y1 = centre_x - half, centre_y - half
            x2, y2 = centre_x + half, centre_y + half
        elif position == 'tl':
            x1, y1 = 0, 0
            x2, y2 = crop_size, crop_size
        elif position == 'tr':
            x1, y1 = image_width - crop_size, 0
            x2, y2 = image_width, crop_size
        elif position == 'bl':
            x1, y1 = 0, image_height - crop_size
            x2, y2 = crop_size, image_height
        elif position == 'br':
            x1, y1 = image_width - crop_size, image_height - crop_size
            x2, y2 = image_width, image_height

        cropped = img.crop((x1, y1, x2, y2))
        return cropped.resize((self.size, self.size), self.interpolation)

    def randomize_parameters(self):
        """Draw a new scale and crop position for subsequent calls."""
        scale_idx = random.randint(0, len(self.scales) - 1)
        self.scale = self.scales[scale_idx]
        position_idx = random.randint(0, len(self.crop_positions) - 1)
        self.crop_position = self.crop_positions[position_idx]
        
        
class RandomHorizontalFlip(object):
    """Mirror a PIL image left-to-right when the stored draw ``p`` is below 0.5.

    ``randomize_parameters`` must be called first; it stores the draw that
    every following call reuses, so all frames of a clip are treated alike.
    """

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be flipped.
        Returns:
            PIL.Image: The mirrored image if ``self.p < 0.5``, otherwise ``img``.
        """
        should_flip = self.p < 0.5
        return img.transpose(Image.FLIP_LEFT_RIGHT) if should_flip else img

    def randomize_parameters(self):
        """Store a fresh uniform draw from [0, 1) in ``self.p``."""
        self.p = random.random()


class Scale(object):
    """Rescale the input PIL.Image to the given size.
    Args:
        size (sequence or int): Desired output size. If size is a sequence like
            (w, h), the image is resized to exactly that size. If size is an
            int, the smaller edge of the image is matched to this number and
            the other edge is scaled to keep the aspect ratio, i.e. if
            height > width the result is (size, size * height / width) in
            (w, h) order.
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``
    """

    def __init__(self, size, interpolation=Image.BILINEAR):
        # Imported locally because the notebook never imports `collections`,
        # and the `collections.Iterable` alias was removed in Python 3.10.
        from collections.abc import Iterable

        assert isinstance(size, int) or (isinstance(size, Iterable) and
                                         len(size) == 2)
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be scaled.
        Returns:
            PIL.Image: Rescaled image.
        """
        if isinstance(self.size, int):
            w, h = img.size
            # Already at the requested size along the shorter edge.
            if (w <= h and w == self.size) or (h <= w and h == self.size):
                return img
            if w < h:
                ow = self.size
                oh = int(self.size * h / w)
                return img.resize((ow, oh), self.interpolation)
            else:
                oh = self.size
                ow = int(self.size * w / h)
                return img.resize((ow, oh), self.interpolation)
        else:
            return img.resize(self.size, self.interpolation)

    def randomize_parameters(self):
        """No random state; present so all transforms share one interface."""
        pass


class CenterCrop(object):
    """Cut a centred window out of a PIL image.

    Args:
        size (sequence or int): Target crop size. An int yields a square
            (size, size) crop; a sequence is read as (h, w).
    """

    def __init__(self, size):
        self.size = (int(size), int(size)) if isinstance(size, int) else size

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be cropped.
        Returns:
            PIL.Image: Cropped image.
        """
        width, height = img.size
        crop_h, crop_w = self.size
        left = int(round((width - crop_w) / 2.))
        top = int(round((height - crop_h) / 2.))
        box = (left, top, left + crop_w, top + crop_h)
        return img.crop(box)

    def randomize_parameters(self):
        """No random state; present so all transforms share one interface."""
        pass


In [7]:
class MEGC(Dataset):
    """Dataset of pre-processed videos with per-video labels.

    Args:
        frames: indexable collection of videos.
        labels: indexable collection of labels aligned with ``frames``.
        transform: ``None`` to return videos untouched, or a dict with
            ``"temporal"`` (a callable applied to the whole video) and
            ``"spatial"`` (a composed transform exposing ``.transforms`` that
            is applied to every frame as a PIL image).
    """

    def __init__(self, frames, labels, transform=None):
        self.frames = frames
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.frames)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` for item ``idx``."""
        sample = self.frames[idx]
        if self.transform:
            sample = self.transform["temporal"](sample).astype("uint8")
            spatial = self.transform["spatial"]
            # Draw the random parameters once per video so that every frame
            # gets the same crop/flip. Every step that offers
            # randomize_parameters is visited, rather than assuming that
            # exactly the first two steps do.
            for step in spatial.transforms:
                randomize = getattr(step, "randomize_parameters", None)
                if randomize is not None:
                    randomize()
            # Move the first axis last so that iterating yields one frame at
            # a time -- assumes the video is laid out (C, F, H, W); TODO confirm.
            sample = sample.transpose(1, 2, 3, 0)
            # Into PIL image for the transforms
            sample = [spatial(Image.fromarray(img)) for img in sample]
            # Stack the per-frame tensors and swap the first two axes back.
            sample = torch.stack(sample).permute(1, 0, 2, 3)

        label = self.labels[idx]

        return sample, label

In [8]:
class MultiTaskLoss(nn.Module):
    """Unweighted sum of per-task cross-entropy losses.

    Args:
        task_num: number of tasks; ``preds[i]`` is scored against
            ``labels[:, i]`` for ``i`` in ``range(task_num)``.

    ``log_vars`` is registered as a parameter of length ``task_num`` but is
    not used by ``forward``.
    """

    def __init__(self, task_num):
        super().__init__()
        self.task_num = task_num
        self.log_vars = nn.Parameter(torch.zeros((task_num)))

    def forward(self, preds, labels):
        """Return the sum over tasks of ``CrossEntropyLoss(preds[i], labels[:, i])``."""
        criterion = nn.CrossEntropyLoss()
        total = 0
        for task in range(self.task_num):
            total = total + criterion(preds[task], labels[:, task])
        return total

In [9]:
class MultiTaskF1(nn.Module):
    """Macro F1 score computed separately for each task.

    Args:
        task_num: number of tasks; ``preds[i]`` is scored against
            ``labels[:, i]`` for ``i`` in ``range(task_num)``.
    """

    def __init__(self, task_num):
        super().__init__()
        self.task_num = task_num

    def calc_f1(self, label, prediction):
        """Macro F1 between ``label`` and the arg-max over dimension 1 of ``prediction``."""
        predicted = torch.max(prediction, 1)[1]
        return f1_score(label.cpu(), predicted.cpu().data.numpy(), average="macro")

    def forward(self, preds, labels):
        """Return a list with one macro F1 score per task."""
        scores = []
        for task in range(self.task_num):
            scores.append(self.calc_f1(labels[:, task], preds[task]))
        return scores

In [10]:
# Per-frame training pipeline: random corner/centre crop at full scale resized
# to 112x112, random horizontal flip, then conversion to a tensor.
# MEGC.__getitem__ calls randomize_parameters() on the leading entries of this
# list, so the randomisable transforms must stay at the front.
spatial_train_transform = transforms.Compose([
    MultiScaleCornerCrop([1.0], 112),
    RandomHorizontalFlip(),
    transforms.ToTensor()
])

# Per-frame evaluation pipeline: match the shorter edge to 112, take the
# central 112x112 window, then convert to a tensor. Scale and CenterCrop have
# no-op randomize_parameters(), so this pipeline has no randomness.
spatial_test_transform = transforms.Compose([
    Scale(112),
    CenterCrop(112),
    transforms.ToTensor()
])

In [11]:
def temporal_train_transform(video):
    """Keep every 5th frame of the first 50, starting at a random offset in [0, 5).

    Frames are selected along axis 1 of ``video``; the result holds 10 frames.
    """
    offset = np.random.randint(0, 5)
    frame_ids = np.arange(offset, 50, 5)
    return video[:, frame_ids]

def temporal_test_transform(video):
    """Keep frames 0, 5, ..., 45 along axis 1 of ``video`` (10 frames, no randomness)."""
    frame_ids = np.arange(0, 50, 5)
    return video[:, frame_ids]

In [12]:
def get_fine_tuning_parameters(model, ft_begin_index):
    """Build optimizer parameter groups for fine-tuning.

    Args:
        model: module whose parameters are grouped.
        ft_begin_index: first ``layer{i}`` index to fine-tune. ``0`` returns
            ``model.parameters()`` unchanged.

    Returns:
        For a non-zero index, a list with one group per parameter. Parameters
        whose name contains ``layer{i}`` for ``ft_begin_index <= i < 5`` or
        the substring ``fc`` get no explicit learning rate (the optimizer
        default applies); all others are given ``lr = 0.0001``.
    """
    if ft_begin_index == 0:
        return model.parameters()

    ft_module_names = ['layer{}'.format(i) for i in range(ft_begin_index, 5)]
    ft_module_names.append('fc')

    parameters = []
    for name, param in model.named_parameters():
        # Plain substring match, so 'fc' also matches e.g. 'fcs.0.weight'.
        if any(tag in name for tag in ft_module_names):
            parameters.append({'params': param})
        else:
            parameters.append({'params': param, 'lr': 0.0001})

    return parameters

SCA no transforms
All AUs:  [('AU1', 0.5798611111111112), ('AU2', 0.5935505542492442), ('AU4', 0.7383783783783784), ('AU7', 0.4474885844748858), ('AU12', 0.7061680427391938), ('AU14', 0.4669603524229075), ('AU15', 0.5768795639206007), ('AU17', 0.5716308708992212)]
Mean f1:  0.5851146822744429
Binary f1:  0.26168866708794114

temporal transform
All AUs:  [('AU1', 0.5956208585422069), ('AU2', 0.5177536231884058), ('AU4', 0.7102490421455939), ('AU7', 0.45248868778280543), ('AU12', 0.6385220125786163), ('AU14', 0.47692307692307695), ('AU15', 0.5122606650990931), ('AU17', 0.5226216990136813)]
Mean f1:  0.5533049581591849
Binary f1:  0.21157106782106785

temporal + spatial transform
All AUs:  [('AU1', 0.46578366445916114), ('AU2', 0.47619047619047616), ('AU4', 0.6071092882166965), ('AU7', 0.44495412844036697), ('AU12', 0.625), ('AU14', 0.4635376853692096), ('AU15', 0.45617977528089887), ('AU17', 0.5956075435664837)]
Mean f1:  0.5167953201904116
Binary f1:  0.16387887471574786

spatial transform
binary 0.18...

pretrained

temporal + spatial transform
All AUs:  [('AU1', 0.5368834080717488), ('AU2', 0.5535635096610707), ('AU4', 0.6813793103448276), ('AU7', 0.5105543022881083), ('AU12', 0.6359217979170473), ('AU14', 0.5896656534954408), ('AU15', 0.7489905155413654), ('AU17', 0.5678884873515746)]
Mean f1:  0.603105873083898
Binary f1:  0.31826910491640625

no transform
All AUs:  [('AU1', 0.49), ('AU2', 0.5026315789473685), ('AU4', 0.6451814768460575), ('AU7', 0.428235294117647), ('AU12', 0.657608695652174), ('AU14', 0.4713986464711569), ('AU15', 0.5993926247288504), ('AU17', 0.6236448115642746)]
Mean f1:  0.5522616410409411
Binary f1:  0.2322092966746305

60epochs
binary f1: 0.21...
200 epochs
All AUs:  [('AU1', 0.5381578947368422), ('AU2', 0.5316629955947136), ('AU4', 0.6443498978897209), ('AU7', 0.5208333333333334), ('AU12', 0.646462653811062), ('AU14', 0.48115386291341666), ('AU15', 0.5465599229519051), ('AU17', 0.641214933558321)]
Mean f1:  0.5687994368486644
Binary f1:  0.25947735784787607

In [17]:
# Action units treated as separate binary tasks.
action_units = ["AU1", "AU2", "AU4", "AU7", "AU12", "AU14", "AU15", "AU17"]
# Keep only the samples in which at least one of these action units is set.
idx = df[action_units].sum(1) > 0
pr_frames2 = [pr_frames[i] for i in df[idx].index.tolist()]
# reset_index() realigns the frame's index with positions in pr_frames2, which
# LOSO relies on when it uses the index to pick features and labels.
# NOTE(review): LOSO is defined in a later cell, so that cell must be run
# first; `dropout` is accepted by LOSO but never read there.
predictions = LOSO(pr_frames2, df[idx].reset_index(), action_units, epochs=200, lr=0.001, weight_decay=0.001,
     dropout=0.5, batch_size=32)

Subject: 01, n=08 | train_f1: 1.0 | test_f1: 0.67298
Subject: 02, n=13 | train_f1: 1.0 | test_f1: 0.53793
Subject: 03, n=07 | train_f1: 1.0 | test_f1: 0.57142
Subject: 04, n=05 | train_f1: 0.97667 | test_f1: 0.76314
Subject: 05, n=15 | train_f1: 0.92857 | test_f1: 0.63335
Subject: 06, n=04 | train_f1: 1.0 | test_f1: 0.60238
Subject: 07, n=08 | train_f1: 1.0 | test_f1: 0.7619
Subject: 08, n=03 | train_f1: 1.0 | test_f1: 0.775
Subject: 09, n=13 | train_f1: 0.95079 | test_f1: 0.5973
Subject: 10, n=13 | train_f1: 0.84221 | test_f1: 0.92262
Subject: 11, n=10 | train_f1: 0.93382 | test_f1: 0.86477
Subject: 12, n=12 | train_f1: 0.82163 | test_f1: 0.65354
Subject: 13, n=08 | train_f1: 1.0 | test_f1: 0.5978
Subject: 14, n=04 | train_f1: 1.0 | test_f1: 0.86667
Subject: 15, n=03 | train_f1: 1.0 | test_f1: 0.73333
Subject: 16, n=04 | train_f1: 1.0 | test_f1: 0.7119
Subject: 17, n=36 | train_f1: 1.0 | test_f1: 0.47389
Subject: 18, n=03 | train_f1: 1.0 | test_f1: 0.675
Subject: 19, n=15 | train_f1: 

In [None]:
# NOTE(review): leftover shell command with a hard-coded PID from an earlier
# session; on a fresh run that PID may be unused or belong to another process.
!kill 23964

[autoreload of utils.utils failed: Traceback (most recent call last):
  File "/home/tvaranka/.local/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/home/tvaranka/.local/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 410, in superreload
    update_generic(old_obj, new_obj)
  File "/home/tvaranka/.local/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/home/tvaranka/.local/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 302, in update_class
    if update_generic(old_obj, new_obj): continue
  File "/home/tvaranka/.local/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/home/tvaranka/.local/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 302, in update_class
    if update_generic(old_obj, new_obj): continue
  Fi

In [15]:
def LOSO(features, df, action_units, epochs=200, lr=0.01, batch_size=128, dropout=0.05, weight_decay=0.001):
    """Leave-one-subject-out training and evaluation of the multi-task network.

    For every value of ``df["subject"]`` a fresh ``resnet_multiple18`` is
    initialised from the pretrained checkpoint, trained on all other subjects
    and evaluated on the held-out subject. Per-subject F1 scores are printed,
    followed by per-action-unit and mean F1 over all held-out predictions.

    Args:
        features: list of videos; ``features[i]`` belongs to row ``i`` of ``df``.
        df: frame with a ``"subject"`` column and one column per action unit.
            Its index is used positionally, so it must be 0..n-1.
        action_units: names of the label columns, one task each.
        epochs: number of training epochs per fold.
        lr: base learning rate, divided by 10 every 30 epochs.
        batch_size: training batch size.
        dropout: accepted for interface compatibility; not used.
        weight_decay: SGD weight decay.

    Returns:
        Integer tensor of predicted classes with one row per sample (ordered
        by held-out subject) and one column per action unit.

    Relies on the notebook-global ``device``.
    """
    # Reseed every RNG so repeated calls give the same initialisation and
    # the same augmentation draws.
    random.seed(1)
    torch.manual_seed(1)
    np.random.seed(1)
    torch.cuda.manual_seed(1)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    labels = np.concatenate([np.expand_dims(df[au], 1) for au in action_units], axis=1)
    n_tasks = labels.shape[1]

    # Read the checkpoint once instead of once per fold; it is only read from
    # below (load_state_dict copies the values into the network).
    pretrained_model = torch.load(
        "data/resnet-18-kinetics-ucf101_split1.pth", map_location=device)["state_dict"]

    predictions_list = []
    labels_list = []
    for subject, _ in df.groupby("subject"):
        # Split data: the held-out subject is the test set, the rest is training.
        train_index = df[df["subject"] != subject].index
        X_train = [features[i] for i in train_index]
        y_train = labels[train_index]

        test_index = df[df["subject"] == subject].index
        X_test = [features[i] for i in test_index]
        y_test = labels[test_index]

        megc_dataset_train = MEGC(X_train, y_train, {"temporal": temporal_train_transform, "spatial": spatial_train_transform})
        dataset_loader_train = torch.utils.data.DataLoader(megc_dataset_train,
                                                           batch_size=batch_size, shuffle=True,
                                                           num_workers=0)

        megc_dataset_test = MEGC(X_test, y_test, {"temporal": temporal_test_transform, "spatial": spatial_test_transform})
        dataset_loader_test = torch.utils.data.DataLoader(megc_dataset_test,
                                                          batch_size=100, shuffle=False,
                                                          num_workers=0)

        net = resnet_multiple18(task_num=8, num_classes=2, sample_size=112, sample_duration=10).to(device)
        net_state_dict = net.state_dict()
        # Checkpoint keys carry a 7-character prefix that is dropped before
        # matching; only keys that exist in this network are copied over.
        new_state_dict = {k[7:]: v for k, v in pretrained_model.items() if k[7:] in net_state_dict.keys()}
        net_state_dict.update(new_state_dict)
        net.load_state_dict(net_state_dict)
        params_for_optimizer = get_fine_tuning_parameters(net, 4)

        criterion = MultiTaskLoss(n_tasks)
        evaluation = MultiTaskF1(n_tasks)
        optimizer = optim.SGD(params_for_optimizer, lr=lr, weight_decay=weight_decay, momentum=0.9)

        # Last training batch; reused below for the (rough) train F1.
        data_batch = labels_batch = None
        for epoch in range(epochs):  # loop over the dataset multiple times
            # Step decay: divide the learning rate by 10 every 30 epochs.
            # NOTE(review): this assigns the same value to every parameter
            # group and therefore overrides the per-group lr of 0.0001 set by
            # get_fine_tuning_parameters -- confirm whether that is intended.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr * (0.1 ** (epoch // 30))
            for batch in dataset_loader_train:
                data_batch, labels_batch = batch[0].to(device), batch[1].to(device)
                optimizer.zero_grad()

                outputs, _ = net(data_batch.float())
                loss = criterion(outputs, labels_batch.long())
                loss.backward()
                optimizer.step()

        # Evaluation: no autograd bookkeeping, and every test batch is
        # consumed so subjects with more than 100 samples are not truncated.
        net.eval()
        with torch.no_grad():
            per_task_outputs = [[] for _ in range(n_tasks)]
            test_label_batches = []
            for data_batch_test, labels_batch_test in dataset_loader_test:
                batch_outputs, _ = net(data_batch_test.to(device).float())
                for task in range(n_tasks):
                    per_task_outputs[task].append(batch_outputs[task].cpu())
                test_label_batches.append(labels_batch_test)
            outputs = [torch.cat(task_outputs) for task_outputs in per_task_outputs]
            labels_test = torch.cat(test_label_batches)

            if data_batch is not None:
                train_outputs, _ = net(data_batch.float())
                f1_train = evaluation(train_outputs, labels_batch.long())
            else:
                # No training step ran (e.g. epochs == 0): nothing to score.
                f1_train = [float("nan")]
        net.train()

        f1 = evaluation(outputs, labels_test.long())
        # One column of predicted classes per task.
        predictions_list.append(torch.stack([torch.max(output, 1)[1] for output in outputs], dim=1))
        labels_list.append(labels_test)
        print("Subject: {}, n={} | train_f1: {:.5} | test_f1: {:.5}".format(
            subject, str(len(labels_test)).zfill(2), np.mean(f1_train), np.mean(f1)))

    # Calculate total f1-scores over all held-out predictions.
    predictions = torch.cat(predictions_list)
    all_labels = torch.cat(labels_list)
    # f1_score expects (y_true, y_pred).
    f1_aus = [f1_score(all_labels[:, i].numpy(), predictions[:, i].numpy(), average="macro")
              for i in range(all_labels.shape[1])]
    f1_aus_binary = [f1_score(all_labels[:, i].numpy(), predictions[:, i].numpy(), average="binary")
                     for i in range(all_labels.shape[1])]
    print("All AUs: ",list(zip(action_units, f1_aus)))
    print("Mean f1: ", np.mean(f1_aus))
    print("Binary f1: ", np.mean(f1_aus_binary))
    return predictions