# Importing Modules

In [1]:
# coding: utf-8
from __future__ import print_function, division
import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.autograd import Variable

from PIL import Image
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy
import pickle

# Important Parameters

In [2]:
# Run-wide settings shared by every cell below.
if torch.cuda.is_available():
    device = "cuda"  # use the default GPU when one is visible
else:
    device = "cpu"
root_dir = "Dataset/"  # keep the trailing slash: paths are built by string concatenation
epochs, batch_size = 1, 3
maxFaces = 15  # per-image face cap: extra faces are ignored, missing ones are zero-padded

# Dataset & Loaders

In [3]:
# Training file names come from the three class folders, each listed in
# sorted order and concatenated Negative -> Neutral -> Positive.
neg_train = sorted(os.listdir('Dataset/emotiw/train/' + 'Negative/'))
neu_train = sorted(os.listdir('Dataset/emotiw/train/' + 'Neutral/'))
pos_train = sorted(os.listdir('Dataset/emotiw/train/' + 'Positive/'))

train_filelist = neg_train + neu_train + pos_train

# Validation / test file names are stored as pickled lists.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load lists that were produced locally.
with open('Dataset/val_list', 'rb') as fp:
    val_filelist = pickle.load(fp)

with open('Dataset/test_list', 'rb') as fp:
    test_filelist = pickle.load(fp)

# Keep only entries whose name starts with 'p' or 'n' (pos_*/neg_*/neu_*).
# New lists are built instead of calling .remove() while iterating the same
# list: removing during iteration skips the element that follows each removal,
# so consecutive stray entries would survive the filter.
train_filelist = [name for name in train_filelist if name[:1] in ('p', 'n')]
val_filelist = [name for name in val_filelist if name[:1] in ('p', 'n')]

dataset_sizes = [len(train_filelist), len(val_filelist), len(test_filelist)]
print(dataset_sizes)

[9808, 3341, 1000]


In [4]:
# Shared settings for the preprocessing pipelines below.
_GLOBAL_IMAGE_SIZE = (224, 224)       # whole-scene input size
_FACE_CROP_SIZE = (96, 112)           # size given to Resize for aligned face crops
_CHANNEL_MEAN = [0.485, 0.456, 0.406]  # per-channel mean subtracted from the scene image
_CHANNEL_STD = [0.229, 0.224, 0.225]   # per-channel std the scene image is divided by

# Whole-image pipelines: resize, convert to tensor, normalise per channel.
train_global_data_transform = transforms.Compose([
    transforms.Resize(_GLOBAL_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
])

val_global_data_transform = transforms.Compose([
    transforms.Resize(_GLOBAL_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
])

# Face-crop pipelines: resize and convert to tensor only (no normalisation).
train_faces_data_transform = transforms.Compose([
    transforms.Resize(_FACE_CROP_SIZE),
    transforms.ToTensor(),
])

val_faces_data_transform = transforms.Compose([
    transforms.Resize(_FACE_CROP_SIZE),
    transforms.ToTensor(),
])

In [5]:
class EmotiWDataset(Dataset):
    """Per-image samples: scene image, MTCNN face features and aligned face crops.

    Every item is a dict with the keys
      * ``image``            - transformed whole-scene image,
      * ``features_mtcnn``   - ``(maxFaces, 256)`` float32 tensor, zero-padded,
      * ``features_aligned`` - ``(maxFaces, 3, 96, 112)`` float32 tensor, zero-padded,
      * ``label``            - 0 (Negative), 1 (Neutral) or 2 (Positive),
      * ``numberFaces``      - number of rows in the stored face-feature array
                               (may exceed ``maxFaces``).

    ``maxFaces`` is read from the notebook's configuration cell.
    """

    # File-name prefix -> class folder, and class folder -> integer label.
    _FOLDER_BY_PREFIX = {'neg': 'Negative', 'neu': 'Neutral', 'pos': 'Positive'}
    _LABEL_BY_FOLDER = {'Negative': 0, 'Neutral': 1, 'Positive': 2}

    def __init__(self, filelist, root_dir, loadTrain=True, transformGlobal=transforms.ToTensor(), transformFaces = transforms.ToTensor()):
        """
        Args:
            filelist: List of image file names such as ``neg_12.jpg``; the part
                before the first ``_`` selects the class folder.
            root_dir: Dataset directory (with trailing slash).
            loadTrain: Read from the ``train`` sub-folders when True, otherwise
                from the ``val`` sub-folders.
            transformGlobal (callable, optional): Applied to the scene image;
                expected to return a tensor.
            transformFaces (callable, optional): Applied to every aligned face
                crop; expected to return a tensor.
        """
        self.filelist = filelist
        self.root_dir = root_dir
        self.transformGlobal = transformGlobal
        self.transformFaces = transformFaces
        self.loadTrain = loadTrain

    def __len__(self):
        # Use the list this instance was built with. The previous version read
        # the notebook globals train_filelist / val_filelist, which reports the
        # wrong length for any other list (e.g. a test split or a subset).
        return len(self.filelist)

    def __getitem__(self, idx):
        split = 'train' if self.loadTrain else 'val'
        filename = self.filelist[idx].split('.')[0]
        labelname = self._FOLDER_BY_PREFIX[filename.split('_')[0]]

        # IMAGE
        image_path = self.root_dir+'emotiw/'+split+'/'+labelname+'/'+filename+'.jpg'
        with Image.open(image_path) as raw_image:  # close the file handle once decoded
            image = raw_image
            if self.transformGlobal:
                image = self.transformGlobal(raw_image)
        # Single-channel (grayscale) images are expanded to three identical
        # channels so that every sample has the same shape.
        # NOTE(review): newer torchvision releases may reject a 1-channel tensor
        # inside Normalize before this branch is reached - confirm.
        if image.shape[0] == 1:
            image = image.repeat(3, 1, 1)

        # FEATURES FROM MTCNN
        features_path = self.root_dir+'FaceFeatures/'+split+'/'+labelname+'/'+filename+'.npz'
        with np.load(features_path) as archive:  # NpzFile keeps the file open until closed
            features = archive['a']
        numberFaces = features.shape[0]
        maxNumber = min(numberFaces, maxFaces)

        features1 = np.zeros((maxFaces, 256), dtype = 'float32')
        for i in range(maxNumber):
            features1[i] = features[i]
        features1 = torch.from_numpy(features1)

        # ALIGNED CROPPED FACE IMAGES
        features2 = np.zeros((maxFaces, 3, 96, 112), dtype = 'float32')
        for i in range(maxNumber):
            face_path = self.root_dir + 'AlignedCroppedImages/'+split+'/'+ labelname + '/' + filename+ '_' + str(i) + '.jpg'
            with Image.open(face_path) as raw_face:
                face = raw_face
                if self.transformFaces:
                    face = self.transformFaces(raw_face)
            features2[i] = face.numpy()
        features2 = torch.from_numpy(features2)

        # SAMPLE
        sample = {'image': image, 'features_mtcnn': features1, 'features_aligned':features2, 'label':self._LABEL_BY_FOLDER[labelname], 'numberFaces': numberFaces}
        return sample

In [6]:
# Training split: dataset plus a shuffled loader.
train_dataset = EmotiWDataset(
    train_filelist,
    root_dir,
    loadTrain=True,
    transformGlobal=train_global_data_transform,
    transformFaces=train_faces_data_transform,
)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

# Validation split: same construction, reading from the 'val' folders.
val_dataset = EmotiWDataset(
    val_filelist,
    root_dir,
    loadTrain=False,
    transformGlobal=val_global_data_transform,
    transformFaces=val_faces_data_transform,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

print('Dataset Loaded')

Dataset Loaded


# SphereFace Model For Aligned Models

In [7]:
class LSoftmaxLinear(nn.Module):
    """Bias-free linear classifier with Large-Margin Softmax (L-Softmax) bookkeeping.

    In evaluation mode the layer returns ``input @ weight``.  In training mode
    it also evaluates the margin-adjusted target logit
    ``|w||x| * ((-1)^k cos(m*theta) - 2k)`` from the multiple-angle expansion
    of ``cos(m*theta)``.

    NOTE(review): the training branch returns the *unmodified* logits - the
    margin-adjusted tensor is computed and then discarded.  That behaviour is
    preserved here; confirm whether the margin was meant to be applied.

    Args:
        input_dim: Size of each input feature vector.
        output_dim: Number of classes.
        margin: Integer margin ``m``.
    """

    def __init__(self, input_dim, output_dim, margin):
        import math  # local import: `math` is not imported in the notebook's import cell

        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.margin = margin

        self.weight = nn.Parameter(torch.empty(input_dim, output_dim, dtype=torch.float))

        self.divisor = math.pi / self.margin
        # Binomial coefficients C(margin, 0), C(margin, 2), ... used by the
        # cos(m*theta) expansion (computed with the stdlib, no scipy needed).
        self.coeffs = [
            float(math.factorial(margin) // (math.factorial(n) * math.factorial(margin - n)))
            for n in range(0, margin + 1, 2)
        ]
        self.cos_exps = range(self.margin, -1, -2)
        self.sin_sq_exps = range(len(self.cos_exps))
        self.signs = [(-1) ** n for n in self.sin_sq_exps]  # 1, -1, 1, ...

        # torch.empty leaves the weight uninitialised, so initialise it here
        # instead of relying on the caller to remember reset_parameters().
        self.reset_parameters()

    def reset_parameters(self):
        """Kaiming-normal initialisation of the weight, in place."""
        nn.init.kaiming_normal_(self.weight.data.t())

    def find_k(self, cos):
        """Return, detached, the index ``k`` with ``theta`` in ``[k*pi/m, (k+1)*pi/m)``."""
        # Clamp first: rounding can push |cos| marginally above 1, where acos is NaN.
        acos = cos.clamp(-1.0, 1.0).acos()
        k = (acos / self.divisor).floor().detach()
        return k

    @staticmethod
    def _constant_like(values, like):
        """Turn a list / range / ndarray of numbers into a tensor matching ``like``."""
        return torch.as_tensor(np.asarray(values, dtype='float64'),
                               dtype=like.dtype, device=like.device)

    def forward(self, input, target=None):
        """Compute class logits.

        Args:
            input: ``(batch, input_dim)`` features.
            target: ``(batch,)`` long tensor of class indices; required in
                training mode and must be ``None`` in evaluation mode.

        Returns:
            ``(batch, output_dim)`` tensor equal to ``input @ weight``.
        """
        if not self.training:
            assert target is None
            return input.matmul(self.weight)

        assert target is not None
        logit = input.matmul(self.weight)
        batch_size = logit.size(0)
        rows = torch.arange(batch_size, device=logit.device)

        logit_target = logit[rows, target]
        weight_target_norm = self.weight[:, target].norm(p=2, dim=0)
        input_norm = input.norm(p=2, dim=1)
        norm_target_prod = weight_target_norm * input_norm
        cos_target = logit_target / (norm_target_prod + 1e-10)
        sin_sq_target = 1 - cos_target**2

        # Rescale every class logit by |w_target| / |w_class|.  Vectorised form
        # of the former per-class / per-sample loops; the class count comes
        # from self.output_dim (the old code used an undefined `numClasses`).
        weight_nontarget_norm = self.weight.norm(p=2, dim=0)
        norm_nontarget_prod = input_norm.unsqueeze(1) * weight_nontarget_norm.unsqueeze(0)
        logit2 = norm_target_prod.unsqueeze(1) / (norm_nontarget_prod + 1e-10)
        logit3 = logit2 * logit

        # Constants are created on the input's device/dtype, so the layer does
        # not depend on a global `device` variable.
        coeffs = self._constant_like(self.coeffs, input)
        cos_exps = self._constant_like(self.cos_exps, input)
        sin_sq_exps = self._constant_like(self.sin_sq_exps, input)
        signs = self._constant_like(self.signs, input)

        cos_terms = cos_target.unsqueeze(1) ** cos_exps.unsqueeze(0)
        sin_sq_terms = (sin_sq_target.unsqueeze(1)
                        ** sin_sq_exps.unsqueeze(0))

        cosm_terms = (signs.unsqueeze(0) * coeffs.unsqueeze(0)
                      * cos_terms * sin_sq_terms)
        cosm = cosm_terms.sum(1)
        k = self.find_k(cos_target)

        ls_target = norm_target_prod * (((-1)**k * cosm) - 2*k)
        logit3[rows, target] = ls_target
        # NOTE(review): `logit3` (margin-adjusted) is intentionally NOT returned,
        # matching the previous behaviour - see the class docstring.
        return logit

class sphere20a(nn.Module):
    """20-layer SphereFace-style residual CNN for aligned face crops.

    Four strided convolution stages (64/128/256/512 channels) with PReLU
    activations and additive residual units, followed by a 512-d fully
    connected embedding (``fc5``) and an ``LSoftmaxLinear`` classifier (``fc6``).

    Args:
        classnum: Number of output classes of ``fc6``.
        feature: When True, ``forward`` returns the 512-d ``fc5`` embedding
            instead of class logits.

    NOTE(review): the shape comments assume 112x96 (HxW) inputs, whereas the
    notebook's face transform calls ``Resize((96, 112))``.  Both orientations
    flatten to 512*7*6 values, so ``fc5`` accepts either - confirm which
    orientation the checkpoints were trained with.
    """

    def __init__(self,classnum=3,feature=False):
        super(sphere20a, self).__init__()
        self.classnum = classnum
        self.feature = feature
        #input = B*3*112*96
        self.conv1_1 = nn.Conv2d(3,64,3,2,1) #=>B*64*56*48
        self.relu1_1 = nn.PReLU(64)
        self.conv1_2 = nn.Conv2d(64,64,3,1,1)
        self.relu1_2 = nn.PReLU(64)
        self.conv1_3 = nn.Conv2d(64,64,3,1,1)
        self.relu1_3 = nn.PReLU(64)

        self.conv2_1 = nn.Conv2d(64,128,3,2,1) #=>B*128*28*24
        self.relu2_1 = nn.PReLU(128)
        self.conv2_2 = nn.Conv2d(128,128,3,1,1)
        self.relu2_2 = nn.PReLU(128)
        self.conv2_3 = nn.Conv2d(128,128,3,1,1)
        self.relu2_3 = nn.PReLU(128)

        self.conv2_4 = nn.Conv2d(128,128,3,1,1) #=>B*128*28*24
        self.relu2_4 = nn.PReLU(128)
        self.conv2_5 = nn.Conv2d(128,128,3,1,1)
        self.relu2_5 = nn.PReLU(128)


        self.conv3_1 = nn.Conv2d(128,256,3,2,1) #=>B*256*14*12
        self.relu3_1 = nn.PReLU(256)
        self.conv3_2 = nn.Conv2d(256,256,3,1,1)
        self.relu3_2 = nn.PReLU(256)
        self.conv3_3 = nn.Conv2d(256,256,3,1,1)
        self.relu3_3 = nn.PReLU(256)

        self.conv3_4 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
        self.relu3_4 = nn.PReLU(256)
        self.conv3_5 = nn.Conv2d(256,256,3,1,1)
        self.relu3_5 = nn.PReLU(256)

        self.conv3_6 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
        self.relu3_6 = nn.PReLU(256)
        self.conv3_7 = nn.Conv2d(256,256,3,1,1)
        self.relu3_7 = nn.PReLU(256)

        self.conv3_8 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12
        self.relu3_8 = nn.PReLU(256)
        self.conv3_9 = nn.Conv2d(256,256,3,1,1)
        self.relu3_9 = nn.PReLU(256)

        self.conv4_1 = nn.Conv2d(256,512,3,2,1) #=>B*512*7*6
        self.relu4_1 = nn.PReLU(512)
        self.conv4_2 = nn.Conv2d(512,512,3,1,1)
        self.relu4_2 = nn.PReLU(512)
        self.conv4_3 = nn.Conv2d(512,512,3,1,1)
        self.relu4_3 = nn.PReLU(512)

        self.fc5 = nn.Linear(512*7*6,512)
        self.fc6 = LSoftmaxLinear(512,self.classnum, 4)

    def forward(self, x, y=None):
        """Run the network on a batch of face crops.

        Args:
            x: ``(B, 3, H, W)`` face crops.
            y: ``(B,)`` class indices.  Forwarded to ``fc6`` in training mode
                (where ``fc6`` requires a target); ignored in evaluation mode
                and when ``feature`` is True.

        Returns:
            ``(B, 512)`` embedding when ``self.feature`` is True, otherwise
            ``(B, classnum)`` logits.
        """
        x = self.relu1_1(self.conv1_1(x))
        x = x + self.relu1_3(self.conv1_3(self.relu1_2(self.conv1_2(x))))

        x = self.relu2_1(self.conv2_1(x))
        x = x + self.relu2_3(self.conv2_3(self.relu2_2(self.conv2_2(x))))
        x = x + self.relu2_5(self.conv2_5(self.relu2_4(self.conv2_4(x))))

        x = self.relu3_1(self.conv3_1(x))
        x = x + self.relu3_3(self.conv3_3(self.relu3_2(self.conv3_2(x))))
        x = x + self.relu3_5(self.conv3_5(self.relu3_4(self.conv3_4(x))))
        x = x + self.relu3_7(self.conv3_7(self.relu3_6(self.conv3_6(x))))
        x = x + self.relu3_9(self.conv3_9(self.relu3_8(self.conv3_8(x))))

        x = self.relu4_1(self.conv4_1(x))
        x = x + self.relu4_3(self.conv4_3(self.relu4_2(self.conv4_2(x))))

        x = x.view(x.size(0),-1)
        x = self.fc5(x)
        if self.feature:
            return x

        # fc6 asserts `target is not None` while training and `target is None`
        # while evaluating.  The previous code never passed the labels, so the
        # classifier head could not run in training mode at all.
        if self.training:
            return self.fc6(x, y)
        return self.fc6(x)

# Model 1 
## Pretrained EmotiW DenseNet (DenseNet161_EmotiW)

In [8]:
# Load Model 1 from a checkpoint that stores the whole pickled model object.
# `.module` unwraps the saved wrapper (presumably nn.DataParallel - confirm).
# map_location returns each storage unchanged, i.e. tensors stay on the CPU
# here regardless of the device they were saved from.
# NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
global_model = torch.load('./TrainedModels/EnsembleModels/DenseNet161_EmotiW', map_location=lambda storage, loc: storage).module
model1 = global_model
print('Pretrained EmotiW DenseNet Loaded! (Model 1)')

Pretrained EmotiW DenseNet Loaded! (Model 1)


# Model 2
## Pretrained EmotiC DenseNet (densenet_emotiw_pretrainemotic_lr001)

In [9]:
# Build an untrained DenseNet-161 and replace its classifier with a 3-way head.
model2 = models.densenet161(pretrained=False)
num_ftrs = model2.classifier.in_features
model2.classifier = nn.Linear(num_ftrs, 3)

# The model is wrapped in DataParallel before load_state_dict and unwrapped
# again afterwards - presumably because the saved state dict carries the
# wrapper's "module." key prefix (confirm against the checkpoint).
model2 = model2.to(device)
model2 = nn.DataParallel(model2)
model2.load_state_dict(torch.load('./TrainedModels/EnsembleModels/densenet_emotiw_pretrainemotic_lr001.pt', map_location=lambda storage, loc: storage))
model2 = model2.module

print('Pretrained EmotiC DenseNet Loaded! (Model 2)')

Pretrained EmotiC DenseNet Loaded! (Model 2)


# Model 3
## Aligned Model Global Level (AlignedModelTrainerSoftmax_AlignedModel_EmotiW_lr01_Softmax)

In [10]:
class FaceAttention(nn.Module):
    """Model 3 wrapper: average the per-face class scores of an aligned-face net.

    The class name is kept as ``FaceAttention``; presumably the checkpoint
    loaded in the next cell was pickled under this name (confirm).

    Args:
        non_align_model: Network called as ``forward(faces, labels)`` on the
            ``maxFaces`` face crops of one image, returning 3 scores per face.
    """

    def __init__(self, non_align_model):
        super(FaceAttention, self).__init__()

        self.non_align_model = non_align_model

    def forward(self, face_features_initial, numberFaces, labels):
        """Average the scores of the real (non-padded) faces of every image.

        Args:
            face_features_initial: ``(batch, maxFaces, 3, H, W)`` face crops,
                zero-padded beyond the detected faces.
            numberFaces: Per-image number of detected faces (may exceed
                ``maxFaces``).
            labels: Per-image class indices; each is repeated ``maxFaces``
                times and passed to the face network.

        Returns:
            ``(batch, 3)`` tensor; rows of images without faces are all zeros.
        """
        batch = face_features_initial.shape[0]
        dev = face_features_initial.device  # follow the input, not a global `device`

        # torch.clamp replaces np.minimum: NumPy ufuncs cannot consume CUDA tensors.
        counts = torch.as_tensor(numberFaces, device=dev).long().clamp(max=maxFaces)

        per_image = []
        for j in range(batch):
            faceLabels = torch.full((maxFaces,), int(labels[j]), dtype=torch.long, device=dev)
            per_image.append(self.non_align_model.forward(face_features_initial[j], faceLabels))
        face_features = torch.stack(per_image).float()  # (batch, maxFaces, 3)

        averaged = []
        for i in range(batch):
            n_faces = int(counts[i])
            if n_faces != 0:
                averaged.append(face_features[i, :n_faces].mean(dim=0))
            else:
                # No detected face: contribute a neutral all-zero score row.
                averaged.append(face_features.new_zeros(face_features.shape[-1]))
        face_features_avg = torch.stack(averaged)

        return face_features_avg

In [11]:
# Load Model 3 from a checkpoint holding the whole pickled model; `.module`
# unwraps the saved wrapper (presumably nn.DataParallel - confirm).
# This must run after the FaceAttention definition in the previous cell if the
# pickle refers to that class by name (presumed; verify against the checkpoint).
# NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
aligned_model_global_level_path = "./TrainedModels/EnsembleModels/AlignedModelTrainerSoftmax_AlignedModel_EmotiW_lr01_Softmax"
align_model = torch.load(aligned_model_global_level_path, map_location=lambda storage, loc: storage).module
model3 = align_model
print('Aligned Model Global Level Loaded! (Model 3)')

Aligned Model Global Level Loaded! (Model 3)


# Model 4 
## Aligned Model Image Level Trained (AlignedModel_EmotiW_lr01_Softmax)

In [12]:
# Load the image-level aligned-face network; it is wrapped by the
# FaceAttention class defined below to form Model 4.  Note that `align_model`
# is rebound here (it previously referred to Model 3's network).
# NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
aligned_model_image_level_path = './TrainedModels/EnsembleModels/AlignedModel_EmotiW_lr01_Softmax'
align_model = torch.load(aligned_model_image_level_path, map_location=lambda storage, loc: storage).module

class FaceAttention(nn.Module):
    """Model 4 wrapper: average the per-face class scores of an aligned-face net.

    Args:
        non_align_model: Network called as ``forward(faces, labels)`` on the
            ``maxFaces`` face crops of one image, returning 3 scores per face.
    """

    def __init__(self, non_align_model):
        super(FaceAttention, self).__init__()

        self.non_align_model = non_align_model

    def forward(self, face_features_initial, numberFaces, labels):
        """Average the scores of the real (non-padded) faces of every image.

        Args:
            face_features_initial: ``(batch, maxFaces, 3, H, W)`` face crops,
                zero-padded beyond the detected faces.
            numberFaces: Per-image number of detected faces (may exceed
                ``maxFaces``).
            labels: Batch labels, forwarded unchanged to the face network.

        Returns:
            ``(batch, 3)`` tensor.  Images without any face yield an all-zero
            row; the previous implementation divided by the face count
            unconditionally and produced NaN (0/0) for such images, which then
            propagated into the ensemble sum.
        """
        batch = face_features_initial.shape[0]
        dev = face_features_initial.device  # follow the input, not a global `device`

        # torch.clamp replaces np.minimum: NumPy ufuncs cannot consume CUDA tensors.
        counts = torch.as_tensor(numberFaces, device=dev).long().clamp(max=maxFaces)

        per_image = [
            self.non_align_model.forward(face_features_initial[j], labels)
            for j in range(batch)
        ]
        face_features = torch.stack(per_image).float()  # (batch, maxFaces, 3)

        averaged = []
        for i in range(batch):
            n_faces = int(counts[i])
            if n_faces != 0:
                averaged.append(face_features[i, :n_faces].mean(dim=0))
            else:
                averaged.append(face_features.new_zeros(face_features.shape[-1]))
        face_features_sum = torch.stack(averaged)

        return face_features_sum


# Model 4 = the FaceAttention wrapper defined in this cell around the
# image-level aligned-face network loaded at the top of the cell.
model4 = FaceAttention(align_model)
print('Aligned Model Image Level Loaded! (Model 4)')

Aligned Model Image Level Loaded! (Model 4)


# Model 5
## Avg. Face Features Concat Model (PretrainedDenseNetAvgFaceFeatures-FineTune-2208-3-NoSoftmax-Reg-lr001)

In [13]:
class FaceAttention(nn.Module):
    """Model 5: concatenate averaged face features with pooled scene features.

    Args:
        global_model: Scene network whose output feature map is ReLU-ed and
            average-pooled (kernel 7) into a 2208-d vector.
    """

    def __init__(self, global_model):
        super(FaceAttention, self).__init__()

        self.global_model = global_model

        # 256 face-feature values + 2208 scene-feature values -> 3 class scores.
        self.global_fc3_debug = nn.Linear(2464, 3)
        nn.init.kaiming_normal_(self.global_fc3_debug.weight)
        self.global_fc3_debug.bias.data.fill_(0.01)

        self.bn_global = nn.BatchNorm1d(2208, affine=False)
        self.bn_face_features = nn.BatchNorm1d(256, affine=False)
        self.dropout_classifier = nn.Dropout(0.5)

    def forward(self, image, face_features, numberFaces):
        """Classify from the scene image and the per-face feature vectors.

        Args:
            image: Batch of scene images fed to ``global_model``.
            face_features: ``(batch, maxFaces, 256)`` face features, summed
                over the face axis and divided by the capped face count.
            numberFaces: Per-image number of detected faces.

        Returns:
            ``(batch, 3)`` class scores.
        """
        feature_map = self.global_model.forward(image)

        activated = F.relu(feature_map, inplace=True)
        pooled = F.avg_pool2d(activated, kernel_size=7, stride=1).view(feature_map.size(0), -1)
        scene_vector = Variable(pooled).view(-1, 2208)

        n_images = face_features.shape[0]
        face_mean = torch.sum(face_features, dim=1).view(-1, 256)
        for idx in range(n_images):
            divisor = float(min(numberFaces[idx], maxFaces))
            if divisor == 0:
                # No faces: leave the row as it is instead of dividing by zero.
                continue
            face_mean[idx] = face_mean[idx] / divisor
        # face_mean now holds the average of the face features of each image.

        face_mean = self.bn_face_features(face_mean)
        scene_vector = self.bn_global(scene_vector)

        combined = torch.cat((face_mean, scene_vector), dim=1)
        combined = self.dropout_classifier(combined)

        return self.global_fc3_debug(combined)
    
# Load Model 5 from a checkpoint holding the whole pickled model; `.module`
# unwraps the saved wrapper (presumably nn.DataParallel - confirm).
# NOTE(review): the file name loaded here
# ("PretrainedDenseNet-FineTune-2208-3-lr001-Regularized-Corrected") differs
# from the checkpoint named in this section's header
# ("PretrainedDenseNetAvgFaceFeatures-FineTune-2208-3-NoSoftmax-Reg-lr001") -
# confirm that this is the intended file.
# NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
model5 = torch.load('./TrainedModels/EnsembleModels/PretrainedDenseNet-FineTune-2208-3-lr001-Regularized-Corrected', map_location=lambda storage, loc: storage).module
print('Avg. Face Features Concat Model Loaded! (Model 5)')

Avg. Face Features Concat Model Loaded! (Model 5)


# Model 6
## Face Attention Model (EmotiC) using 3rd Para Attention (FaceAttention_AlignedModel_FullTrain_3para_lr001_dropout_BN_SoftmaxLr01_EmotiC)

In [14]:
class FaceAttention(nn.Module):
    """Model 6: scene-conditioned attention over per-face features.

    A 64-d projection of the 256-d scene feature is multiplied against the
    64-d face features to score each face; the softmax-weighted face feature
    is concatenated with the scene feature and classified into 3 classes.

    Args:
        global_model: Scene network returning a 256-d vector per image.
        non_align_model: Face network called as ``forward(faces, labels)``,
            returning a 64-d vector per face.
    """

    def __init__(self, global_model, non_align_model):
        super(FaceAttention, self).__init__()

        self.global_model = global_model
        self.non_align_model = non_align_model

        # 64 attended-face values + 256 scene values -> 3 class scores.
        self.global_fc3_debug = nn.Linear(320, 3)
        nn.init.kaiming_normal_(self.global_fc3_debug.weight)
        self.global_fc3_debug.bias.data.fill_(0.01)

        self.global_fc = nn.Linear(256, 64)
        nn.init.kaiming_normal_(self.global_fc.weight)
        self.global_fc.bias.data.fill_(0.01)

        self.global_fc_dropout = nn.Dropout(p = 0.5)
        self.global_fc_main_dropout = nn.Dropout(p = 0.5)
        self.non_align_model_dropout = nn.Dropout(p = 0.5)

        self.bn_debug_face = nn.BatchNorm1d(64, affine=False)
        self.bn_debug_global = nn.BatchNorm1d(256, affine=False)

    def forward(self, image, face_features_initial, numberFaces, labels):
        """Classify a batch from scene images and aligned face crops.

        Args:
            image: Batch of scene images.
            face_features_initial: ``(batch, maxFaces, 3, H, W)`` face crops,
                zero-padded beyond the detected faces.
            numberFaces: Per-image number of detected faces (may exceed
                ``maxFaces``).
            labels: Batch labels, forwarded unchanged to the face network.

        Returns:
            ``(batch, 3)`` class scores.
        """
        features = self.global_model.forward(image)
        global_features_main = self.global_fc_main_dropout(features)
        global_features = self.global_fc_dropout(self.global_fc(global_features_main))
        global_features = global_features.view(-1, 1, 64)

        batch_size = global_features.shape[0]
        dev = global_features.device  # follow the activations, not a global `device`

        # torch.clamp replaces np.minimum: NumPy ufuncs cannot consume CUDA tensors.
        counts = torch.as_tensor(numberFaces, device=dev).long().clamp(max=maxFaces)
        has_faces = counts > 0

        # Stack the per-image outputs rather than writing them in place into a
        # preallocated buffer, which keeps the autograd graph free of in-place edits.
        face_features = torch.stack([
            self.non_align_model.forward(face_features_initial[j], labels)
            for j in range(batch_size)
        ]).float()                                                # (batch, maxFaces, 64)
        face_features = self.non_align_model_dropout(face_features)

        # NOTE(review): this is a memory reinterpretation (view), not a
        # transpose, so the columns are not per-face vectors.  It is kept
        # exactly as before because the checkpoint was presumably trained with
        # it - do not "fix" without retraining.
        face_keys = face_features.view(batch_size, 64, -1)

        # Additive mask: -inf on padded face slots.  Rows of images without
        # any face are left unmasked so the softmax stays finite; their
        # weights are zeroed explicitly afterwards.
        slots = torch.arange(maxFaces, device=dev).unsqueeze(0)
        padded = (slots >= counts.unsqueeze(1)) & has_faces.unsqueeze(1)
        mask = torch.zeros(batch_size, 1, maxFaces, dtype=face_features.dtype, device=dev)
        mask = mask.masked_fill(padded.unsqueeze(1), float('-inf'))

        attention_scores = torch.bmm(global_features, face_keys) + mask  # (batch, 1, maxFaces)

        # Convert scores to weights; images without faces get all-zero weights.
        attention_weights = F.softmax(attention_scores, dim = -1)
        attention_weights = attention_weights * has_faces.view(-1, 1, 1).to(attention_weights.dtype)

        # Weighted average of the face features.
        attended_face_features = torch.bmm(attention_weights, face_features)  # (batch, 1, 64)

        # Concatenate attended face features with the scene features.
        attended_face_features = attended_face_features.view(batch_size, -1)
        attended_face_features = self.bn_debug_face(attended_face_features)
        global_features_main = self.bn_debug_global(global_features_main)
        final_features = torch.cat((attended_face_features, global_features_main), dim=1)

        x = self.global_fc3_debug(final_features)
        return x

# Load Model 6 from a checkpoint holding the whole pickled model; `.module`
# unwraps the saved wrapper (presumably nn.DataParallel - confirm).
# This must run after the FaceAttention definition above if the pickle refers
# to that class by name (presumed; verify against the checkpoint).
# NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
model6 = torch.load('./TrainedModels/EnsembleModels/FaceAttention_AlignedModel_FullTrain_3para_lr001_dropout_BN_SoftmaxLr01_EmotiC', map_location=lambda storage, loc: storage).module
print('Face Attention Model (EmotiC) using 3rd Para Attention Loaded! (Model 6)')

Face Attention Model (EmotiC) using 3rd Para Attention Loaded! (Model 6)


# Model 7
## Face Attention Model (EmotiC) using 4th Para Attention (FaceAttention_AlignedModel_FullTrain_4para_lr001_dropout_BN_SoftmaxLr01_EmotiC)

In [15]:
class FaceAttention(nn.Module):
    """Model 7: learned (scene-independent) attention over per-face features.

    Each 256-d face feature is scored by a small MLP (256 -> 64 -> 1); the
    softmax-weighted face feature is concatenated with the 256-d scene feature
    and classified into 3 classes.

    Args:
        global_model: Scene network returning a 256-d vector per image.
        non_align_model: Face network called as ``forward(faces, labels)``,
            returning a 256-d vector per face.
    """

    def __init__(self, global_model, non_align_model):
        super(FaceAttention, self).__init__()

        self.global_model = global_model
        self.non_align_model = non_align_model

        # 256 attended-face values + 256 scene values -> 3 class scores.
        self.global_fc3_debug = nn.Linear(512, 3)
        nn.init.kaiming_normal_(self.global_fc3_debug.weight)
        self.global_fc3_debug.bias.data.fill_(0.01)

        self.attentionfc1 = nn.Linear(256, 64)
        nn.init.kaiming_normal_(self.attentionfc1.weight)
        self.attentionfc1.bias.data.fill_(0.01)

        self.attentionfc2 = nn.Linear(64, 1)
        nn.init.kaiming_normal_(self.attentionfc2.weight)
        self.attentionfc2.bias.data.fill_(0.01)

        self.attentionfc1_dropout = nn.Dropout(p = 0.5)
        self.global_fc_main_dropout = nn.Dropout(p = 0.5)
        self.non_align_model_dropout = nn.Dropout(p = 0.5)

        self.bn_debug_face = nn.BatchNorm1d(256, affine=False)
        self.bn_debug_global = nn.BatchNorm1d(256, affine=False)

    def forward(self, image, face_features_initial, numberFaces, labels):
        """Classify a batch from scene images and aligned face crops.

        Args:
            image: Batch of scene images.
            face_features_initial: ``(batch, maxFaces, 3, H, W)`` face crops,
                zero-padded beyond the detected faces.
            numberFaces: Per-image number of detected faces (may exceed
                ``maxFaces``).
            labels: Batch labels, forwarded unchanged to the face network.

        Returns:
            ``(batch, 3)`` class scores.
        """
        features = self.global_model.forward(image)
        global_features = self.global_fc_main_dropout(features)

        batch_size = global_features.shape[0]
        dev = global_features.device  # follow the activations, not a global `device`

        # torch.clamp replaces np.minimum: NumPy ufuncs cannot consume CUDA tensors.
        counts = torch.as_tensor(numberFaces, device=dev).long().clamp(max=maxFaces)
        has_faces = counts > 0

        # Per image: face features and one attention score per face.  Results
        # are collected and stacked rather than written in place into
        # preallocated buffers, which keeps the autograd graph free of in-place edits.
        face_rows, score_rows = [], []
        for j in range(batch_size):
            feats = self.non_align_model_dropout(
                self.non_align_model.forward(face_features_initial[j], labels)).float()
            hidden = self.attentionfc1_dropout(self.attentionfc1(feats))
            face_rows.append(feats)
            score_rows.append(self.attentionfc2(hidden))
        face_features = torch.stack(face_rows)                                   # (batch, maxFaces, 256)
        mid_face_features = torch.stack(score_rows).view(batch_size, 1, maxFaces)

        # Additive mask: -inf on padded face slots.  Rows of images without
        # any face are left unmasked so the softmax stays finite; their
        # weights are zeroed explicitly afterwards.
        slots = torch.arange(maxFaces, device=dev).unsqueeze(0)
        padded = (slots >= counts.unsqueeze(1)) & has_faces.unsqueeze(1)
        mask = torch.zeros(batch_size, 1, maxFaces, dtype=mid_face_features.dtype, device=dev)
        mask = mask.masked_fill(padded.unsqueeze(1), float('-inf'))

        # Convert scores to weights; images without faces get all-zero weights.
        attention_weights = F.softmax(mid_face_features + mask, dim = -1)
        attention_weights = attention_weights * has_faces.view(-1, 1, 1).to(attention_weights.dtype)

        # Weighted average of the face features.
        attended_face_features = torch.bmm(attention_weights, face_features)    # (batch, 1, 256)

        # Concatenate attended face features with the scene features.
        attended_face_features = attended_face_features.view(batch_size, -1)
        global_features = global_features.view(batch_size, -1)

        attended_face_features = self.bn_debug_face(attended_face_features)
        global_features = self.bn_debug_global(global_features)

        final_features = torch.cat((attended_face_features, global_features), dim=1)

        x = self.global_fc3_debug(final_features)
        return x
    
# Load Model 7 from a checkpoint holding the whole pickled model; `.module`
# unwraps the saved wrapper (presumably nn.DataParallel - confirm).
# This must run after the FaceAttention definition above if the pickle refers
# to that class by name (presumed; verify against the checkpoint).
# NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
model7 = torch.load('./TrainedModels/EnsembleModels/FaceAttention_AlignedModel_FullTrain_4para_lr001_dropout_BN_SoftmaxLr01_EmotiC', map_location=lambda storage, loc: storage).module
print('Face Attention Model (EmotiC) using 4rd Para Attention Loaded! (Model 7)')

Face Attention Model (EmotiC) using 4rd Para Attention Loaded! (Model 7)


# Model 8
## Face Attention Model using 4th Para Attention (FaceAttention_AlignedModel_FullTrain_4para_lr01_dropout_BN_SoftmaxLr01)

In [16]:
class FaceAttention(nn.Module):
    """Model 8: learned attention over face features with a pooled scene branch.

    The scene network's feature map is ReLU-ed, average-pooled (kernel 7) to
    2208 values and projected to 256-d.  Each 256-d face feature is scored by
    a small MLP (256 -> 64 -> 1); the softmax-weighted face feature is
    concatenated with the scene feature and classified into 3 classes.

    Args:
        global_model: Scene network returning a feature map per image.
        non_align_model: Face network called as ``forward(faces, labels)``,
            returning a 256-d vector per face.
    """

    def __init__(self, global_model, non_align_model):
        super(FaceAttention, self).__init__()

        self.global_model = global_model
        self.non_align_model = non_align_model

        # 256 attended-face values + 256 scene values -> 3 class scores.
        self.global_fc3_debug = nn.Linear(512, 3)
        nn.init.kaiming_normal_(self.global_fc3_debug.weight)
        self.global_fc3_debug.bias.data.fill_(0.01)

        self.attentionfc1 = nn.Linear(256, 64)
        nn.init.kaiming_normal_(self.attentionfc1.weight)
        self.attentionfc1.bias.data.fill_(0.01)

        self.attentionfc2 = nn.Linear(64, 1)
        nn.init.kaiming_normal_(self.attentionfc2.weight)
        self.attentionfc2.bias.data.fill_(0.01)

        self.global_fc_main = nn.Linear(2208, 256)
        nn.init.kaiming_normal_(self.global_fc_main.weight)
        self.global_fc_main.bias.data.fill_(0.01)

        self.attentionfc1_dropout = nn.Dropout(p = 0.5)
        self.global_fc_main_dropout = nn.Dropout(p = 0.5)
        self.non_align_model_dropout = nn.Dropout(p = 0.5)

        self.bn_debug_face = nn.BatchNorm1d(256, affine=False)
        self.bn_debug_global = nn.BatchNorm1d(256, affine=False)

    def forward(self, image, face_features_initial, numberFaces, labels):
        """Classify a batch from scene images and aligned face crops.

        Args:
            image: Batch of scene images.
            face_features_initial: ``(batch, maxFaces, 3, H, W)`` face crops,
                zero-padded beyond the detected faces.
            numberFaces: Per-image number of detected faces (may exceed
                ``maxFaces``).
            labels: Batch labels, forwarded unchanged to the face network.

        Returns:
            ``(batch, 3)`` class scores.
        """
        features = self.global_model.forward(image)

        out = F.relu(features, inplace = False)
        global_features_initial = F.avg_pool2d(out, kernel_size=7, stride=1).view(features.size(0), -1)
        global_features_initial = global_features_initial.view(-1, 2208)

        global_features = self.global_fc_main_dropout(self.global_fc_main(global_features_initial))

        batch_size = global_features.shape[0]
        dev = global_features.device  # follow the activations, not a global `device`

        # torch.clamp replaces np.minimum: NumPy ufuncs cannot consume CUDA tensors.
        counts = torch.as_tensor(numberFaces, device=dev).long().clamp(max=maxFaces)
        has_faces = counts > 0

        # Per image: face features and one attention score per face.  Results
        # are collected and stacked rather than written in place into
        # preallocated buffers, which keeps the autograd graph free of in-place edits.
        face_rows, score_rows = [], []
        for j in range(batch_size):
            feats = self.non_align_model_dropout(
                self.non_align_model.forward(face_features_initial[j], labels)).float()
            hidden = self.attentionfc1_dropout(self.attentionfc1(feats))
            face_rows.append(feats)
            score_rows.append(self.attentionfc2(hidden))
        face_features = torch.stack(face_rows)                                   # (batch, maxFaces, 256)
        mid_face_features = torch.stack(score_rows).view(batch_size, 1, maxFaces)

        # Additive mask: -inf on padded face slots.  Rows of images without
        # any face are left unmasked so the softmax stays finite; their
        # weights are zeroed explicitly afterwards.
        slots = torch.arange(maxFaces, device=dev).unsqueeze(0)
        padded = (slots >= counts.unsqueeze(1)) & has_faces.unsqueeze(1)
        mask = torch.zeros(batch_size, 1, maxFaces, dtype=mid_face_features.dtype, device=dev)
        mask = mask.masked_fill(padded.unsqueeze(1), float('-inf'))

        # Convert scores to weights; images without faces get all-zero weights.
        attention_weights = F.softmax(mid_face_features + mask, dim = -1)
        attention_weights = attention_weights * has_faces.view(-1, 1, 1).to(attention_weights.dtype)

        # Weighted average of the face features.
        attended_face_features = torch.bmm(attention_weights, face_features)    # (batch, 1, 256)

        # Concatenate attended face features with the scene features.
        attended_face_features = attended_face_features.view(batch_size, -1)
        global_features = global_features.view(batch_size, -1)

        attended_face_features = self.bn_debug_face(attended_face_features)
        global_features = self.bn_debug_global(global_features)

        final_features = torch.cat((attended_face_features, global_features), dim=1)

        x = self.global_fc3_debug(final_features)
        return x
    
# Load Model 8 from a checkpoint holding the whole pickled model; `.module`
# unwraps the saved wrapper (presumably nn.DataParallel - confirm).
# This must run after the FaceAttention definition above if the pickle refers
# to that class by name (presumed; verify against the checkpoint).
# NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
model8 = torch.load('./TrainedModels/EnsembleModels/FaceAttention_AlignedModel_FullTrain_4para_lr01_dropout_BN_SoftmaxLr01', map_location=lambda storage, loc: storage).module
print('Face Attention Model using 4rd Para Attention Loaded! (Model 8)')

Face Attention Model using 4rd Para Attention Loaded! (Model 8)


# Ensemble

In [17]:
class Ensemble(nn.Module):
    """Weighted-sum ensemble of eight sub-models.

    Each sub-model is called with the subset of inputs it expects and its
    output is multiplied by a fixed per-model weight; the weighted outputs are
    summed in model order (model_1 .. model_8).

    Args:
        model_1 .. model_8: the sub-models. model_1 and model_2 receive only
            the image; model_3 and model_4 receive the aligned face features,
            the face count and the labels; model_5 receives the image, the
            MTCNN face features and the face count; model_6 .. model_8 receive
            the image, the aligned face features, the face count and the
            labels.
        weights: optional sequence of eight numbers, one per sub-model. When
            omitted, ``DEFAULT_WEIGHTS`` is used.

    Raises:
        ValueError: if ``weights`` does not contain exactly eight entries or
            if every entry is zero (the ensemble would have no output).
    """

    # Per-model coefficients used when the caller does not supply ``weights``.
    DEFAULT_WEIGHTS = (0, 1.5, 1, 0.5, 8, 1.5, 1, 1)

    def __init__(self, model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8,
                 weights=None):
        super(Ensemble, self).__init__()

        self.model_1 = model_1
        self.model_2 = model_2
        self.model_3 = model_3
        self.model_4 = model_4
        self.model_5 = model_5
        self.model_6 = model_6
        self.model_7 = model_7
        self.model_8 = model_8

        if weights is None:
            weights = self.DEFAULT_WEIGHTS
        weights = tuple(float(w) for w in weights)
        if len(weights) != 8:
            raise ValueError(
                "Ensemble expects 8 weights, got {}".format(len(weights)))
        if not any(w != 0 for w in weights):
            raise ValueError("Ensemble needs at least one non-zero weight")
        # Stored as a plain tuple (not a parameter or buffer) so that the
        # module's state_dict layout is unchanged.
        self.weights = weights

    def forward(self, image, labels, face_features_mtcnn, face_features_aligned, numberFaces):
        """Return the weighted sum of the sub-model outputs.

        Sub-models whose weight is exactly zero are not evaluated: they would
        contribute nothing to the sum, so running them only costs time.
        """
        member_calls = (
            (self.model_1, (image,)),
            (self.model_2, (image,)),
            (self.model_3, (face_features_aligned, numberFaces, labels)),
            (self.model_4, (face_features_aligned, numberFaces, labels)),
            (self.model_5, (image, face_features_mtcnn, numberFaces)),
            (self.model_6, (image, face_features_aligned, numberFaces, labels)),
            (self.model_7, (image, face_features_aligned, numberFaces, labels)),
            (self.model_8, (image, face_features_aligned, numberFaces, labels)),
        )

        output = None
        for weight, (member, args) in zip(self.weights, member_calls):
            if weight == 0:
                continue
            term = weight * member(*args)
            # Accumulate left to right, in model order.
            output = term if output is None else output + term
        return output

In [18]:
# Combine the eight loaded models into a single ensemble, move it to the
# selected device, then wrap it in nn.DataParallel.
ensemble_members = (model1, model2, model3, model4, model5, model6, model7, model8)
model_ft = nn.DataParallel(Ensemble(*ensemble_members).to(device))
print("Ensemble Loaded.")

Ensemble Loaded.


# Training 

In [22]:
def train_model(model, criterion, optimizer=None, scheduler=None, num_epochs = 1):
    """Run the evaluation loop over the validation loader and report metrics.

    Only phase 1 (validation) is part of the phase range; the phase-0
    (training) branch is kept but is never entered as the range stands.
    The function reads the notebook-level names ``train_dataloader``,
    ``val_dataloader``, ``device`` and ``dataset_sizes``.

    Args:
        model: module called as ``model(image, labels, features_mtcnn,
            features_aligned, numberFaces)``.
        criterion: loss callable applied to ``(outputs, labels)``.
        optimizer: optional optimizer; used only by the training phase and
            skipped when ``None``.
        scheduler: optional LR scheduler; stepped once after a training phase
            and skipped when ``None``.
        num_epochs: number of passes over the enabled phases.

    Returns:
        ``model`` itself, with the weights of the best-scoring validation
        epoch loaded back into it.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs - 1))
        print('-' * 10)

        # Phase 0 = training, phase 1 = validation. Only validation runs.
        for phase in range(1, 2):
            is_training = (phase == 0)
            if is_training:
                dataloaders = train_dataloader
                model.train()
            else:
                dataloaders = val_dataloader
                model.eval()

            running_loss = 0.0
            # Kept as a Python int so an empty loader does not break the
            # accuracy computation below.
            running_corrects = 0

            for sample_batched in dataloaders:
                inputs = sample_batched['image'].to(device)
                labels = sample_batched['label'].to(device)
                face_features_mtcnn = sample_batched['features_mtcnn'].to(device)
                face_features_aligned = sample_batched['features_aligned'].to(device)
                numberFaces = sample_batched['numberFaces'].to(device)

                if is_training and optimizer is not None:
                    optimizer.zero_grad()

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs, labels, face_features_mtcnn, face_features_aligned, numberFaces)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        if optimizer is not None:
                            optimizer.step()

                # loss is scaled by the batch size and later divided by the
                # dataset size to give a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += int(torch.sum(preds == labels).item())

            if is_training and scheduler is not None:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 1 and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()
    time_elapsed = time.time() - since
    # '{:.0f}' prints whole minutes/seconds; the previous '{:0f}' spec printed
    # six decimals and '{: .0f}' added a stray leading space.
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Restore the snapshot taken at the best validation accuracy.
    model.load_state_dict(best_model_wts)
    return model

In [23]:
# Loss applied to (outputs, labels) for every batch inside train_model.
criterion = nn.CrossEntropyLoss()

# The optimizer and LR scheduler below are left disabled: train_model is
# invoked with None for both. They are kept for reference.
# optimizer_ft = optim.SGD(model_ft.parameters(), lr = 0.01, momentum=0.9)

# exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [24]:
# Run the ensemble through train_model for `epochs` passes; no optimizer or
# scheduler is supplied.
model = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=None,
    scheduler=None,
    num_epochs=epochs,
)

Epoch 0/0
----------
forward
torch.Size([3, 3])
forward


KeyboardInterrupt: 

In [None]:
# torch.save(model_ft.state_dict(), "FaceAttention_FullTrain_64_1layer_sgd_lr01.pt")