In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import sys
import glob
import random
import functools
from tqdm import tqdm
from enum import Enum
from subprocess import call
from copy import copy
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.utils import to_categorical
import threading


PATH_TO_PROJECT = os.getcwd()
PATH_TO_DATA = "/home/tomasz/Dokumenty/shared/"
DATASET_FOLDER = "data"
EXTRACTED_DATA_FOLDER = "extracted_data"
DATASET_RESULT_FILE = "ACCEDEranking.txt"
VIDO_EXTENSION = ".mp4"
PATH_TO_DATASET = os.path.join(PATH_TO_DATA, DATASET_FOLDER)
VIDEO_CLASSES = ["Neutral", "LALV", "LAHV", "HALV", "HAHV"]
MIN_NEUTRAL_LEVEL_VALUE = 3600
MAX_NEUTRAL_LEVEL_VALUE = 6300
SPLIT_LEVEL = 5000

FRAME_SIZE = 100
MAX_FRAMES = 300

class ValueLevel(Enum):
    Low = 1
    LowN = 2
    HighN = 3
    High = 4
    
class VideoClass(Enum):
    Neutral = 0
    LALV = 1
    LAHV = 2
    HALV = 3
    HAHV = 4
      

class threadsafe_iterator:
    def __init__(self, iterator):
        self.iterator = iterator
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):
        with self.lock:
            return next(self.iterator)

def threadsafe_generator(func):
    """Decorator"""
    @functools.wraps(func)
    def gen(*a, **kw):
        return threadsafe_iterator(func(*a, **kw))
    return gen
    

class DataSet():
    def __init__(self, seq_length=40, image_shape=(224, 224, 3)):
        
        """Constructor.
        seq_length = (int) the number of frames to consider
        """
        self.path_to_dataset = os.path.join(PATH_TO_DATA, DATASET_FOLDER)
        self.path_to_extracted_data = os.path.join(PATH_TO_DATA, EXTRACTED_DATA_FOLDER)
        self.seq_length = seq_length
        self.max_frames = MAX_FRAMES  # max number of frames a video can have for us to use it
        # Get the data.
        self.data = self.get_data()

        # Now do some minor data cleaning.
        self.data = self.clean_data()

        self.image_shape = image_shape
        
    def get_data(self):
        data = []
        path_to_result = os.path.join(PATH_TO_DATA, DATASET_RESULT_FILE)
        list_of_results = open(path_to_result).readlines()
        list_of_results.pop(0)
        for res in list_of_results:
            splitted_res = res.split("\t")
            video_name = splitted_res[1]
            video_name = video_name[:-4]
            valency = splitted_res[2]
            arousal = splitted_res[3]
            emotion_class = DataSet.get_class_for_arousal_and_valency(int(arousal), int(valency))
            number_of_frames = self.extract_data_for_video(video_name)
            #print(video_name + ": " + arousal + ", " + valency + ", " + emotion_class.name + ", " + str(number_of_frames))
            data.append([video_name, int(valency), int(arousal), emotion_class, number_of_frames])

        return data
        
    @staticmethod
    def get_class_for_arousal_and_valency(arousal, valency):
        if arousal < MIN_NEUTRAL_LEVEL_VALUE:
            if valency < SPLIT_LEVEL:
                return VideoClass.LALV
            else:
                return VideoClass.LAHV
        elif arousal < SPLIT_LEVEL:
            if valency < MIN_NEUTRAL_LEVEL_VALUE:
                return VideoClass.LALV
            elif valency < MAX_NEUTRAL_LEVEL_VALUE:
                return VideoClass.Neutral
            else:
                return VideoClass.LAHV
        elif arousal < MAX_NEUTRAL_LEVEL_VALUE:
            if valency < MIN_NEUTRAL_LEVEL_VALUE:
                return VideoClass.HALV
            elif valency < MAX_NEUTRAL_LEVEL_VALUE:
                return VideoClass.Neutral
            else:
                return VideoClass.HAHV
        else:
            if valency < SPLIT_LEVEL:
                return VideoClass.HALV
            else:
                return VideoClass.HAHV
            
    def extract_data_for_video(self, video_name):
        if not os.path.isdir(self.path_to_extracted_data):
            os.mkdir(self.path_to_extracted_data)
        if not os.path.isdir(os.path.join(self.path_to_extracted_data, video_name)):
            os.mkdir(os.path.join(self.path_to_extracted_data, video_name))
            
        if not self.check_if_video_is_extracted(video_name):
            src = os.path.join(self.path_to_dataset, video_name + VIDO_EXTENSION)
            dest = os.path.join(self.path_to_extracted_data, video_name,
                        '%04d.jpg')
            call(["ffmpeg", "-i", src, dest])
        return len(self.get_frames_for_video(video_name))
            
    def get_frames_for_video(self, video_name):
        images = sorted(glob.glob(os.path.join(self.path_to_extracted_data, video_name, '*jpg')))
        return images
        
    def check_if_video_is_extracted(self, video_name):
        return bool(os.path.exists(os.path.join(self.path_to_extracted_data, video_name,
                               '0001.jpg')))
        
    def clean_data(self):
        """Limit samples to greater than the sequence length and fewer
        than N frames. Also limit it to classes we want to use."""
        data_clean = []
        for item in self.data:
            if int(item[4]) >= self.seq_length and int(item[4]) <= self.max_frames:
                data_clean.append(item)

        return data_clean
    
    def split_train_test(self, percent_of_train):
        number_of_train = int(len(self.data) * percent_of_train)
        y = copy(self.data)
        random.shuffle(y)
        train = y[:number_of_train]
        test = y[number_of_train:]
        
        return train, test
    
    def build_image_sequence(self, frames):
        """Given a set of frames (filenames), build our sequence."""
        return [self.process_image(x, self.image_shape) for x in frames]
    
    def process_image(self, image, target_shape):
        """Given an image, process it and return the array."""
        # Load the image.
        h, w, _ = target_shape
        image = load_img(image, target_size=(h, w))

        # Turn it into numpy, normalize and return.
        img_arr = img_to_array(image)
        x = (img_arr / 255.).astype(np.float32)

        return x
    
    def get_all_sequences_in_memory(self, train_or_test, precent_of_train):
        train, test = self.split_train_test(precent_of_train)
        data = train if train_or_test == "train" else test
        
        X, y = [], []
        
        for row in data:
            frames = self.get_frames_for_video(row[0])
            frames = DataSet.rescale_list(frames, self.seq_length)
            sequence = self.build_image_sequence(frames)
            X.append(sequence)
            y.append(self.get_class_one_hot(row[3]))
            
        return np.array(X), np.array(y)
        
    @threadsafe_generator
    def frame_generator(self, batch_size, train_test, precent_of_train):
        train, test = self.split_train_test(precent_of_train)
        data = train if train_or_test == "train" else test
        
        while 1:
            X, y = [], []
            
            for _ in range(batch_size):
                sample = random.choice(data)
                frames = self.get_frames_for_video(sample[0])
                frames = DataSet.rescale_list(frames, self.seq_length)
                sequence = self.build_image_sequence(frames)
                X.append(sequence)
                y.append(self.get_class_one_hot(sample[3]))
            
            
            yield np.array(X), np.array(y)
            
    def get_class_one_hot(self, video_class):
        # Now one-hot it.
        label_hot = to_categorical(video_class.value, len(VideoClass))
        return label_hot
            
    @staticmethod        
    def rescale_list(input_list, size):
        """Given a list and a size, return a rescaled/samples list. For example,
        if we want a list of size 5 and we have a list of size 25, return a new
        list of size five which is every 5th element of the origina list."""
        assert len(input_list) >= size

        # Get the number to skip between iterations.
        skip = len(input_list) // size

        # Build our new output.
        output = [input_list[i] for i in range(0, len(input_list), skip)]

        # Cut off the last one if needed.
        return output[:size]

In [2]:
from tensorflow.keras.layers import Dense, Flatten, Dropout, ZeroPadding3D, Input, Activation
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import Conv2D, MaxPooling3D, Conv3D, MaxPooling2D
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.regularizers import l2
from tensorflow import metrics
import tensorflow as tf
from collections import deque
import sys

class AVAnalysisModel():   
    def __init__(self, seq_length, saved_model=None):
        self.seq_length = seq_length
        self.input_shape = (seq_length, 80, 80, 3)
        
        if saved_model is not None:
            self.model = load_model(self.saved_model)
        else:
            self.model = self.lrcn()
            
        # Now compile the network.
        optimizer = Adam(lr=1e-5, decay=1e-6)
        self.model.compile(loss='categorical_crossentropy', optimizer='adam',
                           metrics=['accuracy'])

    
    def lrcn(self):
        """Build a CNN into RNN.
        Starting version from:
            https://github.com/udacity/self-driving-car/blob/master/
                steering-models/community-models/chauffeur/models.py
        Heavily influenced by VGG-16:
            https://arxiv.org/abs/1409.1556
        Also known as an LRCN:
            https://arxiv.org/pdf/1411.4389.pdf
        """
        def add_default_block(model, kernel_filters, init, reg_lambda):

            # conv
            model.add(TimeDistributed(Conv2D(kernel_filters, (3, 3), padding='same',
                                             kernel_initializer=init, kernel_regularizer=l2(l=reg_lambda))))
            model.add(TimeDistributed(tf.compat.v1.layers.BatchNormalization()))
            model.add(TimeDistributed(Activation('relu')))
            # conv
            model.add(TimeDistributed(Conv2D(kernel_filters, (3, 3), padding='same',
                                             kernel_initializer=init, kernel_regularizer=l2(l=reg_lambda))))
            model.add(TimeDistributed(tf.compat.v1.layers.BatchNormalization()))
            model.add(TimeDistributed(Activation('relu')))
            # max pool
            model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

            return model

        initialiser = 'glorot_uniform'
        reg_lambda  = 0.001

        model = Sequential()

        # first (non-default) block
        model.add(TimeDistributed(Conv2D(32, (7, 7), strides=(2, 2), padding='same',
                                         kernel_initializer=initialiser, kernel_regularizer=l2(l=reg_lambda)),
                                  input_shape=self.input_shape))
        model.add(TimeDistributed(tf.compat.v1.layers.BatchNormalization()))
        model.add(TimeDistributed(Activation('relu')))
        model.add(TimeDistributed(Conv2D(32, (3,3), kernel_initializer=initialiser, kernel_regularizer=l2(l=reg_lambda))))
        model.add(TimeDistributed(tf.compat.v1.layers.BatchNormalization()))
        model.add(TimeDistributed(Activation('relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        # 2nd-5th (default) blocks
        model = add_default_block(model, 64,  init=initialiser, reg_lambda=reg_lambda)
        model = add_default_block(model, 128, init=initialiser, reg_lambda=reg_lambda)
        model = add_default_block(model, 256, init=initialiser, reg_lambda=reg_lambda)
        model = add_default_block(model, 512, init=initialiser, reg_lambda=reg_lambda)

        # LSTM output head
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(256, return_sequences=False, dropout=0.5))
        model.add(Dense(len(VideoClass), activation='softmax'))

        return model

In [6]:
import time
import os.path
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping

PERCENT_OF_TRAIN = 0.8

def train(seq_length, saved_model=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100, loaded_data=None):
    
    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs'))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Get the data and process it.
    if loaded_data is not None:
        data = loaded_data
    elif image_shape is None:
        data = DataSet(
            seq_length=seq_length
        )
    else:
        data = DataSet(
            seq_length=seq_length,
            image_shape=image_shape
        )

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', PERCENT_OF_TRAIN)
        X_test, y_test = data.get_all_sequences_in_memory('test', PERCENT_OF_TRAIN)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', PERCENT_OF_TRAIN)
        val_generator = data.frame_generator(batch_size, 'test', PERCENT_OF_TRAIN)

    # Get the model.
    rm = AVAnalysisModel(seq_length, saved_model)
    print(X.shape)
    

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(
            X,
            y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            #verbose=1,
            #callbacks=[tb, early_stopper],
            epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
 
    now = time.strftime("%c")
    rm.save(str(now) + '.model')

In [7]:
saved_model = None  # None or weights file
seq_length = 40
load_to_memory = True  # pre-load the sequences into memory
batch_size = 16
nb_epoch = 10
image_shape = (80, 80, 3)
data = DataSet(
            seq_length=seq_length,
            image_shape=image_shape
        )

In [8]:


train(seq_length, saved_model=saved_model, image_shape=image_shape,
          load_to_memory=load_to_memory, batch_size=batch_size, nb_epoch=nb_epoch, loaded_data=data)

(31, 40, 80, 80, 3)
Train on 31 samples, validate on 8 samples
Epoch 1/10

_SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'time_distributed_37/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_37/batch_normalization/cond/Identity_1:0' shape=(32,) dtype=float32>, <tf.Tensor 'time_distributed_37/batch_normalization/cond/Identity_2:0' shape=(32,) dtype=float32>, <tf.Tensor 'time_distributed_40/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_40/batch_normalization/cond/Identity_1:0' shape=(32,) dtype=float32>, <tf.Tensor 'time_distributed_40/batch_normalization/cond/Identity_2:0' shape=(32,) dtype=float32>, <tf.Tensor 'time_distributed_44/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_44/batch_normalization/cond/Identity_1:0' shape=(64,) dtype=float32>, <tf.Tensor 'time_distributed_44/batch_normalization/cond/Identity_2:0' shape=(64,) dtype=float32>, <tf.Tensor 'time_distributed_47/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_47/batch_normalization/cond/Identity_1:0' shape=(64,) dtype=float32>, <tf.Tensor 'time_distributed_47/batch_normalization/cond/Identity_2:0' shape=(64,) dtype=float32>, <tf.Tensor 'time_distributed_51/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_51/batch_normalization/cond/Identity_1:0' shape=(128,) dtype=float32>, <tf.Tensor 'time_distributed_51/batch_normalization/cond/Identity_2:0' shape=(128,) dtype=float32>, <tf.Tensor 'time_distributed_54/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_54/batch_normalization/cond/Identity_1:0' shape=(128,) dtype=float32>, <tf.Tensor 'time_distributed_54/batch_normalization/cond/Identity_2:0' shape=(128,) dtype=float32>, <tf.Tensor 'time_distributed_58/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_58/batch_normalization/cond/Identity_1:0' shape=(256,) dtype=float32>, <tf.Tensor 'time_distributed_58/batch_normalization/cond/Identity_2:0' shape=(256,) dtype=float32>, <tf.Tensor 'time_distributed_61/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_61/batch_normalization/cond/Identity_1:0' shape=(256,) dtype=float32>, <tf.Tensor 'time_distributed_61/batch_normalization/cond/Identity_2:0' shape=(256,) dtype=float32>, <tf.Tensor 'time_distributed_65/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_65/batch_normalization/cond/Identity_1:0' shape=(512,) dtype=float32>, <tf.Tensor 'time_distributed_65/batch_normalization/cond/Identity_2:0' shape=(512,) dtype=float32>, <tf.Tensor 'time_distributed_68/batch_normalization/cond_1/Identity:0' shape=() dtype=float32>, <tf.Tensor 'time_distributed_68/batch_normalization/cond/Identity_1:0' shape=(512,) dtype=float32>, <tf.Tensor 'time_distributed_68/batch_normalization/cond/Identity_2:0' shape=(512,) dtype=float32>]