<a href="https://colab.research.google.com/github/werlang/emolearn-ml-model/blob/main/vgg_fine_tune_v6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from keras.engine import  Model
from keras.layers import Flatten, Dense, Input
from keras.models import Sequential, load_model
from keras.layers import TimeDistributed, GRU, LSTM, Dropout, Conv2D, BatchNormalization, MaxPooling2D
from keras.utils import Sequence
from keras.utils import to_categorical
from keras.optimizers import Adam, SGD
from keras.callbacks import TensorBoard, LearningRateScheduler, ReduceLROnPlateau, EarlyStopping, Callback, ModelCheckpoint
from keras.metrics import AUC
import pandas as pd
import numpy as np
import os, cv2
import datetime
import tensorflow as tf
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from keras import backend as K
import math

def start_colab():
    # Colab-only setup: mounts Google Drive at /content/drive (forcing a
    # remount) and installs the keras_vggface package with pip.
    # Takes no arguments and returns nothing.
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    # IPython shell magic: only valid when executed inside a notebook cell.
    !pip install keras_vggface


def extract_data():
    # Unpacks the Train/Test/Validation archives from the mounted Drive into
    # ./features and copies the labels directory next to it.
    # Every `!` line is an IPython shell magic, so this function only works
    # inside a notebook; paths are relative to the current working directory.
    # Takes no arguments and returns nothing.
    !mkdir features

    # aligned faces extracted from openface
    # unzip flags: -n never overwrites files that already exist, -q is quiet.
    print("COPYING TRAIN SET...")
    !unzip -n -q "drive/My Drive/Doutorado/Implementação/new_features_daisee/Train.zip" -d features 
    print("COPYING TEST SET...")
    !unzip -n -q "drive/My Drive/Doutorado/Implementação/new_features_daisee/Test.zip" -d features 
    print("COPYING VALIDATION SET...")
    !unzip -n -q "drive/My Drive/Doutorado/Implementação/new_features_daisee/Validation.zip" -d features 
    print("COPYING LABELS...")
    # Recursive copy of the whole labels directory into the working directory.
    !cp -r "drive/My Drive/Doutorado/Implementação/new_features_daisee/labels" ./
    print("DONE")


def set_globals():
    """Return the notebook-wide configuration values.

    Returns:
        A 6-tuple ``(labels_path, features_path, drive_save_path,
        batch_size, labels, time_frames)``, in that order.
    """
    return (
        "labels",  # labels_path: directory holding the per-split .npy files
        "features",  # features_path: root directory of the frame images
        'drive/My Drive/1NOSYNC/DT/checkpoint',  # drive_save_path
        10,  # batch_size
        ['Test', 'Train', 'Validation'],  # labels: the three split names
        10,  # time_frames: number of frames per input sequence
    )


class Generator_V(Sequence):
    """Keras ``Sequence`` that serves batches of fixed-length frame windows.

    For every video of a split, the image files found under
    ``{features_path}/{split}/{video_index}`` are cut into overlapping
    windows of ``frames`` images. Each window carries the one-hot label of
    the video it was taken from.

    Reads the module-level globals ``labels_path`` and ``features_path``.

    Args:
        split: Split name; selects ``{labels_path}/{split}.npy`` and the
            ``{features_path}/{split}`` directory.
        batch_size: Number of windows returned per batch.
        frames: Number of frames sampled into each window.
        **kw: Accepted for backward compatibility and ignored.
    """

    def __init__(self, split, batch_size, frames, **kw):
        super().__init__()
        self.batch_size = batch_size
        self.split = split

        Y = np.load("{}/{}.npy".format(labels_path, split))
        Y_gen = to_categorical(Y, 2)

        self.labels, self.videos = [], []
        # NOTE(review): presumably seconds per window, window overlap
        # fraction and source frame rate -- confirm against the dataset.
        interval = 2
        stride = 0.5
        fps = 15
        # Distance (in files) between two sampled frames. Clamped to 1 so a
        # large `frames` value cannot round it to 0 and divide by zero below.
        skip = max(1, int(round(interval * fps / frames, 0)))
        # Distance between two window starts, in units of `skip`.
        step = max(1, int(stride * frames))
        # Number of consecutive files one window spans.
        span = frames * skip

        for Yi, label in enumerate(Y_gen):
            dir_path = "{}/{}/{}".format(features_path, split, Yi)
            # os.walk yields entries in arbitrary order; sort so that every
            # window is a deterministic, name-ordered frame sequence.
            # Assumes frame file names sort chronologically (e.g. zero-padded
            # indices) -- TODO confirm.
            files = sorted(
                "{}/{}".format(root, file_name)
                for root, _dirs, file_names in os.walk(dir_path)
                for file_name in file_names
            )

            # Non-positive when the video is shorter than one window, in
            # which case the range below is empty and the video is skipped.
            window_starts = (len(files) - span) // skip + 1
            for i in range(0, window_starts, step):
                first = i * skip
                self.videos.append(files[first:first + span:skip])
                self.labels.append(label)

    def __len__(self):
        """Return the number of batches; the last one may be partial."""
        return int(np.ceil(len(self.videos) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Load batch ``idx`` from disk.

        Returns:
            ``(videos, labels)``: an array holding one stack of images per
            window, with pixel values divided by 255, and the array of
            matching one-hot labels.

        Raises:
            IOError: If an image file cannot be read by OpenCV.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.videos[start:stop]
        batch_y = self.labels[start:stop]

        videos = []
        for video in batch_x:
            images = []
            for name in video:
                image = cv2.imread(name)
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path.
                if image is None:
                    raise IOError("Could not read image: {}".format(name))
                images.append(image)
            videos.append(np.array(images) / 255)

        return np.array(videos), np.array(batch_y)


def build_model():
    """Build the VGGFace + GRU classifier and choose its save directory.

    A VGGFace network without its top is used as a per-frame feature
    extractor (all but its last four layers frozen), wrapped in
    ``TimeDistributed`` and followed by a GRU and a small dense head with a
    2-way softmax. Reads the module-level globals ``time_frames`` and
    ``drive_save_path``.

    With ``loadModel`` False a new timestamped directory is created under
    ``drive_save_path``. With ``loadModel`` True the freshly built model is
    replaced by the checkpoint ``{epoch:03d}.h5`` loaded from ``dir_name``.

    Returns:
        ``(model, save_dir, epoch)``: the compiled (or loaded) model, the
        directory used for checkpoints, and the epoch to start from.
    """
    from keras_vggface.vggface import VGGFace

    # Switch between resuming from a checkpoint (True) and a fresh run (False).
    loadModel = False

    # Fresh run: suffix appended to the timestamped directory name.
    ident_name = 'fine-tune-images-vgg-striding-generator'
    epoch = 0

    # Resumed run: directory that holds the checkpoint and the epoch to load.
    if loadModel:
        dir_name = '2020-6-21-2-50-38-fine-tune-images-vgg-resampled'
        epoch = 2

    # Per-frame feature extractor. For a ResNet backbone, pass
    # model='resnet50' and take the 'avg_pool' layer instead of 'pool5'.
    backbone = VGGFace(
        include_top=False,
        input_shape=(224, 224, 3),
        weights='vggface',
    )

    # Only the last four backbone layers stay trainable.
    for frozen_layer in backbone.layers[:-4]:
        frozen_layer.trainable = False

    pooled = backbone.get_layer('pool5').output
    vgg_model = Model(backbone.input, Flatten(name='flatten')(pooled))

    # Sequence model: frame features -> GRU -> dense head -> 2-way softmax.
    model = Sequential([
        TimeDistributed(vgg_model, input_shape=(time_frames, 224, 224, 3)),
        GRU(4, return_sequences=False),
        Dropout(0.2),
        Dense(4, activation='relu'),
        Dropout(0.2),
        Dense(2, activation='softmax', name='classifier'),
    ])

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=1e-4),
        metrics=['accuracy'],
    )

    if loadModel:
        file_name = "{:03d}.h5".format(epoch)
        save_dir = "{}/{}".format(drive_save_path, dir_name)
        print("Loading model from {}/{}.".format(save_dir, file_name))
        model = load_model("{}/{}".format(save_dir, file_name))
    else:
        # Directory name: un-padded year-month-day-hour-minute-second + suffix.
        now = datetime.datetime.now()
        stamp_parts = (
            now.year, now.month, now.day, now.hour, now.minute, now.second,
        )
        prefix = '-'.join(str(part) for part in stamp_parts)
        save_dir = "{}/{}-{}".format(drive_save_path, prefix, ident_name)
        os.mkdir(save_dir)

    return model, save_dir, epoch


def set_callbacks():
    """Create the callbacks used during training.

    Reads the module-level global ``save_dir``.

    Returns:
        ``[checkpoint, early_stop, reduce_lr_plateau]``. A TensorBoard
        callback is also constructed, but it is not part of the returned
        list.
    """
    checkpoint_path = save_dir + '/{epoch:03d}.h5'
    log_dir = "{}/logs".format(save_dir)

    # Write a checkpoint only when validation accuracy improves.
    checkpoint = ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        verbose=1,
        save_best_only=True,
        mode='max',
    )

    # Constructed for parity with the original setup; not returned below.
    tensorboard = TensorBoard(
        log_dir=log_dir,
        histogram_freq=0,
        write_graph=True,
        write_grads=False,
        write_images=True,
    )

    # Stop after 10 epochs without a val_loss improvement of at least 0.001.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=10,
        mode='min',
        restore_best_weights=True,
        verbose=1,
        min_delta=0.001,
    )

    # Divide the learning rate by 10 after 5 stagnant epochs, down to 1e-8.
    reduce_lr_plateau = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,
        patience=5,
        mode='min',
        min_lr=1e-8,
        verbose=1,
    )

    return [checkpoint, early_stop, reduce_lr_plateau]


def build_generators():
    """Instantiate the training and validation batch generators.

    Reads the module-level globals ``batch_size`` and ``time_frames``.

    Returns:
        ``(gen_train, gen_val)``: ``Generator_V`` instances for the
        'Train' and 'Validation' splits, in that order.
    """
    print("Building generators...")
    gen_train, gen_val = [
        Generator_V(split_name, batch_size, time_frames)
        for split_name in ('Train', 'Validation')
    ]
    print("DONE")

    return gen_train, gen_val


def fit_model():
    """Train the global ``model`` on the global generators.

    Reads the module-level globals ``labels_path``, ``model``,
    ``gen_train``, ``gen_val``, ``callbacks`` and ``epoch``. Returns nothing.
    """
    # Class weights come from the train-set label distribution: class 0 is
    # weighted by the mean label value and class 1 by its complement
    # (assuming 0/1 labels, the rarer class gets the larger weight).
    train_labels = np.load("{}/Train.npy".format(labels_path))
    mean_label = np.sum(train_labels) / len(train_labels)
    class_weights = {0: mean_label, 1: 1 - mean_label}

    model.fit_generator(
        gen_train,
        epochs=1000,
        validation_data=gen_val,
        class_weight=class_weights,
        callbacks=callbacks,
        initial_epoch=epoch,
    )

In [None]:
# Driver cell. The Colab setup, data extraction, callback/generator
# construction and training steps are commented out; as written, only the
# configuration globals are set and the model is built.
#
# start_colab()
# extract_data()
# The names bound here are read as module-level globals by the functions above.
labels_path, features_path, drive_save_path, batch_size, labels, time_frames = set_globals()
# Builds the model and, for a fresh run, creates its save directory.
model, save_dir, epoch = build_model()
# callbacks = set_callbacks()
# gen_train, gen_val = build_generators()
# fit_model()