In [152]:
import pandas as pd
import numpy as np
from keras.utils import np_utils, load_img
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
# from keras_vggface.vggface import VGGFace
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D

In [153]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  1


In [154]:
# from google.colab import drive
# drive.mount('/content/drive')

In [155]:
# %cd /content/drive/MyDrive/Colab Notebooks/NN_PROJ

In [156]:


main_folder = 'input/celeba-dataset/'
images_folder = main_folder + 'img_align_celeba/'
weights_folder = 'weights/'
EXAMPLE_PIC = images_folder + '000506.jpg'

TRAINING_SAMPLES = 1000
VALIDATION_SAMPLES = 2000
TEST_SAMPLES = 2000
IMG_WIDTH = 224
IMG_HEIGHT = 224
BATCH_SIZE = 16
NUM_EPOCHS = 20



In [157]:

IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32
K_FOLD = 5

In [158]:
# !ls

In [159]:


# import the data set that include the attribute for each picture
df_attr = pd.read_csv(main_folder + 'list_attr_celeba.csv')
df_attr.columns



Index(['file_name', '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive',
       'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 'Big_Nose',
       'Black_Hair', 'Blond_Hair', 'Blurry', 'Brown_Hair', 'Bushy_Eyebrows',
       'Chubby', 'Double_Chin', 'Eyeglasses', 'Goatee', 'Gray_Hair',
       'Heavy_Makeup', 'High_Cheekbones', 'Male', 'Mouth_Slightly_Open',
       'Mustache', 'Narrow_Eyes', 'No_Beard', 'Oval_Face', 'Pale_Skin',
       'Pointy_Nose', 'Receding_Hairline', 'Rosy_Cheeks', 'Sideburns',
       'Smiling', 'Straight_Hair', 'Wavy_Hair', 'Wearing_Earrings',
       'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace',
       'Wearing_Necktie', 'Young'],
      dtype='object')

## Data Preprocessing

In [160]:


df_attr.set_index('file_name', inplace=True)
df_attr.replace(to_replace=-1, value=0, inplace=True) #replace -1 by 0
df_attr.shape



(202599, 40)

In [161]:


df_partition = pd.read_csv(main_folder + 'list_eval_partition.csv')
df_partition.head()
df_partition['val'].value_counts().sort_index()

# join the partition with the attributes
df_partition.set_index('file_name', inplace=True)
df_par_attr = df_partition.join(df_attr['Male'], how='inner')
df_par_attr = df_par_attr.join(df_attr['Young'], how='inner')
df_par_attr = df_par_attr.join(df_attr['Attractive'], how='inner')
df_par_attr.head()



Unnamed: 0_level_0,val,Male,Young,Attractive
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000001.jpg,0,0,1,1
000002.jpg,0,0,1,0
000003.jpg,0,1,1,0
000004.jpg,0,0,1,1
000005.jpg,0,0,1,1


In [162]:
df_train = df_par_attr[
    (df_par_attr['val'] == 0) & 
    (df_par_attr['Male'] == 0) & 
    (df_par_attr['Young'] == 0) 
].sample(TRAINING_SAMPLES//2)

df_train = pd.concat([
    df_train,
    df_par_attr[(df_par_attr['val'] == 0) & (df_par_attr['Male'] == 1)].sample(TRAINING_SAMPLES//2)
])

df_train.reset_index(inplace=True)
df_train

Unnamed: 0,file_name,val,Male,Young,Attractive
0,151836.jpg,0,0,0,0
1,088017.jpg,0,0,0,1
2,075970.jpg,0,0,0,0
3,108615.jpg,0,0,0,0
4,066579.jpg,0,0,0,0
...,...,...,...,...,...
995,135541.jpg,0,1,1,0
996,036507.jpg,0,1,1,1
997,074817.jpg,0,1,0,0
998,127638.jpg,0,1,0,0


In [163]:
df_train['gender'] = abs(df_train['Male']-1)

In [164]:
def consolidate_age(df_train):
    
    df_train.loc[(df_train.age == '(38, 42)'),'age']='(38, 43)'
    df_train.loc[(df_train.age == '(8, 12)'),'age']='(8, 23)'
    df_train.loc[(df_train.age == '(15, 20)'),'age']='(8, 23)'
    df_train.loc[(df_train.age == '(27, 32)'),'age']='(25, 32)'

    df_train.loc[(df_train.age == '2'),'age']='(0, 6)'
    df_train.loc[(df_train.age == '3'),'age']='(0, 6)'
    df_train.loc[(df_train.age == '(0, 2)'),'age']='(0, 6)'
    df_train.loc[(df_train.age == '(4, 6)'),'age']='(0, 6)'

    df_train.loc[(df_train.age == '35'),'age']='(34, 36)'
    df_train.loc[(df_train.age == '55'),'age']='(55, 58)'
    df_train.loc[(df_train.age == '58'),'age']='(55, 58)'
    df_train.loc[(df_train.age == '22'),'age']='(8, 23)'
    df_train.loc[(df_train.age == '13'),'age']='(8, 23)'
    df_train.loc[(df_train.age == '45'),'age']='(38, 48)'
    df_train.loc[(df_train.age == '36'),'age']='(34, 36)'
    df_train.loc[(df_train.age == '23'),'age']='(8, 23)'
    df_train.loc[(df_train.age == '57'),'age']='(55, 58)'
    df_train.loc[(df_train.age == '29'),'age']='(25, 32)'
    df_train.loc[(df_train.age == '34'),'age']='(34, 36)'
    df_train.loc[(df_train.age == '42'),'age']='(38, 48)'
    df_train.loc[(df_train.age == '46'),'age']='(38, 48)'

    df_train.loc[(df_train.age == '(38, 43)'),'age']='(38, 48)'
    
    df_train.loc[(df_train.age == '(60, 100)'),'age']=61
    df_train.loc[(df_train.age == '(48, 53)'),'age']=49
    df_train.loc[(df_train.age == '(0, 6)'),'age']=1
    df_train.loc[(df_train.age == '(38, 48)'),'age']=39
    df_train.loc[(df_train.age == '(8, 23)'),'age']=9
    df_train.loc[(df_train.age == '(34, 36)'),'age']=35
    df_train.loc[(df_train.age == '(55, 58)'),'age']=56
    df_train.loc[(df_train.age == '(25, 32)'),'age']=26
    
    df_train.loc[(df_train.gender == 'm'),'gender']="male"
    df_train.loc[(df_train.gender == 'f'),'gender']="female"
    

#     df_train['age'] = np.asarray(df_train['age']).astype(np.float32)
#     df_train['age'] = df_train['age'].astype(np.array)

#     df_train.loc[(df_train.age == df_train.age),'age']=df_train['age'].astype("category")
    
    return df_train

In [165]:
import tensorflow as tf
def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    labels = dataframe.pop("age")
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    return ds


#default divisor value == 1
def embed_categorical_feature(feature, name, dataset, is_string, divisor = 1):
    lookup_class = keras.layers.StringLookup if is_string else IntegerLookup
    # Create a lookup layer which will turn strings into integer indices
    lookup = lookup_class(output_mode="int") 

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible string values and assign them a fixed integer index
    lookup.adapt(feature_ds)

    # Turn the string input into integer indices
    encoded_feature = lookup(feature)

    num_categories = lookup.vocabulary_size()
    embedded_feature = tf.keras.layers.Embedding(input_dim = num_categories+1, output_dim = math.floor(num_categories/divisor) )

    embedded_feature = embedded_feature(encoded_feature)
    embedded_feature = layers.Flatten()(embedded_feature)

    return embedded_feature

In [166]:
# age = keras.Input(shape=(1,), name="age", dtype="int64")

# train_ds = dataframe_to_dataset(df_train)

# age_embed = embed_categorical_feature(age, "age", train_ds, True)

In [167]:
# train_ds

In [168]:
import keras

In [169]:
# classes = df_train['age'].unique()

# target = df_train['age'].values
# target_classes = keras.utils.to_categorical(target, classes)

In [170]:
import numpy as np 
import pandas as pd
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# dataset_folder_name = 'UTKFace'

TRAIN_TEST_SPLIT = 0.7
IM_WIDTH = IM_HEIGHT = 198
dataset_dict = {
    'gender': {
        0: 'male',
        1: 'female'
    }
}
dataset_dict['gender_alias'] = dict((g, i) for i, g in dataset_dict['gender'].items())

In [171]:
!pip install plotly




[notice] A new release of pip available: 22.3 -> 22.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [172]:
import plotly.graph_objects as go

def plot_distribution(pd_series):
    labels = pd_series.value_counts().index.tolist()
    counts = pd_series.value_counts().values.tolist()
    
    pie_plot = go.Pie(labels=labels, values=counts, hole=.3)
    fig = go.Figure(data=[pie_plot])
    fig.update_layout(title_text='Distribution for %s' % pd_series.name)
    
    fig.show()

In [173]:
df_train.loc[(df_train.gender == 0),'gender']="male"
df_train.loc[(df_train.gender == 1),'gender']="female"

In [174]:
df = df_train

In [175]:
plot_distribution(df_train['gender'])

In [176]:
# bins = [8, 25, 34, 38, 48, 55, 60, np.inf]
# names = ['<6', '8-23', '25-32', '34-36', '38-48', '55-58', '60+']

# age_binned = pd.cut(df['age'], bins, labels=names)
# plot_distribution(age_binned)

In [177]:
df_train.head()

Unnamed: 0,file_name,val,Male,Young,Attractive,gender
0,151836.jpg,0,0,0,0,female
1,088017.jpg,0,0,0,1,female
2,075970.jpg,0,0,0,0,female
3,108615.jpg,0,0,0,0,female
4,066579.jpg,0,0,0,0,female


In [178]:
from keras.utils import to_categorical
from PIL import Image

class UtkFaceDataGenerator():
    """
    Data generator for the UTKFace dataset. This class should be used when training our Keras multi-output model.
    """
    def __init__(self, df):
        self.df = df

        self.GENDER = True
        self.AGE = False
        self.RACE = False

        self.rel_path = 'input/celeba-dataset/img_align_celeba' #"input/adiencedb/inside/faces"

        self.FILE_HEADER = "file_name"
        
    def generate_split_indexes(self):
        p = np.random.permutation(len(self.df))
        train_up_to = int(len(self.df) * TRAIN_TEST_SPLIT)
        train_idx = p[:train_up_to]
        test_idx = p[train_up_to:]        
        train_up_to = int(train_up_to * TRAIN_TEST_SPLIT)
        train_idx, valid_idx = train_idx[:train_up_to], train_idx[train_up_to:]
        
        # converts alias to id
        if self.GENDER:
#             pass
#             self.df['gender'] = self.df['gender']
            self.df['gender'] = self.df['gender'].map(lambda gender: dataset_dict['gender_alias'][gender])
        if self.RACE:
              self.df['race_id'] = self.df['race'].map(lambda race: dataset_dict['race_alias'][race])        
        if self.AGE:
            self.max_age = self.df['age'].max()
        
        return train_idx, valid_idx, test_idx
    
    def preprocess_image(self, img_path):
        """
        Used to perform some minor preprocessing on the image before inputting into the network.
        """

        img_path = os.path.join(self.rel_path,img_path)
        
#         print(img_path)
        im = Image.open(img_path)
        im = im.resize((IM_WIDTH, IM_HEIGHT))
        im = np.array(im) / 255.0
        
        return im
        
    def generate_images(self, image_idx, is_training, batch_size=16):
        """
        Used to generate a batch with images when training/testing/validating our Keras model.
        """
        
        # arrays to store our batched data
        images, ages, races, genders = [], [], [], []

        while True:
            for idx in image_idx:
                person = self.df.iloc[idx]
                
                if self.AGE:
                    age = person['age']
                if self.RACE:
                    race = person['race_id']
                if self.GENDER:
                    gender = person['gender']
                file = person[self.FILE_HEADER]
                
                im = self.preprocess_image(file)
                

                if self.AGE:
                    ages.append(age / self.max_age)
                if self.RACE:
                    races.append(to_categorical(race, len(dataset_dict['race_id'])))
                if self.GENDER:
                    genders.append(to_categorical(gender, len(dataset_dict['gender'])))
                images.append(im)
                
                # yielding condition
                if len(images) >= batch_size:
                    to_yield = []
                    if self.AGE:
                        to_yield.append(np.array(ages))
                    if self.RACE:
                        to_yield.append(np.array(races))
                    if self.GENDER:
                        to_yield.append(np.array(genders))

                    print(len(to_yield))
                    yield np.array(images), to_yield

                    images, ages, races, genders = [], [], [], []
                    
            if not is_training:
                break
                
data_generator = UtkFaceDataGenerator(df_train)
train_idx, valid_idx, test_idx = data_generator.generate_split_indexes() 

In [179]:
from keras.models import Model
from tensorflow.keras.layers import BatchNormalization
# from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Lambda
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
import tensorflow as tf

USE_AGE = False
USE_RACE = False
USE_GENDER = True


class UtkMultiOutputModel():
    """
    Used to generate our multi-output model. This CNN contains three branches, one for age, other for 
    sex and another for race. Each branch contains a sequence of Convolutional Layers that is defined
    on the make_default_hidden_layers method.
    """
    def make_default_hidden_layers(self, inputs):
        """
        Used to generate a default set of hidden layers. The structure used in this network is defined as:
        
        Conv2D -> BatchNormalization -> Pooling -> Dropout
        """
        x = Conv2D(16, (3, 3), padding="same")(inputs)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Dropout(0.25)(x)        
        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)        
        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)        
        return x    
    def build_race_branch(self, inputs, num_races):
        """
        Used to build the race branch of our face recognition network.
        This branch is composed of three Conv -> BN -> Pool -> Dropout blocks, 
        followed by the Dense output layer.
        """
        x = self.make_default_hidden_layers(inputs)        
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_races)(x)
        x = Activation("softmax", name="race_output")(x)        
        return x    
    
    def build_gender_branch(self, inputs, num_genders=2):
        """
        Used to build the gender branch of our face recognition network.
        This branch is composed of three Conv -> BN -> Pool -> Dropout blocks, 
        followed by the Dense output layer.
        """
        x = Lambda(lambda c: tf.image.rgb_to_grayscale(c))(inputs)        
        x = self.make_default_hidden_layers(inputs)        
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_genders)(x)
        x = Activation("sigmoid", name="gender_output")(x)        
        return x    
    def build_age_branch(self, inputs):   
        """
        Used to build the age branch of our face recognition network.
        This branch is composed of three Conv -> BN -> Pool -> Dropout blocks, 
        followed by the Dense output layer.        """
        x = self.make_default_hidden_layers(inputs)        
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(1)(x)
        x = Activation("linear", name="age_output")(x)        
        return x    
    
    def assemble_full_model(self, width, height): # num_races
        """
        Used to assemble our multi-output model CNN.
        """
        input_shape = (height, width, 3)#3)        
        inputs = Input(shape=input_shape)        
        output_array = []
        if USE_AGE:
            age_branch = self.build_age_branch(inputs)
            output_array.append(age_branch)
        if USE_RACE:
            race_branch = self.build_race_branch(inputs, num_races)
            output_array.append(race_branch)
        if USE_GENDER:
            gender_branch = self.build_gender_branch(inputs)
            output_array.append(gender_branch)
        model = Model(inputs=inputs,
                     outputs = output_array,
                     name="face_net")        
        return model
    
model = UtkMultiOutputModel().assemble_full_model(IM_WIDTH, IM_HEIGHT) #, num_races=len(dataset_dict['race_alias']))

In [180]:
from keras.optimizers import Adam

init_lr = 1e-4
epochs = 100

opt = Adam(learning_rate=init_lr, decay=init_lr / epochs)

USE_AGE = False
USE_RACE = False
USE_GENDER = True


loss_map = {}
loss_weight_map = {}
metrics = {}

if USE_AGE:
    loss_map['age_output'] = 'mse'
    loss_weight_map['age_output'] = 4.
    metrics['age_output'] = 'mae'

if USE_RACE:
    loss_map['race_output'] = 'categorical_crossentropy'
    loss_weight_map['race_output'] = 1.5
    metrics['race_output'] = 'accuracy'

if USE_GENDER:
    loss_map['gender_output'] = 'binary_crossentropy'
    loss_weight_map['gender_output'] = 0.1
    metrics['gender_output'] = 'accuracy'




model.compile(optimizer=opt, 
              loss=loss_map,
              loss_weights=loss_weight_map,
              metrics=metrics)

In [181]:
from keras.callbacks import ModelCheckpoint

batch_size = 32
valid_batch_size = 32
train_gen = data_generator.generate_images(train_idx, is_training=True, batch_size=batch_size)
valid_gen = data_generator.generate_images(valid_idx, is_training=True, batch_size=valid_batch_size)

callbacks = [
    ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]


history = model.fit(train_gen,
                    steps_per_epoch=len(train_idx)//batch_size,
                    epochs=epochs,
                    callbacks=callbacks,
                    validation_data=valid_gen,
                    validation_steps=len(valid_idx)//valid_batch_size)

1
Epoch 1/100


AttributeError: in user code:

    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\training.py", line 998, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\training.py", line 1092, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\compile_utils.py", line 577, in update_state
        self.build(y_pred, y_true)
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\compile_utils.py", line 483, in build
        self._metrics = tf.__internal__.nest.map_structure_up_to(
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\compile_utils.py", line 631, in _get_metric_objects
        return [self._get_metric_object(m, y_t, y_p) for m in metrics]
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\compile_utils.py", line 631, in <listcomp>
        return [self._get_metric_object(m, y_t, y_p) for m in metrics]
    File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\compile_utils.py", line 652, in _get_metric_object
        y_t_rank = len(y_t.shape.as_list())

    AttributeError: 'tuple' object has no attribute 'shape'


In [None]:
plt.clf()
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['gender_output_accuracy'],
                    name='Train'))
fig.add_trace(go.Scatter(
                    y=history.history['val_gender_output_accuracy'],
                    name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for gender feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

In [None]:
plt.clf()
fig = go.Figure()
fig.add_trace(go.Scattergl(
                    y=history.history['age_output_mae'],
                    name='Train'))
fig.add_trace(go.Scattergl(
                    y=history.history['val_age_output_mae'],
                    name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Mean Absolute Error for age feature',
                  xaxis_title='Epoch',
                  yaxis_title='Mean Absolute Error')
fig.show()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scattergl(
                    y=history.history['loss'],
                    name='Train'))
fig.add_trace(go.Scattergl(
                    y=history.history['val_loss'],
                    name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Overall loss',
                  xaxis_title='Epoch',
                  yaxis_title='Loss')
fig.show()

In [None]:
test_batch_size = 128
test_generator = data_generator.generate_images(test_idx, is_training=False, batch_size=test_batch_size)
age_pred, gender_pred = model.predict_generator(test_generator,
                                                steps=len(test_idx)//test_batch_size)

In [None]:
test_generator = data_generator.generate_images(test_idx, is_training=False, batch_size=test_batch_size)
samples = 0
images, age_true, gender_true = [], [], []
for test_batch in test_generator:
    image = test_batch[0]
    labels = test_batch[1]
    
    images.extend(image)
    age_true.extend(labels[0])
#     race_true.extend(labels[1])
    gender_true.extend(labels[1])
    
age_true = np.array(age_true)
# race_true = np.array(race_true)
gender_true = np.array(gender_true)
age_pred = age_pred * data_generator.max_age

In [None]:
cr_gender = classification_report(gender_true, gender_pred, target_names=dataset_dict['gender_alias'].keys())
print(cr_gender)

In [None]:
from sklearn.metrics import r2_scoreprint('R2 score for age: ', r2_score(age_true, age_pred))

## Generate K fold dataset

In [None]:
df_0.dtypes

In [None]:
!pip install keras_vggface

In [None]:
import keras_vggface

In [None]:
def generator_wrapper(generator):
    for batch_x,batch_y in generator:
        yield (batch_x,[batch_y[:,i] for i in range(2)])
        
datagen = ImageDataGenerator(rescale=1./255)
feature_col = ['age', 'gender']
folds = [df_0, df_1, df_2, df_3, df_4]

# for fold in folds:
#     fold['age'] = fold['age'].astype("category")

fold_acc = []

# VGG 16 as base
# base_model = vgg16.vggface(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))
base_model = VGGFace(model='resnet50', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))
base_model.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(512, activation="relu")(x)
gender_out = Dense(1,name="gender_output", activation = 'sigmoid')(x)
age_out = Dense(1,name="age_output", activation = 'softmax')(x)

model = Model(inputs=base_model.input, outputs=[age_out, gender_out])
model.compile(optimizer=Adam(learning_rate=1e-3), 
              loss={"age_output":"sparse_categorical_crossentropy", 
                    "gender_output":"binary_crossentropy"}
              ,
            metrics={'age_output': 'accuracy', 
                    'gender_output': 'accuracy'}
                    )

es = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

for val_set in range(len(folds)):
    df_train = pd.DataFrame()
    # combine the other k-1 folds
    for sel_fold in range(len(folds)):
        if sel_fold == val_set:
            continue
        df_train = pd.concat([df_train,folds[sel_fold]])
    

    val_generator = datagen.flow_from_dataframe(
        dataframe=folds[val_set], 
        directory=images_folder, 
        x_col='user_id', 
        y_col=feature_col, 
        class_mode="raw", 
        target_size=(IMG_HEIGHT, IMG_WIDTH), 
        batch_size=BATCH_SIZE,
        shuffle=True
    )
    
    train_generator = datagen.flow_from_dataframe(
        dataframe=df_train, 
        directory=images_folder, 
        x_col='user_id', 
        y_col=feature_col, 
        class_mode="raw", 
        target_size=(IMG_HEIGHT, IMG_WIDTH), 
        batch_size=BATCH_SIZE,
        shuffle=True
    )
    
    hist = model.fit(
        generator_wrapper(train_generator),
        steps_per_epoch=train_generator.n//train_generator.batch_size,
        validation_data=generator_wrapper(val_generator),
        validation_steps=val_generator.n//val_generator.batch_size,
        callbacks=[es],
        epochs=10,
        verbose=1
    )
    
    fold_acc.append(hist.history['val_gender_output_accuracy'])

## Model creation (multi-label classification)