In [32]:
import pandas as pd
import numpy as np
import cv2    
import matplotlib.pyplot as plt

from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils, array_to_img, img_to_array, load_img
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import vgg16

from IPython.display import Image
import tensorflow as tf
import keras_tuner as kt

In [33]:
# --- Paths -------------------------------------------------------------------
# All dataset paths are built from main_folder; each folder string keeps its
# trailing slash so file names can be appended directly.
main_folder = 'input/celeba-dataset/'
images_folder = f'{main_folder}img_align_celeba/'
weights_folder = 'weights/'
EXAMPLE_PIC = f'{images_folder}000506.jpg'

# --- Subset sizes (number of images drawn for each split) --------------------
TRAINING_SAMPLES = 10000
VALIDATION_SAMPLES = 2000
TEST_SAMPLES = 2000

# --- Image / training settings -----------------------------------------------
IMG_WIDTH = 224
IMG_HEIGHT = 224
BATCH_SIZE = 16
NUM_EPOCHS = 20

In [34]:
# Load the attribute table: one row per picture, one column per attribute,
# plus the picture's file name.
df_attr = pd.read_csv(f'{main_folder}list_attr_celeba.csv')
df_attr.head()

Unnamed: 0,file_name,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,1,1,-1,1,-1,1,-1,-1,1
1,000002.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
2,000003.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,1,-1,-1,-1,-1,-1,1
3,000004.jpg,-1,-1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,1,-1,1,-1,1,1,-1,1
4,000005.jpg,-1,1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1


In [35]:
# Key the attribute table by image file name and replace every -1 with 0,
# so the attribute flags read as 0/1 instead of -1/1.
df_attr = df_attr.set_index('file_name').replace(to_replace=-1, value=0)
df_attr.shape

(202599, 40)

In [36]:
# Load the partition table (column 'val' holds the split id: this notebook
# uses 0 for training, 1 for validation and 2 for test) and index it by image
# file name straight from the CSV.
# To inspect the split sizes, evaluate
# df_partition['val'].value_counts().sort_index() as the last line of its own
# cell; mid-cell expression statements are never displayed.
df_partition = pd.read_csv(main_folder + 'list_eval_partition.csv', index_col='file_name')

# Join the partition with the 'Male' attribute. The label is cast to str
# ('0' / '1') because it is later filtered as a string and passed as y_col to
# flow_from_dataframe with class_mode='binary'. how='inner' keeps only file
# names present in both tables.
df_par_attr = df_partition.join(df_attr['Male'].astype(str), how='inner')
df_par_attr.head()

Unnamed: 0_level_0,val,Male
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1
000001.jpg,0,0
000002.jpg,0,0
000003.jpg,0,1
000004.jpg,0,0
000005.jpg,0,0


In [37]:
# Build a class-balanced training subset: TRAINING_SAMPLES // 2 images per
# 'Male' label ('0' first, then '1'), drawn from partition 0.
# random_state pins the draw so a Restart & Run All selects the same images;
# without it every run trains on a different subset.
df_train = pd.concat([
    df_par_attr[(df_par_attr['val'] == 0) & (df_par_attr['Male'] == label)]
    .sample(TRAINING_SAMPLES // 2, random_state=42)
    for label in ('0', '1')
]).reset_index()  # turn the file_name index back into a column for x_col
df_train

Unnamed: 0,file_name,val,Male
0,029406.jpg,0,0
1,085461.jpg,0,0
2,125213.jpg,0,0
3,046033.jpg,0,0
4,139258.jpg,0,0
...,...,...,...
9995,136681.jpg,0,1
9996,063330.jpg,0,1
9997,131894.jpg,0,1
9998,155386.jpg,0,1


In [38]:
# ImageDataGenerator is already imported in the notebook's import cell, so it
# is not imported again here.
# The only preprocessing applied is multiplying pixel values by 1/255.
# NOTE(review): model_builder loads VGG16 with ImageNet weights, whose
# reference preprocessing is vgg16.preprocess_input rather than a 1/255
# rescale. Confirm this choice is intentional.
datagen = ImageDataGenerator(rescale=1./255)

# Training batches: file names and labels come from df_train, images are read
# from images_folder and resized to (IMG_HEIGHT, IMG_WIDTH).
train_generator = datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=images_folder,
    x_col='file_name',
    y_col="Male",
    class_mode="binary",
    color_mode="rgb",
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42
)

Found 10000 validated image filenames belonging to 2 classes.


In [39]:
# Build a class-balanced validation subset: VALIDATION_SAMPLES // 2 images per
# 'Male' label ('0' first, then '1'), drawn from partition 1.
# random_state pins the draw so validation scores are comparable across runs.
df_val = pd.concat([
    df_par_attr[(df_par_attr['val'] == 1) & (df_par_attr['Male'] == label)]
    .sample(VALIDATION_SAMPLES // 2, random_state=42)
    for label in ('0', '1')
]).reset_index()  # turn the file_name index back into a column for x_col
df_val

Unnamed: 0,file_name,val,Male
0,171283.jpg,1,0
1,180779.jpg,1,0
2,165866.jpg,1,0
3,162780.jpg,1,0
4,175292.jpg,1,0
...,...,...,...
1995,181820.jpg,1,1
1996,177216.jpg,1,1
1997,163397.jpg,1,1
1998,171826.jpg,1,1


In [40]:
# Validation batches, produced by the same datagen as the training batches.
val_generator = datagen.flow_from_dataframe(
    # where the samples come from
    dataframe=df_val,
    directory=images_folder,
    x_col='file_name',
    y_col='Male',
    # how each image is decoded
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    # how batches are assembled
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
)

Found 2000 validated image filenames belonging to 2 classes.


In [41]:
# Build a class-balanced test subset: TEST_SAMPLES // 2 images per 'Male'
# label ('0' first, then '1'), drawn from partition 2.
# random_state pins the draw so the held-out set is the same on every run.
df_test = pd.concat([
    df_par_attr[(df_par_attr['val'] == 2) & (df_par_attr['Male'] == label)]
    .sample(TEST_SAMPLES // 2, random_state=42)
    for label in ('0', '1')
]).reset_index()  # turn the file_name index back into a column for x_col
df_test

Unnamed: 0,file_name,val,Male
0,186544.jpg,2,0
1,186972.jpg,2,0
2,188615.jpg,2,0
3,191014.jpg,2,0
4,201761.jpg,2,0
...,...,...,...
1995,198404.jpg,2,1
1996,187297.jpg,2,1
1997,186246.jpg,2,1
1998,183827.jpg,2,1


In [42]:
# Test batches, produced by the same datagen as the other splits.
# Shuffling is switched off for this split.
test_generator = datagen.flow_from_dataframe(
    # where the samples come from
    dataframe=df_test,
    directory=images_folder,
    x_col='file_name',
    y_col='Male',
    # how each image is decoded
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    # how batches are assembled
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=False,
    seed=42,
)

Found 2000 validated image filenames belonging to 2 classes.


In [43]:
# Number of full batches per split: sample count floor-divided by batch size
# (a trailing partial batch is not counted).
STEP_SIZE_TRAIN, STEP_SIZE_VAL, STEP_SIZE_TEST = (
    gen.n // gen.batch_size
    for gen in (train_generator, val_generator, test_generator)
)

## Applying the hyperparameter tuner

In [44]:
def model_builder(hp):
    """Build and compile a VGG16-based binary classifier for one tuner trial.

    The tunable hyperparameters are declared on ``hp`` in a fixed order:

    * ``units`` / ``units_2``: widths of the two dense layers (32..1024, step 32)
    * ``drop_out``: dropout rate applied between the two dense layers
    * ``learning_rate``: learning rate passed to Adam
    * ``fzn_layer``: how many leading VGG16 layers are frozen (8..16, step 2)

    Args:
        hp: hyperparameter container supplied by the tuner; must provide
            ``Int`` and ``Choice``.

    Returns:
        A compiled model with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    # Search space.
    n_units_first = hp.Int('units', min_value=32, max_value=1024, step=32)
    n_units_second = hp.Int('units_2', min_value=32, max_value=1024, step=32)
    dropout_rate = hp.Choice('drop_out', values=[0.1, 0.2, 0.3, 0.4, 0.5])
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    n_frozen = hp.Int('fzn_layer', min_value=8, max_value=16, step=2)

    # Convolutional backbone with ImageNet weights and no classifier head.
    backbone = vgg16.VGG16(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    )

    # Mark the whole backbone trainable, then freeze its first n_frozen layers
    # so only the later layers are fine-tuned.
    backbone.trainable = True
    for frozen_layer in backbone.layers[:n_frozen]:
        frozen_layer.trainable = False

    # Classification head: pooled features -> dense -> dropout -> dense -> sigmoid.
    features = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(n_units_first, activation="relu")(features)
    hidden = Dropout(dropout_rate)(hidden)
    hidden = Dense(n_units_second, activation="relu")(hidden)
    predictions = Dense(1, activation="sigmoid")(hidden)

    model = Model(inputs=backbone.input, outputs=predictions)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

## Hyperband tuner

In [45]:
# Stop a trial once val_loss has not improved for 4 consecutive epochs and
# restore the best weights seen. val_loss and val_accuracy are available as
# metrics because validation_data is passed to tuner.search() below.
es = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

# Hyperband search that ranks trials by validation accuracy.
# seed pins the tuner's random sampling of hyperparameters so a fresh search
# is repeatable.
# NOTE(review): results are stored under ./hyper/celeba_hyperband; an existing
# directory is presumably reused on re-run. Confirm, and remove it (or pass
# overwrite=True) when a fresh search is wanted.
tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    seed=42,
    directory='./hyper',
    project_name='celeba_hyperband',
)

tuner.search(train_generator, validation_data=val_generator, epochs=10, callbacks=[es])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

Trial 30 Complete [00h 03m 28s]
val_accuracy: 0.5

Best val_accuracy So Far: 0.9825000166893005
Total elapsed time: 01h 34m 36s
INFO:tensorflow:Oracle triggered exit


In [47]:
# Display the hyperparameter values of the best trial found by the search.
best_hps.values

{'units': 736,
 'units_2': 736,
 'drop_out': 0.1,
 'learning_rate': 0.0001,
 'fzn_layer': 10,
 'tuner/epochs': 10,
 'tuner/initial_epoch': 4,
 'tuner/bracket': 2,
 'tuner/round': 2,
 'tuner/trial_id': '0012'}

## Bayesian Optimizer

In [46]:
# tuner_2 =  kt.BayesianOptimization(
#     model_builder,
#     objective="val_accuracy",
#     max_trials=10,
#     directory='./hyper',
#     project_name='celeba_hyper_bayesian_2')

# tuner_2.search(train_generator, validation_data=val_generator, epochs=10, callbacks=[es])

# # Get the optimal hyperparameters
# best_hps = tuner_2.get_best_hyperparameters(num_trials=1)[0]