In [3]:
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt

from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils, array_to_img, img_to_array, load_img
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import vgg16

from IPython.display import Image, display
import tensorflow as tf
import keras_tuner as kt

# Importing the CelebA dataset

In [4]:
# --- Paths ---
# Dataset root; the image directory and the example picture are derived from it.
main_folder = 'input/celeba-dataset/'
images_folder = main_folder + 'img_align_celeba/'
# NOTE(review): presumably the directory model weights are written to -- confirm
# against the cells that save/load weights.
weights_folder = 'weights/'
EXAMPLE_PIC = images_folder + '000506.jpg'

# --- Number of images drawn for each split ---
TRAINING_SAMPLES = 10_000
VALIDATION_SAMPLES = TEST_SAMPLES = 2_000

# --- Image size fed to the generators, and training settings ---
IMG_WIDTH = IMG_HEIGHT = 224
BATCH_SIZE = 16
NUM_EPOCHS = 20

In [5]:
# Load the attribute table: one row per picture, one column per attribute.
df_attr = pd.read_csv(filepath_or_buffer=main_folder + 'list_attr_celeba.csv')
df_attr.head(n=5)

Unnamed: 0,file_name,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,1,1,-1,1,-1,1,-1,-1,1
1,000002.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
2,000003.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,1,-1,-1,-1,-1,-1,1
3,000004.jpg,-1,-1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,1,-1,1,-1,1,1,-1,1
4,000005.jpg,-1,1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1


In [6]:
# Index the attribute table by image file name, then recode every -1 as 0 so
# the attribute flags become 0/1.
#
# The guard keeps this cell safe to re-run: once 'file_name' has become the
# index it is no longer a column, and a second set_index('file_name') would
# raise KeyError. Rebinding (instead of inplace=True) makes the data lineage
# explicit.
if df_attr.index.name != 'file_name':
    df_attr = df_attr.set_index('file_name')
df_attr = df_attr.replace(to_replace=-1, value=0)
df_attr.shape

(202599, 40)

In [7]:
# Load the table that assigns every image to a partition (column 'val').
df_partition = pd.read_csv(main_folder + 'list_eval_partition.csv')

# A notebook cell only renders its LAST expression automatically, so the
# intermediate previews must be displayed explicitly or they are discarded.
display(df_partition.head())
display(df_partition['val'].value_counts().sort_index())

# Join the partition with the 'Male' attribute on the file-name index.
# The label is cast to str; the sampling cells below compare it against
# '0' / '1' and the generators read it as a class label.
df_partition = df_partition.set_index('file_name')
df_par_attr = df_partition.join(df_attr['Male'].astype(str), how='inner')
df_par_attr.head()

Unnamed: 0_level_0,val,Male
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1
000001.jpg,0,0
000002.jpg,0,0
000003.jpg,0,1
000004.jpg,0,0
000005.jpg,0,0


In [8]:
# Class-balanced training subset: TRAINING_SAMPLES // 2 images for each 'Male'
# label ('0' first, then '1'), drawn from the rows whose partition code 'val'
# is 0.
# random_state pins the draw so that "Restart Kernel -> Run All" selects the
# same images every time; without it each run trains on a different subset.
df_train = pd.concat([
    df_par_attr[(df_par_attr['val'] == 0) & (df_par_attr['Male'] == label)]
    .sample(TRAINING_SAMPLES // 2, random_state=42)
    for label in ('0', '1')
]).reset_index()  # turn the 'file_name' index back into a column for the generator
df_train

Unnamed: 0,file_name,val,Male
0,145449.jpg,0,0
1,077615.jpg,0,0
2,027160.jpg,0,0
3,121751.jpg,0,0
4,028137.jpg,0,0
...,...,...,...
9995,079254.jpg,0,1
9996,104420.jpg,0,1
9997,019082.jpg,0,1
9998,054358.jpg,0,1


# Generating the train, validation, and test datasets

In [9]:
# ImageDataGenerator is already imported in the notebook's import cell, so it
# is not re-imported here.
# One generator factory is used for every split; the only transformation
# configured is multiplying pixel values by 1/255 (no augmentation).
datagen = ImageDataGenerator(rescale=1./255)

# Training batches: file names come from df_train['file_name'] (resolved
# relative to images_folder), binary labels from df_train['Male'].
# Images are resized to (IMG_HEIGHT, IMG_WIDTH) RGB and served in shuffled
# batches of BATCH_SIZE; the fixed seed makes the shuffle order repeatable.
train_generator = datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=images_folder,
    x_col='file_name',
    y_col="Male",
    class_mode="binary",
    color_mode="rgb",
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42
)

Found 10000 validated image filenames belonging to 2 classes.


In [10]:
# Class-balanced validation subset: VALIDATION_SAMPLES // 2 images for each
# 'Male' label ('0' first, then '1'), drawn from the rows whose partition code
# 'val' is 1.
# random_state pins the draw so that "Restart Kernel -> Run All" selects the
# same images every time; without it validation metrics are not comparable
# across runs.
df_val = pd.concat([
    df_par_attr[(df_par_attr['val'] == 1) & (df_par_attr['Male'] == label)]
    .sample(VALIDATION_SAMPLES // 2, random_state=42)
    for label in ('0', '1')
]).reset_index()  # turn the 'file_name' index back into a column for the generator
df_val

Unnamed: 0,file_name,val,Male
0,170384.jpg,1,0
1,167439.jpg,1,0
2,171077.jpg,1,0
3,171853.jpg,1,0
4,163699.jpg,1,0
...,...,...,...
1995,163501.jpg,1,1
1996,180556.jpg,1,1
1997,179704.jpg,1,1
1998,173222.jpg,1,1


In [11]:
# Validation batches: images listed in df_val['file_name'] are read from
# images_folder, resized to (IMG_HEIGHT, IMG_WIDTH) RGB, and paired with the
# binary label in df_val['Male']. Batches are shuffled with a fixed seed.
val_generator = datagen.flow_from_dataframe(
    df_val,
    images_folder,
    x_col='file_name',
    y_col='Male',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
)

Found 2000 validated image filenames belonging to 2 classes.


In [12]:
# Class-balanced test subset: TEST_SAMPLES // 2 images for each 'Male' label
# ('0' first, then '1'), drawn from the rows whose partition code 'val' is 2.
# random_state pins the draw so that "Restart Kernel -> Run All" evaluates on
# the same images every time; without it reported test scores are not
# reproducible.
df_test = pd.concat([
    df_par_attr[(df_par_attr['val'] == 2) & (df_par_attr['Male'] == label)]
    .sample(TEST_SAMPLES // 2, random_state=42)
    for label in ('0', '1')
]).reset_index()  # turn the 'file_name' index back into a column for the generator
df_test

Unnamed: 0,file_name,val,Male
0,182874.jpg,2,0
1,197372.jpg,2,0
2,197371.jpg,2,0
3,195691.jpg,2,0
4,195441.jpg,2,0
...,...,...,...
1995,201571.jpg,2,1
1996,199489.jpg,2,1
1997,185504.jpg,2,1
1998,201700.jpg,2,1


In [13]:
# Test batches: images listed in df_test['file_name'] are read from
# images_folder, resized to (IMG_HEIGHT, IMG_WIDTH) RGB, and paired with the
# binary label in df_test['Male'].
# shuffle=False: batches are not shuffled (unlike the train/val generators).
# The seed argument is still passed through unchanged.
test_generator = datagen.flow_from_dataframe(
    df_test,
    images_folder,
    x_col='file_name',
    y_col='Male',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=False,
    seed=42,
)

Found 2000 validated image filenames belonging to 2 classes.


In [14]:
# Number of batches needed to cover each split exactly once.
# len(generator) rounds up, so a trailing partial batch is included. Floor
# division (n // batch_size) would silently drop the remainder whenever the
# sample count is not a multiple of the batch size -- e.g. leaving some test
# images unevaluated. With the current sizes (10000 / 2000 / 2000 images,
# batch 16) both forms give 625 / 125 / 125.
STEP_SIZE_TRAIN = len(train_generator)
STEP_SIZE_VAL = len(val_generator)
STEP_SIZE_TEST = len(test_generator)