In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from keras_preprocessing.image import load_img

In [2]:
df = pd.read_csv('dataset_faces.csv')

In [3]:
df.head()

Unnamed: 0,filename,age,gender,ethnicity
0,100_1_0_20170110183726390.jpg,100,1,0
1,100_1_2_20170105174847679.jpg,100,1,2
2,100_1_2_20170110182836729.jpg,100,1,2
3,101_1_2_20170105174739309.jpg,101,1,2
4,10_0_0_20161220222308131.jpg,10,0,0


In [4]:
# map labels
gender_dict = {0: 'Male', 1: 'Female'}
ethnicity_dict = {}

# Feature Extraction

In [6]:
from PIL import Image

features = []

for image in df['filename']:
    path = 'pic/' + image
    img = load_img(path)
    img = img.resize((128, 128), Image.ANTIALIAS)
    img = np.array(img)
    features.append(img)

# Convert to np array, so it can be handled by neural networks
features = np.array(features)
# You can ignore that if using rgb. If you want to use grayscale, you should put 1 instead of 3.
features = features.reshape(len(features), 128, 128, 3)

  img = img.resize((128, 128), Image.ANTIALIAS)


# Normalize the image

In [7]:
# Because the pixel ratio is between 0 and 255:
X = features / 255.0

In [8]:
X.shape

(10137, 128, 128, 3)

In [9]:
# Convert gender and age to numpy array:
y_gender = np.array(df['gender'])
y_age = np.array(df['age'])

We have an image as an input, and we get back 2 outputs:
* Age(Regression problem)
* Gender(Classification problem)

In [16]:
input_shape = (128, 128, 3)

In [22]:
# Model creation
from keras.layers import MaxPooling2D, Flatten, Dense, Dropout, Conv2D
from keras import Input, Model

inputs = Input(input_shape)

In [23]:
# convolutional layers:
# 32 is the number of filters
conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
maxp_1 = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu')(maxp_1)
maxp_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)
conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu')(maxp_2)
maxp_3 = MaxPooling2D(pool_size=(2, 2))(conv_3)
conv_4 = Conv2D(256, kernel_size=(3, 3), activation='relu')(maxp_3)
maxp_4 = MaxPooling2D(pool_size=(2, 2))(conv_4)

# Flat everything: The convulational layer will be 'flattened' to a single dimension.
flatten = Flatten()(maxp_4)

# Fully connected layers
dense_1 = Dense(256, activation='relu')(flatten)
dense_2 = Dense(256, activation='relu')(flatten)

dropout_1 = Dropout(0.3)(dense_1)
dropout_2 = Dropout(0.3)(dense_2)

# there is 1 because there is just one output.
output_1 = Dense(1, activation='sigmoid', name='gender_out')(dropout_1)
output_2 = Dense(1, activation='relu', name='age_out')(dropout_2)


In [24]:
model = Model(inputs=[inputs], outputs=[output_1, output_2])

In [31]:
model.compile(loss=['binary_crossentropy', 'mae'], optimizer='adam', metrics=['accuracy'])
# binary_crossentropy is for the gender, while mae is for the regression (age).

In [32]:
model.summary()
# This give us the layers we are having

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_5 (Conv2D)              (None, 126, 126, 32  896         ['input_6[0][0]']                
                                )                                                                 
                                                                                                  
 max_pooling2d_4 (MaxPooling2D)  (None, 63, 63, 32)  0           ['conv2d_5[0][0]']               
                                                                                            

In [None]:
from keras.utils import plot_model
plot_model(model)

In [33]:

history = model.fit(x=X, y=[y_gender, y_age], batch_size=32, epochs=30, validation_split=0.2)
# validation_split stablished that 0.2 is for testing and 0.8 pro training.input_shape = (128, 128, 3)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30

KeyboardInterrupt: 

# Plot the Results

In [None]:
accuracy = history.history['gender_out_accuracy']
val_acc = history.history['val_gender_out_accuracy']
epochs = range(len(accuracy))

plt.plot(epochs, accuracy, 'b', label='Training Accurac')
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
plt.title('Accuracy')
plt.legend()
plt.figure()

loss = history.history['gender_out_loss']
val_loss = history.history['val_gender_out_loss']

plt.plot(epochs, loss, 'b', label='Training Loss')
plt.plot(epochs, val_loss, 'r', label='Validation Loss')
plt.title('Accuracy')
plt.legend()
plt.figure()

# Prediction with random Picture

In [None]:
image_index = 10

In [None]:
prediction = model.predict(X[image_index].reshape(1, 128, 128, 3))

In [None]:
pred_gender = prediction[0][0][0]
pred_age = prediction[1][0][0]

In [None]:
print(f"Real gender: {df['gender'][image_index]}")
print(f"Predict gender: {pred_gender}")
print(f"Real age: {df['age'][image_index]}")
print(f"Real gender: {pred_age}")