UTK Face - AGe, Gender and ethicity prediction

Kaggle - https://www.kaggle.com/datasets/jangedoo/utkface-new

In [1]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

The syntax of the command is incorrect.
'cp' is not recognized as an internal or external command,
operable program or batch file.


In [2]:
!kaggle datasets download -d jangedoo/utkface-new

Dataset URL: https://www.kaggle.com/datasets/jangedoo/utkface-new
License(s): copyright-authors
Downloading utkface-new.zip to /content
100% 330M/331M [00:13<00:00, 29.8MB/s]
100% 331M/331M [00:13<00:00, 25.9MB/s]


In [10]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import zipfile

from tensorflow import keras
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras.models import Model # Funtonal API
from keras.models import Sequential # Sequential API
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model


In [5]:
zip = zipfile.ZipFile("/content/utkface-new.zip", 'r')
zip.extractall("/content")
zip.close()

In [14]:
folder_path = "/content/utkface_aligned_cropped/UTKFace"

In [20]:
age = []
gender = []
emotion = []
img_path = []

for file in os.listdir(folder_path):
  age.append(int(file.split('_')[0]))
  gender.append(int(file.split('_')[1]))
  emotion.append(int(file.split('_')[2][0]))
  img_path.append(file)

In [22]:
print(len(age))
print(len(gender))
print(len(emotion))
print(len(img_path))

23708
23708
23708
23708


In [27]:
df = pd.DataFrame({'age' : age, 'gender' : gender, 'emotion' : emotion
                  , 'img_path' : img_path})

In [28]:
df.head()

Unnamed: 0,age,gender,emotion,img_path
0,2,1,3,2_1_3_20161219225311520.jpg.chip.jpg
1,45,1,3,45_1_3_20170119183505494.jpg.chip.jpg
2,35,0,3,35_0_3_20170119201342436.jpg.chip.jpg
3,16,1,4,16_1_4_20170102234841875.jpg.chip.jpg
4,60,1,0,60_1_0_20170110141759687.jpg.chip.jpg


In [58]:
df['emotion'].value_counts()

emotion
0    10078
1     4526
3     3975
2     3437
4     1692
Name: count, dtype: int64

In [39]:
train_df = df.sample(frac = 1, random_state = 0).iloc[:20000]
test_df = df.sample(frac = 1, random_state = 0).iloc[20000:]

print(train_df.shape, test_df.shape)

(20000, 4) (3708, 4)


In [43]:
train_datagen = ImageDataGenerator(rotation_range = 30,
                                   width_shift_range = 0.2,
                                   height_shift_range = 0.2,
                                   shear_range = 0.2,
                                   zoom_range = 0.3,
                                   channel_shift_range = 0.2,
                                   fill_mode = "nearest",
                                   horizontal_flip = True,
                                   vertical_flip = True,
                                   rescale = 1./255)

test_datagen = ImageDataGenerator(rescale = 1./255)

In [44]:
train_generator = train_datagen.flow_from_dataframe(train_df, directory = folder_path,
                                                    x_col = 'img_path', y_col = ['age', 'gender', 'emotion'],
                                                    target_size = (200, 200),
                                                    class_mode = 'multi_output')

Found 20000 validated image filenames.


In [45]:
test_generator = test_datagen.flow_from_dataframe(test_df, directory = folder_path,
                                                    x_col = 'img_path', y_col = ['age', 'gender', 'emotion'],
                                                    target_size = (200, 200),
                                                    class_mode = 'multi_output')

Found 3708 validated image filenames.


Building VGG 16 Architecture

In [47]:
from keras.applications.vgg16 import VGG16
from keras.layers import *
from keras.models import Model

In [59]:
vgg16_model = VGG16(include_top = False,
                    weights = "imagenet",
                    input_shape = (200, 200, 3))
vgg16_model.trainable = False
# We give false to not change the actual VGG model trained with imagenet

output = vgg16_model.layers[-1].output # -1 to access the last layer (Dense layer)
flatten = Flatten()(output)

dense1 = Dense(512, activation = 'relu')(flatten) # Age
dense2 = Dense(512, activation = 'relu')(flatten) # Gender
dense3 = Dense(512, activation = 'relu')(flatten) # Emotion

# Age
dense4 = Dense(512, activation = 'relu')(dense1)

# Gender
dense5 = Dense(512, activation = 'relu')(dense2)

# Emotion
dense6 = Dense(512, activation = 'relu')(dense3)

# Outputs
output1 = Dense(1, activation = 'linear', name = 'age')(dense4)
output2 = Dense(1, activation = 'sigmoid', name = 'gender')(dense5)
output3 = Dense(5, activation = 'softmax', name = 'emotion')(dense6)

# Functional API
model = Model(inputs = vgg16_model.input, outputs = [output1, output2, output3])
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 200, 200, 3)]        0         []                            
                                                                                                  
 block1_conv1 (Conv2D)       (None, 200, 200, 64)         1792      ['input_3[0][0]']             
                                                                                                  
 block1_conv2 (Conv2D)       (None, 200, 200, 64)         36928     ['block1_conv1[0][0]']        
                                                                                                  
 block1_pool (MaxPooling2D)  (None, 100, 100, 64)         0         ['block1_conv2[0][0]']        
                                                                                            

In [60]:
model.compile(optimizer = 'adam', loss = {'age' : 'mae', 'gender' : 'binary_crossentropy', 'emotion' : 'sparse_categorical_crossentropy'},
                      metrics = {'age' : 'mae', 'gender' : 'accuracy', 'emotion' : 'accuracy'})

In [61]:
history = model.fit(train_generator, batch_size = 32, epochs = 10, validation_data = test_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
