In [1]:
# ! pip install pandas
# ! pip install scikit-learn  # PyPI package that provides the `sklearn` module

In [1]:
import tensorflow as tf
# Enable on-demand GPU memory allocation for every visible GPU, instead of
# letting TensorFlow reserve all of the memory up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)


In [2]:
# Load the preprocessed dataset; each row also carries the image pixels.
# NOTE(review): read_pickle can execute arbitrary code while unpickling, so
# only load a 'withpixel_gender.pkl' that comes from a trusted source.
import pandas as pd
df = pd.read_pickle('withpixel_gender.pkl')

In [3]:
"""
full path - the path of image file
gender - 0,1 denoting male female
age - the category of the person with age group
pixels - image encoded as matrix in dataframe
"""
df

Unnamed: 0,full_path,gender,age,pixels
0,[17/10000217_1981-05-05_2009.jpg],1.0,28,"[255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255..."
2,[12/100012_1948-07-03_2008.jpg],1.0,60,"[92.0, 97.0, 91.0, 89.0, 94.0, 90.0, 91.0, 96...."
4,[16/10002116_1971-05-31_2012.jpg],0.0,41,"[61.0, 30.0, 10.0, 61.0, 30.0, 10.0, 61.0, 30...."
5,[02/10002702_1960-11-09_2012.jpg],0.0,52,"[97.0, 122.0, 178.0, 97.0, 122.0, 178.0, 97.0,..."
6,[41/10003541_1937-09-27_1971.jpg],1.0,34,"[190.0, 189.0, 194.0, 204.0, 203.0, 208.0, 203..."
...,...,...,...,...
62321,[38/9996938_1937-02-15_1968.jpg],1.0,31,"[71.0, 71.0, 71.0, 71.0, 71.0, 71.0, 71.0, 71...."
62322,[46/9996946_1943-11-01_1968.jpg],1.0,25,"[54.0, 54.0, 54.0, 44.0, 44.0, 44.0, 28.0, 28...."
62323,[49/9996949_1937-04-17_1963.jpg],1.0,26,"[41.0, 41.0, 41.0, 29.0, 29.0, 29.0, 22.0, 22...."
62325,[09/9998109_1972-12-27_2013.jpg],1.0,41,"[137.0, 174.0, 94.0, 137.0, 174.0, 94.0, 137.0..."


In [4]:
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np

In [14]:
# Build the training arrays: one-hot encoded age labels and image tensors.
# NOTE(review): assumes every value in df['age'] is an integer in [0, 100];
# to_categorical fails on labels >= `classes` -- confirm upstream filtering.
classes = 101  # one class per age, 0 to 100
target = df['age'].values

# Cap the number of samples to limit memory usage. Labels and features are cut
# with the same limit so they stay aligned row for row.
limit_dataset = 10000

target_classes = keras.utils.to_categorical(target[:limit_dataset], classes)

# Take the first `limit_dataset` pixel rows with a single slice rather than
# re-reading the whole column on every loop iteration.
features = list(df['pixels'].values[:limit_dataset])
if len(features) >= limit_dataset:
    print('Done - decided number of features are collected')

# Convert the list into one numpy array that can be used for batch training,
# then restore the image layout: 224 x 224 pixels with 3 channels.
features = np.array(features)
features = features.reshape(features.shape[0], 224, 224, 3)

# Guard against silently training on misaligned images and labels.
assert len(features) == len(target_classes), 'features and labels are misaligned'

done - given number of features collected


In [18]:
# Split the dataset into a training part (70%) and an evaluation part (30%).
from sklearn.model_selection import train_test_split

# Fixed seed so the same split is produced on every run of the notebook.
SPLIT_SEED = 42
train_x, test_x, train_y, test_y = train_test_split(
    features, target_classes, test_size=0.30, random_state=SPLIT_SEED)

# Sanity check: total sample/label counts, then the sizes of the two splits.
print(len(features))
print(len(target_classes))
print(len(train_x), len(test_x))

In [20]:
# Memory consumption is very high at this point: once the train/test arrays
# exist, df and features can be deleted to get that RAM back.
# del df
# del features  # frees about 20 GB of RAM

In [10]:
# Optional: dump the features and targets to plain pickle files so later runs
# can reload them directly, without going through pandas.
# pandas already ships a pickle loader (read_pickle), so importing pickle adds no new dependency.

# import pickle

# with open('image_features.pickle', 'wb') as handle:
#     pickle.dump(features, handle)
    
# with open('gender_target.pickle', 'wb') as handle:
#     pickle.dump(target_gender, handle)
    
# with open('age_target.pickle', 'wb') as handle:
#     pickle.dump(target, handle)

In [21]:
# Creating the base VGG Face Model

from tensorflow.keras import Sequential
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Activation


# vgg-face model 

# VGG-Face network, assembled from a compact description of its conv blocks.
# Each entry is (filters, number of 3x3 conv layers). Every conv layer is
# preceded by a 1-pixel zero padding and every block ends with a 2x2 max-pool
# of stride 2.
vgg_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
for block_no, (n_filters, n_convs) in enumerate(vgg_blocks):
    for conv_no in range(n_convs):
        # Only the very first layer declares the 224x224x3 input shape.
        is_first_layer = block_no == 0 and conv_no == 0
        pad_kwargs = {'input_shape': (224, 224, 3)} if is_first_layer else {}
        model.add(ZeroPadding2D((1, 1), **pad_kwargs))
        model.add(Convolution2D(n_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# Classifier head expressed as convolutions: a 7x7 conv followed by two 1x1
# convs, with dropout in between, ending in a softmax over 2622 outputs.
# The layers are created in the same order in which they are added.
for head_layer in (
    Convolution2D(4096, (7, 7), activation='relu'),
    Dropout(0.5),
    Convolution2D(4096, (1, 1), activation='relu'),
    Dropout(0.5),
    Convolution2D(2622, (1, 1)),
    Flatten(),
    Activation('softmax'),
):
    model.add(head_layer)



In [22]:
# Pretrained VGG-Face weights are available on Google Drive:
# https://drive.google.com/file/d/1CPSeum3HpopfomUEK1gybeuIVoeJT_Eo/view?usp=sharing
# The file is read from the relative path 'vgg_face_weights.h5'.

model.load_weights('vgg_face_weights.h5')

In [23]:
# Print the layer-by-layer summary of the VGG-Face model built above.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d (ZeroPadding2 (None, 226, 226, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 224, 224, 64)      1792      
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 226, 226, 64)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 64)      0         
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 114, 114, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 112, 112, 128)     7

In [24]:
# Replace the 2622-way VGG-Face classifier with a 101-way age classifier
# (one output per age class, 0 to 100).

from tensorflow.keras import Model

# Freeze the early layers: they can already detect useful patterns, and
# fitting the whole network from scratch might lose what the pretrained
# weights learned. Everything except the last 7 layers of `model` is frozen,
# which leaves the final three conv layers (and the dropout/flatten/softmax
# layers between and after them) trainable.
for layer in model.layers[:-7]:
    layer.trainable = False

# Branch off model.layers[-4] (the last Dropout, i.e. the input of the
# original 2622-filter conv) and attach a new 101-filter 1x1 conv head,
# followed by the same Flatten + softmax tail as the base model.
base_model_output = Convolution2D(101, (1,1), name='predictions')(model.layers[-4].output)
base_model_output = Flatten()(base_model_output)
base_model_output = Activation('softmax')(base_model_output)

# The age model shares its input and all retained layers with `model`.
age_model = Model(inputs=model.input, outputs=base_model_output)

In [25]:
# Print the layer-by-layer summary of the age model.
age_model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d_input (InputL [(None, 224, 224, 3)]     0         
_________________________________________________________________
zero_padding2d (ZeroPadding2 (None, 226, 226, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 224, 224, 64)      1792      
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 226, 226, 64)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 64)      0         
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 114, 114, 64)     

In [27]:
# Training
# ModelCheckpoint writes the model to disk whenever the monitored val_loss
# improves. EarlyStopping is imported here but not used in this cell.

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# One-hot targets over 101 classes -> categorical cross-entropy loss.
age_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

checkpointer = ModelCheckpoint(
    filepath='age_model.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

scores = []
epochs = 250
batch_size = 256

# Arguments that are identical for every fit call.
fit_options = dict(
    epochs=2,
    validation_data=(test_x, test_y),
    callbacks=[checkpointer],
)

# Each outer round draws `batch_size` random training indices and runs a
# 2-epoch fit on just that subset, validating against the whole test split.
# The History object of every round is kept in `scores`.
for i in range(epochs):
    print('epoch', i)
    ix_train = np.random.choice(train_x.shape[0], size=batch_size)
    score = age_model.fit(train_x[ix_train], train_y[ix_train], **fit_options)
    scores.append(score)

epoch 0
Epoch 1/2
Epoch 00001: val_loss improved from inf to 4.37888, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.37888 to 4.37638, saving model to age_model.hdf5
epoch 1
Epoch 1/2
Epoch 00001: val_loss improved from 4.37638 to 4.37163, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.37163 to 4.37106, saving model to age_model.hdf5
epoch 2
Epoch 1/2
Epoch 00001: val_loss improved from 4.37106 to 4.36830, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.36830 to 4.36550, saving model to age_model.hdf5
epoch 3
Epoch 1/2
Epoch 00001: val_loss improved from 4.36550 to 4.36275, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.36275 to 4.36020, saving model to age_model.hdf5
epoch 4
Epoch 1/2
Epoch 00001: val_loss improved from 4.36020 to 4.35771, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.35771 to 4.35530, saving model to age_model

Epoch 00001: val_loss improved from 4.33473 to 4.33218, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.33218 to 4.32963, saving model to age_model.hdf5
epoch 10
Epoch 1/2
Epoch 00001: val_loss improved from 4.32963 to 4.32717, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.32717 to 4.32475, saving model to age_model.hdf5
epoch 11
Epoch 1/2
Epoch 00001: val_loss improved from 4.32475 to 4.32218, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.32218 to 4.31959, saving model to age_model.hdf5
epoch 12
Epoch 1/2
Epoch 00001: val_loss improved from 4.31959 to 4.31705, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.31705 to 4.31463, saving model to age_model.hdf5
epoch 13
Epoch 1/2
Epoch 00001: val_loss improved from 4.31463 to 4.31234, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.31234 to 4.31006, saving model to age_model.hdf5
epoc

Epoch 00002: val_loss improved from 4.29036 to 4.28837, saving model to age_model.hdf5
epoch 19
Epoch 1/2
Epoch 00001: val_loss improved from 4.28837 to 4.28649, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.28649 to 4.28467, saving model to age_model.hdf5
epoch 20
Epoch 1/2
Epoch 00001: val_loss improved from 4.28467 to 4.28275, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.28275 to 4.28074, saving model to age_model.hdf5
epoch 21
Epoch 1/2
Epoch 00001: val_loss improved from 4.28074 to 4.27872, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.27872 to 4.27670, saving model to age_model.hdf5
epoch 22
Epoch 1/2
Epoch 00001: val_loss improved from 4.27670 to 4.27470, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.27470 to 4.27278, saving model to age_model.hdf5
epoch 23
Epoch 1/2
Epoch 00001: val_loss improved from 4.27278 to 4.27088, saving model to age_model.

Epoch 00001: val_loss improved from 4.25465 to 4.25286, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.25286 to 4.25104, saving model to age_model.hdf5
epoch 29
Epoch 1/2
Epoch 00001: val_loss improved from 4.25104 to 4.24930, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.24930 to 4.24754, saving model to age_model.hdf5
epoch 30
Epoch 1/2
Epoch 00001: val_loss improved from 4.24754 to 4.24583, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.24583 to 4.24420, saving model to age_model.hdf5
epoch 31
Epoch 1/2
Epoch 00001: val_loss improved from 4.24420 to 4.24261, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.24261 to 4.24102, saving model to age_model.hdf5
epoch 32
Epoch 1/2
Epoch 00001: val_loss did not improve from 4.24102
Epoch 2/2
Epoch 00002: val_loss improved from 4.24102 to 4.23772, saving model to age_model.hdf5
epoch 33
Epoch 1/2
Epoch 00001: val_loss

Epoch 2/2
Epoch 00002: val_loss improved from 4.22511 to 4.22375, saving model to age_model.hdf5
epoch 38
Epoch 1/2
Epoch 00001: val_loss improved from 4.22375 to 4.22237, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.22237 to 4.22098, saving model to age_model.hdf5
epoch 39
Epoch 1/2
Epoch 00001: val_loss improved from 4.22098 to 4.21956, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.21956 to 4.21812, saving model to age_model.hdf5
epoch 40
Epoch 1/2
Epoch 00001: val_loss improved from 4.21812 to 4.21669, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.21669 to 4.21533, saving model to age_model.hdf5
epoch 41
Epoch 1/2
Epoch 00001: val_loss improved from 4.21533 to 4.21395, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.21395 to 4.21260, saving model to age_model.hdf5
epoch 42
Epoch 1/2
Epoch 00001: val_loss improved from 4.21260 to 4.21118, saving model to 

epoch 47
Epoch 1/2
Epoch 00001: val_loss improved from 4.19956 to 4.19844, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss did not improve from 4.19844
epoch 48
Epoch 1/2
Epoch 00001: val_loss improved from 4.19844 to 4.19618, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.19618 to 4.19500, saving model to age_model.hdf5
epoch 49
Epoch 1/2
Epoch 00001: val_loss improved from 4.19500 to 4.19385, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.19385 to 4.19268, saving model to age_model.hdf5
epoch 50
Epoch 1/2
Epoch 00001: val_loss improved from 4.19268 to 4.19157, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.19157 to 4.19052, saving model to age_model.hdf5
epoch 51
Epoch 1/2
Epoch 00001: val_loss improved from 4.19052 to 4.18950, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.18950 to 4.18858, saving model to age_model.hdf5
epoch 52
Epoch 1/2
Ep

Epoch 00001: val_loss improved from 4.18013 to 4.17905, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.17905 to 4.17793, saving model to age_model.hdf5
epoch 57
Epoch 1/2
Epoch 00001: val_loss improved from 4.17793 to 4.17687, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.17687 to 4.17591, saving model to age_model.hdf5
epoch 58
Epoch 1/2
Epoch 00001: val_loss improved from 4.17591 to 4.17492, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.17492 to 4.17396, saving model to age_model.hdf5
epoch 59
Epoch 1/2
Epoch 00001: val_loss improved from 4.17396 to 4.17297, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.17297 to 4.17196, saving model to age_model.hdf5
epoch 60
Epoch 1/2
Epoch 00001: val_loss improved from 4.17196 to 4.17096, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.17096 to 4.16999, saving model to age_model.hdf5
epoc

Epoch 00002: val_loss improved from 4.16127 to 4.16037, saving model to age_model.hdf5
epoch 66
Epoch 1/2
Epoch 00001: val_loss improved from 4.16037 to 4.15949, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.15949 to 4.15867, saving model to age_model.hdf5
epoch 67
Epoch 1/2
Epoch 00001: val_loss improved from 4.15867 to 4.15780, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.15780 to 4.15689, saving model to age_model.hdf5
epoch 68
Epoch 1/2
Epoch 00001: val_loss improved from 4.15689 to 4.15598, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.15598 to 4.15513, saving model to age_model.hdf5
epoch 69
Epoch 1/2
Epoch 00001: val_loss did not improve from 4.15513
Epoch 2/2
Epoch 00002: val_loss improved from 4.15513 to 4.15340, saving model to age_model.hdf5
epoch 70
Epoch 1/2
Epoch 00001: val_loss improved from 4.15340 to 4.15264, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss

Epoch 00001: val_loss improved from 4.14586 to 4.14513, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.14513 to 4.14443, saving model to age_model.hdf5
epoch 76
Epoch 1/2
Epoch 00001: val_loss improved from 4.14443 to 4.14373, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.14373 to 4.14305, saving model to age_model.hdf5
epoch 77
Epoch 1/2
Epoch 00001: val_loss improved from 4.14305 to 4.14234, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.14234 to 4.14165, saving model to age_model.hdf5
epoch 78
Epoch 1/2
Epoch 00001: val_loss improved from 4.14165 to 4.14096, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.14096 to 4.14031, saving model to age_model.hdf5
epoch 79
Epoch 1/2
Epoch 00001: val_loss improved from 4.14031 to 4.13963, saving model to age_model.hdf5
Epoch 2/2
Epoch 00002: val_loss improved from 4.13963 to 4.13882, saving model to age_model.hdf5
epoc

KeyboardInterrupt: 