In [131]:
import os
import numpy as np
from keras import optimizers
from keras.preprocessing import image
from keras.layers import Dense, Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.optimizers import adam
from keras import models
import cv2
import dlib
import pandas as pd
import matplotlib.pyplot as plt
import sys
import time 
import progressbar
from tqdm.notebook import tqdm_notebook

In [132]:
from sklearn import svm, datasets
from sklearn.dummy import DummyClassifier
from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve, ShuffleSplit
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, recall_score, precision_score, f1_score

In [133]:
from tensorflow.python.keras.applications.vgg16 import preprocess_input
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint

In [134]:
df = pd.read_csv('../../dataset/cartoon_set/labels.csv', sep = '\t')
df = df.drop(columns = [df.columns[0]]).drop(columns = [df.columns[1]])
df['face_shape'] = df['face_shape'].apply(str)
df

Unnamed: 0,face_shape,file_name
0,4,0.png
1,4,1.png
2,3,2.png
3,0,3.png
4,2,4.png
...,...,...
9995,2,9995.png
9996,3,9996.png
9997,2,9997.png
9998,2,9998.png


In [135]:
img = ('../../dataset/cartoon_set/img')
training, testing = train_test_split(df, random_state=0)

In [136]:
# set up data generator
data_generator = ImageDataGenerator(
    rescale = 1./255.,
    validation_split = 0.25,
    horizontal_flip=True,
    vertical_flip=True   
)

# # get batches of training images from the df
# train_generator = data_generator.flow_from_dataframe(
#         dataframe=train_set,
#         x_col="file_name",
#         y_col="face_shape",
#         target_size=(178, 218),
#         batch_size=12,
#         class_mode='categorical')

# validate_generator = data_generator.flow_from_dataframe(
#         dataframe=train_set,
#         x_col="file_name",
#         y_col="face_shape",
#         target_size=(178, 218),
#         batch_size=12,
#         class_mode='categorical')

# Get batches of training dataset from the dataframe
print("Training Dataset Preparation: ")
train_generator = data_generator.flow_from_dataframe(
        dataframe = training, directory = img ,
        x_col = "file_name", y_col = "face_shape",
        class_mode = 'categorical', target_size = (178,218),
        batch_size = 32, subset = 'training') 
   
# Get batches of validation dataset from the dataframe
print("\nValidation Dataset Preparation: ")
validation_generator = data_generator.flow_from_dataframe(
        dataframe = training, directory = img ,
        x_col = "file_name", y_col = "face_shape",
        class_mode = 'categorical', target_size = (178,218),
        batch_size = 32, subset = 'validation')

Training Dataset Preparation: 
Found 5625 validated image filenames belonging to 5 classes.

Validation Dataset Preparation: 
Found 1875 validated image filenames belonging to 5 classes.


In [145]:
# starting point 
my_model= models.Sequential()

# Add first convolutional block
my_model.add(Conv2D(24, (3, 3), activation='relu', padding='same', 
                    input_shape=(178,218,3)))
my_model.add(MaxPooling2D((2, 2), padding='same'))
# second block
my_model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
my_model.add(MaxPooling2D((2, 2), padding='same'))
# third block
my_model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
my_model.add(MaxPooling2D((2, 2), padding='same'))
# fourth block
my_model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
my_model.add(MaxPooling2D((2, 2), padding='same'))

In [146]:
# global average pooling
#my_model.add(GlobalAveragePooling2D())
# fully connected layer
#my_model.add(Dense(64, activation='relu'))
#my_model.add(BatchNormalization())
# make predictions
my_model.add(Flatten())
my_model.add(Dense(5, activation='softmax'))
# Show a summary of the model. Check the number of trainable parameters
my_model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_25 (Conv2D)           (None, 178, 218, 16)      448       
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 89, 109, 16)       0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 89, 109, 32)       4640      
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 45, 55, 32)        0         
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 45, 55, 64)        18496     
_________________________________________________________________
max_pooling2d_27 (MaxPooling (None, 23, 28, 64)        0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 23, 28, 128)      

In [148]:


# use early stopping to optimally terminate training through callbacks

es=EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

# save best model automatically
mc= ModelCheckpoint('/../../dataset/cartoon_set', monitor='val_loss', 
                    mode='min', verbose=1, save_best_only=True)
cb_list=[es,mc]


# compile model 
my_model.compile(optimizer='adam', loss='categorical_crossentropy', 
                 metrics=['accuracy'])


In [150]:
history = my_model.fit_generator(
                                train_generator,
                                epochs=25,
                                steps_per_epoch=40,
                                validation_data=validation_generator,
                                validation_steps=40
                                )

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
 1/40 [..............................] - ETA: 1:04 - loss: 0.0321 - accuracy: 1.0000



Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [98]:
test_generator = data_generator.flow_from_dataframe(
        dataframe = training, directory = img,
        x_col = "file_name", y_col = "face_shape"
        target_size=(178, 218),
        batch_size=12,
        class_mode='categorical',
        shuffle=False)


SyntaxError: invalid syntax (<ipython-input-98-a8e496adabfe>, line 4)

In [95]:
test_generator.reset()
pred=saved_model.predict_generator(test_generator, verbose=1, steps=1000)
# determine the maximum activation value for each sample
predicted_class_indices=np.argmax(pred,axis=1)

# label each predicted value to correct gender
labels = (test_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

# format file names to simply male or female
filenames=test_generator.filenames
filenz=[0]
for i in range(0,len(filenames)):
    filenz.append(filenames[i].split('\\')[0])
filenz=filenz[1:]

# determine the test set accuracy
match=[]
for i in range(0,len(filenames)):
    match.append(filenz[i]==predictions[i])
match.count(True)/1000



NameError: name 'test_generator' is not defined