In [2]:
import numpy as np
import pandas as pd
from os import listdir
from os.path import isfile, join
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder
from collections import Counter
import cv2
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score,f1_score
from keras import Model
from tensorflow.keras.layers import Input,Activation,Add, MaxPooling2D, MaxPooling1D, Flatten, Dense, Conv1D, Dropout
from tensorflow import keras
from tensorflow.keras import layers, models
from skimage import color
from skimage import filters
import tensorflow as tf
from tensorflow.keras.regularizers import L1L2

In [3]:
def robertsSobelEdges(imagesArray):
    """
    returns two numpy arrays with pictures in black and white and with the edges define using Roberts filter
    on the first one and the Sobel filter on the second one. The images are in black and white
    """
    sobel_ = []
    for image in imagesArray:
        grayImg = color.rgb2gray(image)
        edge_sobel = filters.sobel(grayImg)
        sobel_.append(edge_sobel)
    return np.array(sobel_)

In [4]:
# Choose the label type
Labeltype = 'age'

In [5]:
# Import image labels and paths to dataframe
mypath = "../data/raw/CroppedImages/"
filenames = np.array([f for f in listdir(mypath) if isfile(join(mypath, f))])
splitcolumns = [x.split('_')[0:3] + [mypath + x] for x in filenames if x.count('_') == 3]
filecolumns = ['age','gender','race','file']
filedf = pd.DataFrame(data = splitcolumns, columns = filecolumns).astype({'age': 'int', 'gender': 'int', 'race': 'int'})

# Convert age to appropriate labels
filedf['age']=np.where((filedf.age<3), 0, filedf.age)
filedf['age']=np.where(((filedf.age>=3) & (filedf.age<6)), 1, filedf.age)
filedf['age']=np.where(((filedf.age>=6) & (filedf.age<9)), 2, filedf.age)
filedf['age']=np.where(((filedf.age>=9) & (filedf.age<12)), 3, filedf.age)
filedf['age']=np.where(((filedf.age>=12) & (filedf.age<21)), 4, filedf.age)
filedf['age']=np.where(((filedf.age>=21) & (filedf.age<36)), 5, filedf.age)
filedf['age']=np.where(((filedf.age>=36) & (filedf.age<51)), 6, filedf.age)
filedf['age']=np.where(((filedf.age>=51) & (filedf.age<80)), 7, filedf.age)
filedf['age']=np.where((filedf.age>=80), 8, filedf.age)

dfx = filedf.loc[:,'file']
dfy = filedf.loc[:,Labeltype]

# Get Train/Test
X_train, X_test, y_train, y_test = train_test_split(dfx, dfy, test_size=0.2, random_state=42)

# Get Train/Test/Validation dataset 0.25 * 8 = 0.2
X_train_2, X_val, y_train_2, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

# Encode training labels and initialize random over sampler
yencoded = LabelEncoder().fit_transform(y_train_2)
oversample = RandomOverSampler()

# Over sample until training classes are balanced
X_train_balanced, y_train_balanced = oversample.fit_resample(np.array(X_train_2).reshape(-1,1), yencoded)

# Of all classes, proportionally sample 10000
dxy = {'file': X_train_balanced.reshape(len(X_train_balanced)), 'label': y_train_balanced}
dfbalanced = pd.DataFrame(data = dxy)
balancedsamples = int(5000/len(set(dfbalanced.loc[:,'label'])))
dfbalancedsubset = dfbalanced.groupby('label', group_keys=False).apply(lambda x: x.sample(balancedsamples)).sample(frac=1)

# Count and print balanced classes
counter = Counter(y_train_balanced)
for k,v in counter.items():
    per = v / len(y_train_balanced) * 100
    print('Class=%d, n=%d (%.3f%%)' % (k, v, per))

# Count and print balanced classes subsets (Total should be 10000)
counter = Counter(dfbalancedsubset.loc[:,'label'])
for k,v in counter.items():
    per = v / len(dfbalancedsubset.loc[:,'label']) * 100
    print('Class=%d, n=%d (%.3f%%)' % (k, v, per))

Class=5, n=6093 (11.111%)
Class=2, n=6093 (11.111%)
Class=6, n=6093 (11.111%)
Class=0, n=6093 (11.111%)
Class=8, n=6093 (11.111%)
Class=7, n=6093 (11.111%)
Class=3, n=6093 (11.111%)
Class=4, n=6093 (11.111%)
Class=1, n=6093 (11.111%)
Class=4, n=555 (11.111%)
Class=7, n=555 (11.111%)
Class=0, n=555 (11.111%)
Class=1, n=555 (11.111%)
Class=6, n=555 (11.111%)
Class=8, n=555 (11.111%)
Class=5, n=555 (11.111%)
Class=3, n=555 (11.111%)
Class=2, n=555 (11.111%)


In [6]:
# Get training images
train_img_container = []
for i in dfbalancedsubset.loc[:,'file']:
    img = cv2.imread(i)
    train_img_container.append(img)
train_img_container = np.array(train_img_container)

# Get validation images
validation_img_container = []
for i in X_val:
    img = cv2.imread(i)
    validation_img_container.append(img)
validation_img_container = np.array(validation_img_container)

# Get testing images
test_img_container = []
for i in X_test:
    img = cv2.imread(i)
    test_img_container.append(img)
test_img_container = np.array(test_img_container)

# Convert images to sobel edges
train_img_container = robertsSobelEdges(train_img_container)
validation_img_container = robertsSobelEdges(validation_img_container)
test_img_container = robertsSobelEdges(test_img_container)

In [7]:
# Expand array dimensions to 4D so that they may be used as inputs
X_train_expand = tf.expand_dims(train_img_container, axis=-1)
X_val_expand = tf.expand_dims(validation_img_container, axis=-1)
X_test_expand = tf.expand_dims(test_img_container, axis=-1)

In [8]:
X_train_expand.shape

TensorShape([4995, 200, 200, 1])

In [9]:
X_test_expand.shape

TensorShape([4741, 200, 200, 1])

In [11]:
l1array = [1,0,0.0001,0.001,0.01,0.1]
l2array = [1,0,0.0001,0.001,0.01,0.1]
dropoutarray = [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]

In [37]:
l1history = []
l2history = []
dropouthistory = []
valhistory = []
testhistory = []
conhistory = []

In [40]:
def CNNmodeliteration(l1array,l2array,dropoutarray):
    for l1val in l1array:
        for l2val in l2array:
            for dropval in dropoutarray:
                l1reg = l1val
                l2reg = l2val
                dropoutn = dropval
                # Pick number of final nodes based on label picked
                Nlabels = -1
                if(Labeltype == 'age'):
                    Nlabels = 9
                if(Labeltype == 'gender'):
                    Nlabels = 2
                if(Labeltype == 'race'):
                    Nlabels = 5
                
                # Model creation
                CNNmodel = models.Sequential()
                CNNmodel.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=X_train_expand[0].shape, kernel_regularizer = L1L2(l1=l1reg, l2=l2reg)))
                CNNmodel.add(Dropout(dropoutn))
                CNNmodel.add(layers.MaxPooling2D((2, 2)))
                CNNmodel.add(layers.Conv2D(32, (3, 3), activation='relu',kernel_regularizer = L1L2(l1=l1reg, l2=l2reg)))
                CNNmodel.add(Dropout(dropoutn))
                CNNmodel.add(layers.MaxPooling2D((2, 2)))
                CNNmodel.add(layers.Conv2D(32, (3, 3), activation='relu',kernel_regularizer = L1L2(l1=l1reg, l2=l2reg)))
                CNNmodel.add(Dropout(dropoutn))
                CNNmodel.add(layers.MaxPooling2D((2, 2)))
                CNNmodel.add(layers.Conv2D(32, (3, 3), activation='relu',kernel_regularizer = L1L2(l1=l1reg, l2=l2reg)))
                CNNmodel.add(Dropout(dropoutn))
                CNNmodel.add(layers.MaxPooling2D((2, 2)))
                CNNmodel.add(layers.Conv2D(32, (3, 3), activation='relu',kernel_regularizer = L1L2(l1=l1reg, l2=l2reg)))
                CNNmodel.add(Dropout(dropoutn))
                CNNmodel.add(layers.MaxPooling2D((2, 2)))
                CNNmodel.add(layers.Flatten())
                CNNmodel.add(layers.Dense(64, activation='relu'))
                CNNmodel.add(layers.Dense(Nlabels, activation='softmax'))
                CNNmodel.compile(optimizer='adam',
                              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                              metrics=['accuracy'])
                
                # Validation training and testing
                val_history = CNNmodel.fit(X_train_expand, dfbalancedsubset.loc[:,'label'], epochs=5, validation_data=(X_test_expand, y_test))
                # Get validation accuracy and confusion matrix
                val_accuracy = val_history.history['val_accuracy']
                ypred = CNNmodel.predict(X_val_expand)
                valconmatrix = confusion_matrix(np.argmax(ypred, axis=-1),y_val)
                print(val_accuracy)
                # Get test accuracy and confusion matrix
                ypred = CNNmodel.predict(X_test_expand)
                testconmatrix = confusion_matrix(np.argmax(ypred, axis=-1),y_test)
                test_accuracy = accuracy_score(y_test, np.argmax(ypred, axis=-1))
                print(test_accuracy)
                print(l1reg)
                print(l2reg)
                print(dropoutn)
                l1history.append(l1reg)
                l2history.append(l2reg)
                dropouthistory.append(dropoutn)
                valhistory.append(val_accuracy)
                testhistory.append(test_accuracy)
                conhistory.append(testconmatrix)

In [78]:
CNNmodeliteration(l1array,[0],[0])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.02256907895207405, 0.06918371468782425, 0.02256907895207405, 0.16726429760456085, 0.16726429760456085]
0.1672642902341278
1
0
0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.284539133310318, 0.3963298797607422, 0.4136258065700531, 0.41763341426849365, 0.47373971343040466]
0.4737397173592069
0
0
0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.208394855260849, 0.3516135811805725, 0.399282842874527, 0.40982913970947266, 0.438726007938385]
0.4387260071714828
0.0001
0
0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.02256907895207405, 0.4353511929512024, 0.16852985322475433, 0.06412149220705032, 0.16726429760456085]
0.1672642902341278
0.001
0
0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.031638894230127335, 0.031638894230127335, 0.06412149220705032, 0.4353511929512024, 0.02256907895207405]
0.02256907825353301
0.01
0
0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.02784222736954689, 0.031638894230127335,

In [12]:
Results = pd.read_csv('neuralnetworkresults.csv')

In [13]:
Results.head()

Unnamed: 0,l1,l2,dropout,valacc
0,0.0,0.0,0.0,[0.34127822518348694]
1,1.0,0.0,0.0,"[0.02256907895207405, 0.06918371468782425, 0.0..."
2,0.0,0.0,0.0,"[0.284539133310318, 0.3963298797607422, 0.4136..."
3,0.0001,0.0,0.0,"[0.208394855260849, 0.3516135811805725, 0.3992..."
4,0.001,0.0,0.0,"[0.02256907895207405, 0.4353511929512024, 0.16..."


In [32]:
Results['bestval'] = Results.loc[:,'valacc'].apply(lambda x: max([float(y) for y in x.replace(' ','').replace('[','').replace(']','').split(',')]))

In [33]:
Results.head()

Unnamed: 0,l1,l2,dropout,valacc,bestval
0,0.0,0.0,0.0,[0.34127822518348694],0.341278
1,1.0,0.0,0.0,"[0.02256907895207405, 0.06918371468782425, 0.0...",0.167264
2,0.0,0.0,0.0,"[0.284539133310318, 0.3963298797607422, 0.4136...",0.47374
3,0.0001,0.0,0.0,"[0.208394855260849, 0.3516135811805725, 0.3992...",0.438726
4,0.001,0.0,0.0,"[0.02256907895207405, 0.4353511929512024, 0.16...",0.435351


In [35]:
Results[Results['bestval']>0.45]

Unnamed: 0,l1,l2,dropout,valacc,bestval
2,0.0,0.0,0.0,"[0.284539133310318, 0.3963298797607422, 0.4136...",0.47374
8,0.0,0.0,0.0,"[0.3670111894607544, 0.4197426736354828, 0.473...",0.47374
9,0.0,0.0001,0.0,"[0.2952963411808014, 0.4321872889995575, 0.455...",0.455389
10,0.0,0.001,0.0,"[0.29761654138565063, 0.4351402521133423, 0.33...",0.457287
14,0.0,0.0,0.1,"[0.31153765320777893, 0.47036489844322205, 0.3...",0.477536
16,0.0,0.0,0.3,"[0.3296772837638855, 0.39105674624443054, 0.41...",0.510019


In [38]:
l1array = [1,0.1,0.01,0.001,0.00001,0.000001]
l2array = [0.0001]
dropoutarray = [0.0]

In [41]:
CNNmodeliteration(l1array,l2array,dropoutarray)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.031638894230127335, 0.16852985322475433, 0.16852985322475433, 0.06412149220705032, 0.02256907895207405]
0.02256907825353301
1
0.0001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.4353511929512024, 0.16726429760456085, 0.16726429760456085, 0.06918371468782425, 0.16852985322475433]
0.16852984602404555
0.1
0.0001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.013499261811375618, 0.4353511929512024, 0.013499261811375618, 0.16726429760456085, 0.02256907895207405]
0.02256907825353301
0.01
0.0001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.4353511929512024, 0.16726429760456085, 0.06412149220705032, 0.16852985322475433, 0.06412149220705032]
0.0641214933558321
0.001
0.0001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.25732967257499695, 0.31491246819496155, 0.4218519330024719, 0.4427335858345032, 0.3965408205986023]
0.3965408141742248
1e-05
0.0001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5

In [42]:
l1array = [1,0.1,0.01,0.001,0.00001,0.000001]
l2array = [0.001]
dropoutarray = [0.0]

In [43]:
CNNmodeliteration(l1array,l2array,dropoutarray)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.02256907895207405, 0.02256907895207405, 0.16852985322475433, 0.4353511929512024, 0.4353511929512024]
0.43535119173170217
1
0.001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.16726429760456085, 0.02784222736954689, 0.16726429760456085, 0.06918371468782425, 0.02784222736954689]
0.027842227378190254
0.1
0.001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.06918371468782425, 0.16852985322475433, 0.4353511929512024, 0.013499261811375618, 0.16852985322475433]
0.16852984602404555
0.01
0.001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.013499261811375618, 0.02256907895207405, 0.031638894230127335, 0.06918371468782425, 0.02256907895207405]
0.02256907825353301
0.001
0.001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.2176755964756012, 0.3277789354324341, 0.4385150671005249, 0.370386004447937, 0.36933135986328125]
0.3693313646909935
1e-05
0.001
0.0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0

In [44]:
l2array = [0.001,0.0001,0.00001,0.000001]
l1array = [0.0000]
dropoutarray = [0.3]

In [45]:
CNNmodeliteration(l1array,l2array,dropoutarray)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.4039232134819031, 0.45349082350730896, 0.49230119585990906, 0.4355621039867401, 0.3963298797607422]
0.39632988820923853
0.0
0.001
0.3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.27736765146255493, 0.3670111894607544, 0.4323982298374176, 0.4292343258857727, 0.35245728492736816]
0.35245728749209027
0.0
0.0001
0.3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.3807213604450226, 0.500738263130188, 0.3233495056629181, 0.5155030488967896, 0.43914785981178284]
0.4391478591014554
0.0
1e-05
0.3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.4477958381175995, 0.46129509806632996, 0.4229065477848053, 0.3872600793838501, 0.39717358350753784]
0.39717359206918373
0.0
1e-06
0.3


In [46]:
l2array = [0]
l1array = [0.001,0.0001,0.00001,0.000001]
dropoutarray = [0.3]

In [47]:
CNNmodeliteration(l1array,l2array,dropoutarray)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.4585530459880829, 0.40497785806655884, 0.45180341601371765, 0.4665682315826416, 0.47184139490127563]
0.4718413836743303
0.001
0
0.3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.47395065426826477, 0.5121282339096069, 0.43640580773353577, 0.4977852702140808, 0.40223580598831177]
0.40223581522885465
0.0001
0
0.3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.42353934049606323, 0.3735498785972595, 0.4568656384944916, 0.43598395586013794, 0.38219785690307617]
0.38219784855515715
1e-05
0
0.3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.40202489495277405, 0.4218519330024719, 0.34844970703125, 0.44083526730537415, 0.3855726718902588]
0.3855726639949378
1e-06
0
0.3


In [48]:
Results = pd.DataFrame(data = np.transpose(np.array([l1history,l2history,dropouthistory,valhistory])), columns =['l1','l2','dropout','valacc'])

  Results = pd.DataFrame(data = np.transpose(np.array([l1history,l2history,dropouthistory,valhistory])), columns =['l1','l2','dropout','valacc'])


In [49]:
Results.to_csv('neuralnetworkresults2.csv')

In [50]:
connp = np.array(conhistory)

In [51]:
np.save('neuralnetworkconmatrices2.npy',connp)