In [1]:
import tensorflow as tf
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

import pandas as pd
import numpy as np
import keras
from keras.utils.data_utils import get_file
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from os.path import join
import multiprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

%matplotlib inline

Using TensorFlow backend.


In [2]:
DATA_HOME = '../CASIA-WebFace-aligned-cropped/'

In [3]:
dataset = pd.read_csv('webface.aligned.train.csv', nrows=None)

In [4]:
encoder = LabelEncoder()
encoder.fit(dataset['person'])
dataset['person_id'] = encoder.transform(dataset['person'])

In [5]:
dataset.head()

Unnamed: 0,person,count,path,person_id
0,3331486,13,3331486/007.png,1107
1,3331486,13,3331486/002.png,1107
2,3331486,13,3331486/008.png,1107
3,3331486,13,3331486/006.png,1107
4,3331486,13,3331486/012.png,1107


In [6]:
np.mean(dataset['count'])

52.41738783526668

In [7]:
y = dataset['person_id'].as_matrix()

In [8]:
img_paths = [r.path for r in dataset.itertuples()]
print(len(img_paths))

65172


In [9]:
def path2ImgVec(path):
    x = img_to_array(load_img(join(DATA_HOME, path)))
    return x.reshape((1,) + x.shape)

In [10]:
pool = multiprocessing.Pool(8)
results = pool.map(path2ImgVec, img_paths)
pool.close()
pool.join()

In [11]:
X = np.vstack(results)

In [12]:
X.shape

(65172, 55, 47, 3)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42)

In [14]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Activation, Flatten, Input, concatenate
from keras.utils import np_utils
from keras.layers import LSTM
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import accuracy_score
from keras import metrics
from keras.callbacks import Callback
from keras.layers.normalization import BatchNormalization
from keras.engine import Model
from keras import optimizers

nb_class = len(np.unique(y))
print('nb_class', nb_class)
hidden_dim = 160
best_weights_filepath = '../models/best_weights.hdf5'

def build_model():
    image_input = Input(shape=X.shape[1:])
    
    conv1 = Conv2D(20, (4, 4), name='conv1')(image_input)
    conv1 = BatchNormalization()(conv1)
    conv1 = Activation('relu')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1)
#     pool1 = Dropout(rate=0.2)(pool1)
    
    conv2 = Conv2D(40, (3, 3), name='conv2')(pool1)
    conv2 = BatchNormalization()(conv2)
    conv2 = Activation('relu')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2)
#     pool2 = Dropout(rate=0.2)(pool2)

    conv3 = Conv2D(60, (3, 3), name='conv3')(pool2)
    conv3 = BatchNormalization()(conv3)
    conv3 = Activation('relu')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3)

    flat1 = Flatten(name='flat1')(pool3)
    
    conv4 = Conv2D(80, (2, 2), name='conv4')(pool3)
    conv4 = BatchNormalization()(conv4)
    conv4 = Activation('relu')(conv4)
    flat2 = Flatten(name='flat2')(conv4)
    
    merged = concatenate([flat1, flat2])
    
    out = Dense(hidden_dim, name='hidden1')(merged)
    out = BatchNormalization()(out)
    out = Activation('relu',name='deepid')(out)
    out = Dense(nb_class, activation='softmax', name='softmax_class')(out)
    
    model = Model(inputs=image_input, outputs=out)

    optimizer = optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=(1e-4))

    model.compile(
        optimizer='adam', #rmsprop
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    
    print(model.summary())
    return model

nb_class 2025


In [15]:
model = None
# with tf.device('/gpu:0'):
model = build_model()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 55, 47, 3)     0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 52, 44, 20)    980                                          
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, 52, 44, 20)    80                                           
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 52, 44, 20)    0                                            
___________________________________________________________________________________________

In [16]:
saveBestModel = keras.callbacks.ModelCheckpoint(
    best_weights_filepath, 
    monitor='val_acc', 
    verbose=0, 
    save_best_only=True, 
    mode='auto'
)
earlyStopping=keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    patience=10, 
    verbose=1, 
    mode='auto'
)

model.fit(
    X_train, 
    y_train, 
    batch_size=512, 
    epochs=15,
    verbose=2, 
    validation_split=0.1, 
    shuffle=True,
    callbacks=[saveBestModel, earlyStopping],
)

Train on 52788 samples, validate on 5866 samples
Epoch 1/15
9s - loss: 7.1602 - acc: 0.0214 - val_loss: 7.1669 - val_acc: 0.0217
Epoch 2/15
8s - loss: 6.1804 - acc: 0.0654 - val_loss: 6.2654 - val_acc: 0.0694
Epoch 3/15
8s - loss: 5.3131 - acc: 0.1383 - val_loss: 5.5493 - val_acc: 0.1209
Epoch 4/15
8s - loss: 4.5063 - acc: 0.2331 - val_loss: 5.1659 - val_acc: 0.1504
Epoch 5/15
8s - loss: 3.7835 - acc: 0.3375 - val_loss: 5.1028 - val_acc: 0.1700
Epoch 6/15
8s - loss: 3.1559 - acc: 0.4389 - val_loss: 4.4137 - val_acc: 0.2526
Epoch 7/15
8s - loss: 2.6110 - acc: 0.5317 - val_loss: 4.3778 - val_acc: 0.2591
Epoch 8/15
8s - loss: 2.1518 - acc: 0.6107 - val_loss: 4.4466 - val_acc: 0.2511
Epoch 9/15
8s - loss: 1.7600 - acc: 0.6852 - val_loss: 4.4009 - val_acc: 0.2760
Epoch 10/15
8s - loss: 1.4319 - acc: 0.7455 - val_loss: 4.1851 - val_acc: 0.3099
Epoch 11/15
8s - loss: 1.1531 - acc: 0.8030 - val_loss: 4.2675 - val_acc: 0.2980
Epoch 12/15
8s - loss: 0.9069 - acc: 0.8541 - val_loss: 4.3697 - val_

<keras.callbacks.History at 0x7f42312f8550>

In [17]:
model.load_weights(best_weights_filepath)

In [18]:
model.save('../models/webface-simple-cnn.aligned.model.h5')

In [19]:
model.evaluate(X_test, y_test, batch_size=512, verbose=2)

[4.0945967028718231, 0.31221233601400405]

In [20]:
import gc
gc.collect()

0