In [5]:
import pandas as pd
import numpy as np
import keras
from keras.utils.data_utils import get_file
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from os.path import join
import multiprocessing
from scipy.misc import imresize
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

%matplotlib inline

In [6]:
DATA_HOME = '../CASIA-WebFace'

In [7]:
dataset = pd.read_csv('webface.train.csv', nrows=5000)

In [8]:
encoder = LabelEncoder()
encoder.fit(dataset['person'])
dataset['person_id'] = encoder.transform(dataset['person'])

In [9]:
dataset.head()

Unnamed: 0,person,count,path,person_id
0,2928029,201,2928029/079.jpg,47
1,2928029,201,2928029/097.jpg,47
2,2928029,201,2928029/103.jpg,47
3,2928029,201,2928029/191.jpg,47
4,2928029,201,2928029/169.jpg,47


In [10]:
y = dataset['person_id'].as_matrix()

In [11]:
img_paths = [r.path for r in dataset.itertuples()]
print(len(img_paths))

5000


In [12]:
def path2ImgVec(path):
    x = imresize(load_img(join(DATA_HOME, path)), (224, 224))
    return x.reshape((1,) + x.shape)

In [13]:
pool = multiprocessing.Pool(8)
results = pool.map(path2ImgVec, img_paths)
pool.close()
pool.join()

In [14]:
X = np.vstack(results)

In [15]:
X.shape

(5000, 224, 224, 3)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [13]:
from keras.engine import  Model
from keras.layers import Flatten, Dense, Input
from keras_vggface.vggface import VGGFace

#custom parameters
nb_class = len(np.unique(y))
print('nb_class', nb_class)
hidden_dim = 2048

image_input = Input(shape=X.shape[1:])

vgg_model = VGGFace(input_tensor=image_input, include_top=False)
last_layer = vgg_model.get_layer('pool5').output
x = Flatten(name='flatten')(last_layer)
x = Dense(hidden_dim, activation='relu', name='fc6')(x)
x = Dense(hidden_dim, activation='relu', name='fc7')(x)
out = Dense(nb_class, activation='softmax', name='fc8')(x)
custom_vgg_model = Model(image_input, out)

nb_class 161


In [14]:
l = custom_vgg_model.get_layer('fc8')
l.units

161

In [15]:
custom_vgg_model.compile(
    optimizer='adadelta',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

In [16]:
custom_vgg_model.fit(
    X_train, 
    y_train, 
    batch_size=64, 
    epochs=5, 
    verbose=2, 
    validation_split=0.2, 
    shuffle=True)

Train on 3200 samples, validate on 800 samples
Epoch 1/5
62s - loss: 15.3127 - acc: 0.0491 - val_loss: 15.2921 - val_acc: 0.0512
Epoch 2/5
58s - loss: 15.2870 - acc: 0.0516 - val_loss: 15.2921 - val_acc: 0.0512
Epoch 3/5
57s - loss: 15.2870 - acc: 0.0516 - val_loss: 15.2921 - val_acc: 0.0512
Epoch 4/5
57s - loss: 15.2870 - acc: 0.0516 - val_loss: 15.2921 - val_acc: 0.0512
Epoch 5/5
58s - loss: 15.2870 - acc: 0.0516 - val_loss: 15.2921 - val_acc: 0.0512


<keras.callbacks.History at 0x7fc5a4052e48>

In [32]:
custom_vgg_model = None
import gc
gc.collect()

0

In [33]:
from keras import backend as K
K.clear_session()