In [1]:
!pip install keras_vggface

Collecting keras_vggface
  Downloading https://files.pythonhosted.org/packages/11/9c/d249cf4998344806d71b0351db690917413d1f7eaab83805f4095375e7a1/keras_vggface-0.5.tar.gz
Building wheels for collected packages: keras-vggface
  Building wheel for keras-vggface (setup.py) ... [?25l[?25hdone
  Stored in directory: /root/.cache/pip/wheels/72/aa/01/eb7baeb2f6e2d2f0d2aabddb5f01d57fa22fbd019ee2799bf5
Successfully built keras-vggface
Installing collected packages: keras-vggface
Successfully installed keras-vggface-0.5


In [0]:
import pandas as pd
import numpy as np
import numpy.matlib
import matplotlib.pyplot as plt

In [0]:
from tqdm import tqdm_notebook as tqdm
import os
from glob import glob
from random import choice, sample
from collections import defaultdict

In [4]:
from keras.preprocessing.image import load_img, array_to_img, img_to_array

Using TensorFlow backend.


In [0]:
from keras_vggface.utils import preprocess_input

In [0]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract
from keras.models import Model
from keras.optimizers import Adam
from keras_vggface.vggface import VGGFace
import keras

#data

In [8]:
!unzip recognizing-faces-in-the-wild.zip
!unzip train.zip -d train
!unzip test.zip -d test

Archive:  recognizing-faces-in-the-wild.zip
  inflating: test.zip                
  inflating: train.zip               
  inflating: sample_submission.csv   
  inflating: train_relationships.csv  
Archive:  train.zip
   creating: train/F0002/
   creating: train/F0005/
   creating: train/F0009/
   creating: train/F0010/
   creating: train/F0016/
   creating: train/F0017/
   creating: train/F0020/
   creating: train/F0022/
   creating: train/F0023/
   creating: train/F0024/
   creating: train/F0030/
   creating: train/F0031/
   creating: train/F0036/
   creating: train/F0038/
   creating: train/F0039/
   creating: train/F0040/
   creating: train/F0041/
   creating: train/F0044/
   creating: train/F0049/
   creating: train/F0050/
   creating: train/F0051/
   creating: train/F0052/
   creating: train/F0054/
   creating: train/F0058/
   creating: train/F0059/
   creating: train/F0063/
   creating: train/F0064/
   creating: train/F0068/
   creating: train/F0070/
   creating: train/F0074/
   

In [0]:
train_file_path = "./train_relationships.csv"
train_folders_path = "./train/"
val_famillies = "F09"

In [0]:
all_images = glob(train_folders_path + "*/*/*.jpg")

In [0]:
train_images = [x for x in all_images if val_famillies not in x]
val_images = [x for x in all_images if val_famillies in x]

In [0]:
train_person_to_images_map = defaultdict(list)

In [0]:
ppl = [x.split("/")[-3] + "/" + x.split("/")[-2] for x in all_images]

In [0]:
for x in train_images:
    train_person_to_images_map[x.split("/")[-3] + "/" + x.split("/")[-2]].append(x)

In [15]:
train_person_to_images_map

defaultdict(list,
            {'F0438/MID3': ['./train/F0438/MID3/P04622_face2.jpg',
              './train/F0438/MID3/P04620_face4.jpg',
              './train/F0438/MID3/P04625_face3.jpg',
              './train/F0438/MID3/P04617_face2.jpg',
              './train/F0438/MID3/P04609_face3.jpg',
              './train/F0438/MID3/P04632_face3.jpg',
              './train/F0438/MID3/P04623_face3.jpg',
              './train/F0438/MID3/P04621_face2.jpg',
              './train/F0438/MID3/P04616_face2.jpg',
              './train/F0438/MID3/P04628_face3.jpg',
              './train/F0438/MID3/P04627_face2.jpg',
              './train/F0438/MID3/P04610_face2.jpg',
              './train/F0438/MID3/P04637_face3.jpg',
              './train/F0438/MID3/P04612_face1.jpg'],
             'F0438/MID6': ['./train/F0438/MID6/P04631_face2.jpg',
              './train/F0438/MID6/P04630_face3.jpg',
              './train/F0438/MID6/P04629_face2.jpg',
              './train/F0438/MID6/P04637_face4.jpg',

In [0]:
val_person_to_images_map = defaultdict(list)

In [0]:
for x in val_images:
    val_person_to_images_map[x.split("/")[-3] + "/" + x.split("/")[-2]].append(x)

In [0]:
relationships = pd.read_csv(train_file_path)
relationships = list(zip(relationships.p1.values, relationships.p2.values))
relationships = [x for x in relationships if x[0] in ppl and x[1] in ppl]

In [19]:
len(relationships)

3362

In [0]:
train = [x for x in relationships if val_famillies not in x[0]]
val = [x for x in relationships if val_famillies in x[0]]

In [0]:
def read_img(path):
    img = load_img(path)
    img = np.array(img).astype(np.float)
    return preprocess_input(img, version=2)

In [0]:
def gen(list_tuples, person_to_images_map, batch_size=16):
    ppl = list(person_to_images_map.keys())
    while True:
        batch_tuples = sample(list_tuples, batch_size // 2)
        labels = [1] * len(batch_tuples)
        while len(batch_tuples) < batch_size:
            p1 = choice(ppl)
            p2 = choice(ppl)

            if p1 != p2 and (p1, p2) not in list_tuples and (p2, p1) not in list_tuples:
                batch_tuples.append((p1, p2))
                labels.append(0)

        for x in batch_tuples:
            if not len(person_to_images_map[x[0]]):
                print(x[0])

        X1 = [choice(person_to_images_map[x[0]]) for x in batch_tuples]
        X1 = np.array([read_img(x) for x in X1])

        X2 = [choice(person_to_images_map[x[1]]) for x in batch_tuples]
        X2 = np.array([read_img(x) for x in X2])

        yield [X1, X2], labels

In [0]:
def baseline_model():
    input_1 = Input(shape=(224, 224, 3))
    input_2 = Input(shape=(224, 224, 3))

    base_model = VGGFace(model='resnet50', include_top=False)

    for x in base_model.layers[:-3]:
        x.trainable = True

    x1 = base_model(input_1)
    x2 = base_model(input_2)

    x1 = Concatenate(axis=-1)([GlobalMaxPool2D()(x1), GlobalAvgPool2D()(x1)])
    x2 = Concatenate(axis=-1)([GlobalMaxPool2D()(x2), GlobalAvgPool2D()(x2)])

    x3 = Subtract()([x1, x2])
    x3 = Multiply()([x3, x3])

    x1_ = Multiply()([x1, x1])
    x2_ = Multiply()([x2, x2])
    x4 = Subtract()([x1_, x2_])
    
    x = Concatenate(axis=-1)([x4, x3])

    x = Dense(100, activation="relu")(x)
    x = Dropout(0.01)(x)
    out = Dense(1, activation="sigmoid")(x)

    model = Model([input_1, input_2], out)

    model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adam(1e-5))

    model.summary()

    return model


In [0]:
file_path = "vgg_face.h5"

In [0]:
checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

reduce_on_plateau = ReduceLROnPlateau(monitor="val_acc", mode="max", factor=0.8, patience=5, verbose=1)

callbacks_list = [checkpoint, reduce_on_plateau]

In [31]:
model = baseline_model()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_5 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
vggface_resnet50 (Model)        multiple             23561152    input_4[0][0]                    
                                                                 input_5[0][0]                    
__________________________________________________________________________________________________
global_max_pooling2d_3 (GlobalM (None, 2048)         0           vggface_resnet50[1][0]           
__________

In [33]:
model.fit_generator(
    gen(train, train_person_to_images_map, batch_size=16), 
    use_multiprocessing=True,
    validation_data=gen(val, val_person_to_images_map, batch_size=16), 
    epochs=100, 
    callbacks=callbacks_list, 
    steps_per_epoch=200, 
    validation_steps=100,
    workers=4 
)




Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.60188, saving model to vgg_face.h5
Epoch 2/100

Epoch 00002: val_acc improved from 0.60188 to 0.62562, saving model to vgg_face.h5
Epoch 3/100

Epoch 00003: val_acc did not improve from 0.62562
Epoch 4/100

Epoch 00004: val_acc did not improve from 0.62562
Epoch 5/100

Epoch 00005: val_acc did not improve from 0.62562
Epoch 6/100

Epoch 00006: val_acc did not improve from 0.62562
Epoch 7/100

Epoch 00007: val_acc improved from 0.62562 to 0.64687, saving model to vgg_face.h5
Epoch 8/100

Epoch 00008: val_acc improved from 0.64687 to 0.68688, saving model to vgg_face.h5
Epoch 9/100

Epoch 00009: val_acc did not improve from 0.68688
Epoch 10/100

Epoch 00010: val_acc did not improve from 0.68688
Epoch 11/100

Epoch 00011: val_acc improved from 0.68688 to 0.71625, saving model to vgg_face.h5
Epoch 12/100

Epoch 00012: val_acc did not improve from 0.71625
Epoch 13/100

Epoch 00013: val_acc did not improve from 0.71625
Epoch 14/100

E

<keras.callbacks.History at 0x7f92be269160>

In [0]:
test_path = "./test/"

In [0]:
def chunker(seq, size=32):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

In [37]:
from tqdm import tqdm

submission = pd.read_csv('./sample_submission.csv')

predictions = []

for batch in tqdm(chunker(submission.img_pair.values)):
    X1 = [x.split("-")[0] for x in batch]
    X1 = np.array([read_img(test_path + x) for x in X1])

    X2 = [x.split("-")[1] for x in batch]
    X2 = np.array([read_img(test_path + x) for x in X2])

    pred = model.predict([X1, X2]).ravel().tolist()
    predictions += pred

submission['is_related'] = predictions

submission.to_csv("vgg_face.csv", index=False)

166it [02:05,  1.05it/s]
