# facial keypoints detector

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from os import path as opath
import os
import sys
from PIL import Image
import errno
# Global seaborn styling for the notebook's plots.
sns.set(color_codes=True, style="whitegrid")

# Put the parent directory on the import path (presumably so project-local
# packages such as `utils` can be imported from this folder).
module_path = opath.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
# Settings: directories, relative to the notebook's working directory.
# DATA_PATH holds the downloaded CSV files and the exported image folders;
# MODEL_PATH is where model checkpoint weights are written.
DATA_PATH = 'data/'
MODEL_PATH = 'models/'

## Downloading data

In [3]:
# Create the data directory if needed, change into it, download the
# competition files with the `kg` command-line tool, list the result and go
# back to the parent directory.
# The %pwd calls only echo the current working directory at each step.
# Note: `%cd ..` restores the starting directory only while DATA_PATH is a
# single-level relative path.
%mkdir -p $DATA_PATH
%pwd
%cd $DATA_PATH
%pwd
!kg download -c facial-keypoints-detector
%ls
%cd ..
%pwd

/Users/dorian/WorkSpace/kaggle_fun/facial_keypoints_detector/data
downloading https://www.kaggle.com/c/facial-keypoints-detector/download/train.csv

train.csv already downloaded !
downloading https://www.kaggle.com/c/facial-keypoints-detector/download/test.csv

test.csv already downloaded !
downloading https://www.kaggle.com/c/facial-keypoints-detector/download/train_identity.csv

train_identity.csv already downloaded !
[1m[36mtest[m[m/               [1m[36mtrain[m[m/              train_identity.csv
test.csv            train.csv           [1m[36mvalid[m[m/
/Users/dorian/WorkSpace/kaggle_fun/facial_keypoints_detector


u'/Users/dorian/WorkSpace/kaggle_fun/facial_keypoints_detector'

## Preprocess data

In [6]:
def convert_pixels(pix_str):
    """Decode a whitespace-separated pixel string into a 48x48 uint8 image.

    Parameters
    ----------
    pix_str : str
        Exactly 48 * 48 integer pixel values separated by whitespace.

    Returns
    -------
    numpy.ndarray
        Array of shape (48, 48) and dtype uint8, filled row by row.

    Raises
    ------
    ValueError
        If a token is not an integer or the number of values is not 2304.
    """
    # split() without an argument tolerates leading, trailing and repeated
    # whitespace; split(' ') would produce empty tokens that int() rejects.
    values = [int(p) for p in pix_str.split()]
    return np.array(values, 'uint8').reshape((48, 48))


def load_data():
    """Read `train.csv` from DATA_PATH into a DataFrame.

    The 'Pixels' column is decoded by `convert_pixels` while the file is
    parsed, so each cell of that column holds an image array.
    """
    train_csv = opath.join(DATA_PATH, 'train.csv')
    pixel_decoder = {'Pixels': convert_pixels}
    return pd.read_csv(train_csv, converters=pixel_decoder)

# Load the training CSV once; the later split cell starts from `data`.
data = load_data()

In [31]:
def decode_label(df):
    """Return a copy of `df` whose numeric 'Emotion' codes are replaced by names.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'Emotion' column holding the codes 0-6.

    Returns
    -------
    pandas.DataFrame
        A copy of `df`; the input frame is left untouched. Codes that are not
        in the lookup table are passed through unchanged.
    """
    emotion_names = {
        0: 'anger',
        1: 'disgust',
        2: 'fear',
        3: 'happy',
        4: 'sad',
        5: 'surprise',
        6: 'neutral',
    }
    decoded = df.copy()
    # A single lookup pass replaces seven masked assignments that wrote
    # strings into an integer column (an incompatible-dtype setitem that
    # recent pandas versions deprecate).
    decoded['Emotion'] = decoded['Emotion'].map(
        lambda code: emotion_names.get(code, code))
    return decoded

In [32]:
# Fixed seed so the undersampling and the train/valid/test split come out the
# same after every kernel restart.
SPLIT_SEED = 0

decoded_data = decode_label(data)

# Randomly pick 700 'neutral' and 300 'happy' rows to discard
# (presumably to reduce class imbalance -- confirm against the class counts).
remove_neutral_data = decoded_data[decoded_data['Emotion'] == 'neutral'].sample(
    700, random_state=SPLIT_SEED)
remove_happy_data = decoded_data[decoded_data['Emotion'] == 'happy'].sample(
    300, random_state=SPLIT_SEED)

# Combine the two index sets directly instead of adding the frames together
# just to read the index of the sum.
removed_index = remove_happy_data.index.union(remove_neutral_data.index)
transformed_data = decoded_data.drop(removed_index)

# 10% of the remaining rows form the test set, then 20% of what is left forms
# the validation set; everything else is training data.
test_data = transformed_data.sample(frac=0.1, random_state=SPLIT_SEED)
transformed_data = transformed_data.drop(test_data.index)
valid_data = transformed_data.sample(frac=0.2, random_state=SPLIT_SEED)
train_data = transformed_data.drop(valid_data.index)

In [33]:
def mkdir(path):
    """Create directory `path`, including missing parents, if it is absent.

    Calling it again for an existing directory is a no-op.

    Raises
    ------
    OSError
        If creation fails for any reason other than the directory already
        existing, including when `path` exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exception:
        # EEXIST is also reported when `path` is an existing regular file;
        # only swallow the error when a directory is really there.
        if exception.errno != errno.EEXIST or not os.path.isdir(path):
            raise
            
            
def save_img_files(df, base):
    """Write every row of `df` as a PNG image, grouped by emotion label.

    Files are stored as DATA_PATH/<base>/<Emotion>/<row index>.png, and the
    label directories are created on demand.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'Emotion' column (used as the folder name) and a
        'Pixels' column (passed to `Image.fromarray`).
    base : str
        Sub-directory of DATA_PATH that receives the images.
    """
    root_dir = opath.join(DATA_PATH, base)
    for row_id, record in df.iterrows():
        label_dir = opath.join(root_dir, record['Emotion'])
        mkdir(label_dir)
        picture = Image.fromarray(record['Pixels'])
        target = opath.join(label_dir, '{}.png'.format(row_id))
        picture.save(target, 'png')

In [34]:
# Export each split into its own sub-directory of DATA_PATH
# (order kept: test, valid, train).
for split_frame, split_dir in ((test_data, 'test'),
                               (valid_data, 'valid'),
                               (train_data, 'train')):
    save_img_files(split_frame, split_dir)

In [6]:
from utils.vgg16 import get_batches
batch_size = 16

# One generator per exported image folder; the validation generator uses a
# batch twice as large as the training and test generators.
train_dir, valid_dir, test_dir = (
    opath.join(DATA_PATH, split) for split in ('train', 'valid', 'test'))
train_batches = get_batches(train_dir, batch_size=batch_size)
valid_batches = get_batches(valid_dir, batch_size=2 * batch_size)
test_batches = get_batches(test_dir, batch_size=batch_size)

Found 2288 images belonging to 7 classes.
Found 572 images belonging to 7 classes.
Found 318 images belonging to 7 classes.


## Vanilla model

In [8]:
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import MaxPooling2D, ZeroPadding2D, Conv2D


def get_bench_model():
    """Build the small convolutional baseline classifier.

    The network takes channels-first input of shape (3, 224, 224), applies
    four 3x3 ReLU convolutions (16, 32, 64 and 128 filters) with 2x2 max
    pooling after the last three, then flatten, dropout (0.5) and a 7-unit
    output layer.

    Returns
    -------
    keras.models.Sequential
        The uncompiled model.
    """
    model = Sequential()
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=(3, 224, 224)))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dropout(0.5))
    # The model is trained with categorical cross-entropy, which expects a
    # probability distribution over the classes; softmax provides that,
    # whereas the previous ReLU output was unnormalised and could be all zero.
    model.add(Dense(7, activation='softmax'))
    return model

# Instantiate the baseline network, configure it for training and print the
# layer-by-layer summary.
bench_model = get_bench_model()
bench_model.compile(
    loss='categorical_crossentropy',
    optimizer='RMSprop',
    metrics=['accuracy'],
)
bench_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_26 (Conv2D)           (None, 16, 222, 222)      448       
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 32, 220, 220)      4640      
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 32, 110, 110)      0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 64, 108, 108)      18496     
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 64, 54, 54)        0         
_________________________________________________________________
conv2d_29 (Conv2D)           (None, 128, 52, 52)       73856     
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 128, 26, 26)       0         
__________

In [None]:
from keras.callbacks import ModelCheckpoint

weigths_name = 'bench.weigths.best.hdf5'
# Single source of truth for the checkpoint location, so the weights are
# reloaded from the same file the checkpointer writes.
weigths_path = opath.join(MODEL_PATH, weigths_name)
# The checkpoint cannot be written if the target directory is missing.
mkdir(MODEL_PATH)

# Keep only the best weights seen so far during training.
checkpointer = ModelCheckpoint(filepath=weigths_path,
                               verbose=1, save_best_only=True)
# Sample counts come from the generators defined earlier (`train_batches`,
# `valid_batches`); the previous `batches` / `val_batches` names were never
# defined in this notebook.
# NOTE(review): samples_per_epoch / nb_epoch / nb_val_samples and `.nb_sample`
# are Keras 1 names while the model cell uses the Keras 2 `Conv2D` layer --
# confirm against the installed Keras version (Keras 2 uses steps_per_epoch /
# epochs / validation_steps).
bench_model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample, nb_epoch=2,
                          validation_data=valid_batches, nb_val_samples=valid_batches.nb_sample,
                          callbacks=[checkpointer])
# Restore the best checkpoint (method name fixed: `load_weights`).
bench_model.load_weights(weigths_path)

In [3]:
from utils.vgg16 import get_model
# Build the VGG model through the project helper imported from utils.vgg16.
# NOTE(review): presumably a pretrained VGG16 `Sequential` model -- confirm
# in utils/vgg16.
vgg_model = get_model()

Using Theano backend.


In [37]:
# Replace the final layer of the VGG model with a new 7-way softmax layer.
vgg_model.pop()
# Freeze every remaining layer so only the new output layer is trained.
# (The loop previously iterated over `model.layers`, a name that is not
# defined in this notebook.)
for layer in vgg_model.layers:
    layer.trainable = False
vgg_model.add(Dense(7, activation='softmax'))
vgg_model.compile(optimizer='RMSprop', loss='categorical_crossentropy', metrics=['accuracy'])

NameError: name 'vgg_model' is not defined