# Face Detection

Hello! In this task you will create your own deep face detector.

First of all, we need to import some useful stuff.

In [None]:
%load_ext autoreload
%autoreload 2
%pylab inline

In [None]:
from keras import backend as K

In [None]:
from matplotlib import pyplot as plt
import numpy as np
from skimage import transform

In [None]:
from get_data import load_dataset, unpack

Then we can load the dataset.

Each image in the train, validation and test datasets has shape (176, 176, 3), but part of each image is black background. The interesting part of the image is aligned to the top-left corner.

Bounding boxes define the faces in an image and consist of 5 integer numbers: [image_index, min_row, min_col, max_row, max_col]. Bounding box width and height are 32 +/- 8 pixels.

`train_bboxes` and `val_bboxes` are lists of bboxes.

`train_shapes` and `val_shapes` are lists of the shapes of the interesting image regions.

In [None]:
# Load the training and validation splits. Each call returns the images, the
# face bounding boxes and the shapes of the interesting image regions.
train_images, train_bboxes, train_shapes = load_dataset("train")
val_images, val_bboxes, val_shapes = load_dataset("val")

## Prepare data

For learning we should extract positive and negative samples from the images.
The numbers of positive and negative samples should be similar.
Every sample should have the same size.

In [None]:
# Shape of one training sample; it is also the input shape of the classifier.
# Presumably (rows, cols, channels) - confirm against the dataset loader.
SAMPLE_SHAPE = (32, 32, 3)

In [None]:
from scores import iou_score

def is_new_bbox(new_bbox, true_bboxes, eps=1e-1):
    """Tell whether ``new_bbox`` stays clear of every box in ``true_bboxes``.

    A bbox here is 4 ints [min_row, min_col, max_row, max_col], without the
    image index. The result is False when the IoU score with at least one
    box of ``true_bboxes`` reaches ``eps``, and True otherwise (including
    when ``true_bboxes`` is empty).
    """
    overlaps_known = any(iou_score(new_bbox, known) >= eps for known in true_bboxes)
    return not overlaps_known

In [None]:
def get_positive_negative(images, true_bboxes, image_shapes):
    """Collect positive and negative samples from the images.

    Template stub: until the extraction code is written, both returned lists
    are empty.

    Returns:
        A ``(positive, negative)`` pair of lists.
    """
    positive, negative = [], []

    # Write code here: fill `positive` and `negative`.

    return positive, negative

In [None]:
from keras.utils import to_categorical

def get_samples(images, true_bboxes, image_shapes):
    """Build the arrays used for training the classifier.

    Returns:
        X - the positive samples followed by the negative samples.
        Y - one-hot encoded labels for X; label 1 marks a positive sample,
            label 0 a negative one.
    """
    faces, non_faces = get_positive_negative(images=images,
                                             true_bboxes=true_bboxes,
                                             image_shapes=image_shapes)
    # Labels for the positives are fixed before the negatives are appended.
    labels = [1] * len(faces)

    # `faces` becomes the combined sample list: positives first, then negatives.
    faces.extend(non_faces)
    labels.extend([0] * len(non_faces))

    return np.array(faces), to_categorical(labels)

In [None]:
def visualize_samples(data, n_cols=5, n_rows=1):
    """Show a grid of randomly chosen samples.

    Args:
        data: indexable collection of images accepted by ``plt.imshow``.
        n_cols: number of grid columns.
        n_rows: number of grid rows.

    Raises:
        ValueError: if ``data`` is empty.

    Samples are drawn with replacement, so the same sample may appear twice.
    """
    if len(data) == 0:
        raise ValueError("visualize_samples: `data` is empty, nothing to show")

    # Use the explicitly imported pyplot API rather than the bare `figure`
    # name, which only exists when `%pylab inline` has been executed.
    plt.figure(figsize=(3 * n_cols, 3 * n_rows))
    picked = np.random.randint(len(data), size=n_cols * n_rows)
    for cell, sample_index in enumerate(picked, start=1):
        plt.subplot(n_rows, n_cols, cell)
        plt.axis('off')
        plt.imshow(data[sample_index])
    plt.show()

In [None]:
# Build the sample arrays and their one-hot labels for both splits.
X_train, Y_train = get_samples(train_images, train_bboxes, train_shapes)
X_val, Y_val = get_samples(val_images, val_bboxes, val_shapes)

In [None]:
# We should see faces here: column 1 of the one-hot labels marks positives.
visualize_samples(X_train[Y_train[:, 1] == 1])

In [None]:
# We should not see faces here: these are the samples labelled as negative.
visualize_samples(X_train[Y_train[:, 1] == 0])

## Classifier training

First of all, we should train a face classifier that checks whether a face is present in a sample.

In [None]:
# Mini-batch size used by `fit` for the training generator.
BATCH_SIZE = 64

In [None]:
from keras.preprocessing.image import ImageDataGenerator # Usefull thing. Read the doc.

# Augmentation for the training batches: random horizontal flips and random
# shifts with range 0.2 (presumably a fraction of the image size - see the
# Keras ImageDataGenerator documentation).
datagen = ImageDataGenerator(horizontal_flip=True,
                             width_shift_range=0.2,
                             height_shift_range=0.2)
# NOTE(review): no feature-wise normalization option is enabled above, so this
# call presumably computes no statistics that are used - confirm in the docs.
datagen.fit(X_train)

In [None]:
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
# Very usefull, pay attention

def _plot_history_curves(history, train_key, val_key, title, ylabel):
    """Plot one train/validation metric pair recorded in ``history.history``."""
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()


def fit(model_name, model, datagen, X_train, Y_train, X_val, Y_val, class_weight=None, epochs=10, lr=0.001, verbose=False):
    """Compile and train ``model``, then plot the accuracy and loss curves.

    You can edit this function anyhow.

    Args:
        model_name: prefix of the checkpoint files written to
            ``data/checkpoints/``.
        model: the model to compile and train.
        datagen: generator object providing ``flow`` and ``standardize``.
        X_train, Y_train: training samples and their one-hot labels.
        X_val, Y_val: validation samples and their one-hot labels.
        class_weight: forwarded to the training call unchanged.
        epochs: number of training epochs.
        lr: learning rate passed to the RMSprop optimizer.
        verbose: when true, print the model summary before training.

    Returns:
        The history object returned by the training call.
    """
    import os  # local import: only needed to prepare the checkpoint folder

    if verbose:
        model.summary()

    model.compile(optimizer=RMSprop(lr=lr),  # You can use another optimizer
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # The checkpoint callback writes into this folder; create it up front so
    # that saving the first checkpoint does not fail on a fresh checkout.
    checkpoint_dir = "data/checkpoints"
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_path = ("data/checkpoints/{model_name}".format(model_name=model_name)
                       + "-{epoch:02d}-{val_loss:.2f}.hdf5")

    # steps_per_epoch must be a whole number of batches: round up so that the
    # last, possibly smaller, batch is included.
    steps_per_epoch = (len(X_train) + BATCH_SIZE - 1) // BATCH_SIZE

    history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE),
                                  validation_data=(datagen.standardize(X_val), Y_val),
                                  epochs=epochs, steps_per_epoch=steps_per_epoch,
                                  callbacks=[ModelCheckpoint(checkpoint_path, save_best_only=True),
                                             # Optional callbacks worth trying:
                                             # EarlyStopping(patience=20),
                                             # ReduceLROnPlateau(patience=10)
                                            ],
                                  class_weight=class_weight,
                                 )  # starts training

    # The accuracy metric is recorded as 'acc' by older Keras releases and as
    # 'accuracy' by newer ones; use whichever key is present.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'

    # summarize history for accuracy
    _plot_history_curves(history, acc_key, 'val_' + acc_key, 'model accuracy', 'accuracy')
    # summarize history for loss
    _plot_history_curves(history, 'loss', 'val_loss', 'model loss', 'loss')

    return history

In [None]:
from keras.models import Model, Sequential
from keras.layers import Flatten, Dense, Activation, Input, Dropout, Activation, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D

# Classification model
# To start with, you can try the LeNet architecture

x = inputs = Input(shape=SAMPLE_SHAPE)

# Write code here: transform `x` with the layers of the classifier.
# NOTE(review): as written, `x` is still the raw input tensor when it reaches
# the Dense layer below - presumably a Flatten/pooling step belongs here.

# This creates a model with a 2-way softmax output (negative / positive)
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=predictions)

# "MODEL_NAME" is a placeholder; it becomes the prefix of the checkpoint files.
fit(model_name="MODEL_NAME", model=model, datagen=datagen, X_train=X_train, X_val=X_val, Y_train=Y_train, Y_val=Y_val)

After training, the model weights are saved in the folder `data/checkpoints/`.
Use `model.load_weights(fname)` to load the best weights from the training steps.

In [None]:
# Replace `...` with the path of the best checkpoint in data/checkpoints/.
# The method is `load_weights`; `load_weight` does not exist on the model.
model.load_weights(...)

## Detection

If you have selected a classification architecture with a high validation score, you can use this architecture for detection.

Convert the classification architecture to a fully convolutional neural network (FCNN) that returns a heatmap of activations.

You should replace the fully-connected layers with convolutional layers. Then you need to write a function that replaces the FCNN weights with the base model weights.

### Model

In [None]:
# FCNN

# Input shape of the FCNN; detection inputs are resized to this shape.
IMAGE_SHAPE = (176, 176, 3)

def generate_fcnn_model(image_shape):
    """Build the fully convolutional model for inputs of ``image_shape``.

    The input size is fixed once the model exists, so construction lives in
    a function that can be called again later with a different size.
    """
    inputs = Input(shape=image_shape)
    x = inputs

    # Write code here: transform `x` with the convolutional layers.

    # Final 1x1 convolution with a single output channel.
    heatmap = Conv2D(1, (1, 1), activation='relu')(x)
    return Model(inputs=inputs, outputs=heatmap)

# Detection model sized for the full dataset images.
fcnn_model = generate_fcnn_model(IMAGE_SHAPE)

In [None]:
def copy_weights(base_model, fcnn_model):
    """Set the FCNN weights from the weights of the base model.

    Template stub: `fcnn_weights` has to be filled before it is passed to
    ``fcnn_model.set_weights``; as written it is still an empty list.
    """

    fcnn_weights = []
    # Current weights of both models, available as the source for the copy.
    prev_fcnn_weights = fcnn_model.get_weights()
    prev_base_weights = base_model.get_weights()

    # Write code here: build `fcnn_weights` from `prev_base_weights`
    # (presumably reshaped to match `prev_fcnn_weights` - confirm).

    fcnn_model.set_weights(fcnn_weights)

# Transfer the trained classifier weights into the detection model.
copy_weights(base_model=model, fcnn_model=fcnn_model)

### Model visualization

In [None]:
def visualize_heatmap(images, heatmap, n_cols=5, n_rows=1):
    """Show the first images with their heatmaps directly below them.

    The figure has ``2 * n_rows`` rows: each row of images is followed by the
    row of the matching heatmaps.

    Args:
        images: indexable collection of images.
        heatmap: indexable collection of 2-D heatmaps, aligned with ``images``.
        n_cols: number of image/heatmap pairs per row.
        n_rows: number of image rows (each one gets a heatmap row below it).
    """
    # Use the explicitly imported pyplot API rather than the bare `figure`
    # name, which only exists when `%pylab inline` has been executed.
    plt.figure(figsize=(3 * n_cols, 2 * 3 * n_rows))

    # Never index past the end of either collection.
    n_shown = min(n_cols * n_rows, len(images), len(heatmap))
    for n in range(n_shown):
        # Image rows and heatmap rows alternate, so pair `n` sits in grid row
        # 2 * pair_row and its heatmap in the row right below it.
        pair_row, col = divmod(n, n_cols)
        image_cell = 2 * pair_row * n_cols + col + 1

        plt.subplot(2 * n_rows, n_cols, image_cell)
        plt.axis('off')
        plt.imshow(images[n])

        plt.subplot(2 * n_rows, n_cols, image_cell + n_cols)
        plt.axis('off')
        plt.imshow(heatmap[n])
    plt.show()

In [None]:
# Run the FCNN on the validation images and show channel 0 of its output.
predictions = fcnn_model.predict(np.array(val_images))
visualize_heatmap(val_images, predictions[:, :, :, 0])

### Detector

In [None]:
# Detection
from skimage.feature import peak_local_max

def get_bboxes_and_decision_function(fcnn_model, images, image_shapes):
    """Run the FCNN on ``images`` and return detections with their scores.

    Template stub: the model is run, but turning its output into boxes is
    not written yet, so both returned lists are empty. ``image_shapes`` is
    not used by the code written so far.

    Returns:
        ``(pred_bboxes, decision_function)``. Presumably ``pred_bboxes`` holds
        [image_index, min_row, min_col, max_row, max_col] rows and
        ``decision_function`` one score per row - confirm when implementing.
    """
    # Despite the name, the images are resized (not cropped) to IMAGE_SHAPE.
    # NOTE(review): check that the value range produced by `transform.resize`
    # matches what the classifier was trained on.
    cropped_images = np.array([transform.resize(image, IMAGE_SHAPE, mode="reflect") for image in images])
    pred_bboxes, decision_function = [], []

    # Predict
    predictions = fcnn_model.predict(cropped_images)

    # Write code here: fill `pred_bboxes` and `decision_function` from
    # `predictions`.

    return pred_bboxes, decision_function

### Detector visualization

In [None]:
from matplotlib import patches

def show_bboxes(bboxes, ax, color="black", text=None):
    """Draw every bbox as an unfilled rectangle on ``ax``.

    Each bbox is indexed as [min_row, min_col, max_row, max_col]. When
    ``text`` is given, ``text[i]`` is written at the corner of the i-th box.
    """
    for index, bbox in enumerate(bboxes):
        top, left, bottom, right = bbox[0], bbox[1], bbox[2], bbox[3]
        # Matplotlib expects (x, y) = (column, row) for the anchor corner.
        rectangle = patches.Rectangle((left, top), right - left, bottom - top,
                                      fill=False, color=color)
        ax.add_patch(rectangle)
        if text is None:
            continue
        ax.text(left, top, text[index], color=color)

def visualize_bboxes(images, pred_bboxes, true_bboxes=None, decision_function=None, n_cols=5, n_rows=1):
    """Show randomly chosen images with predicted and true boxes drawn on them.

    Args:
        images: indexable collection of images.
        pred_bboxes: rows of [image_index, min_row, min_col, max_row, max_col];
            drawn in blue. May be empty.
        true_bboxes: optional rows in the same format; drawn in red.
        decision_function: optional scores aligned with ``pred_bboxes``; each
            score is printed next to its box.
        n_cols: number of grid columns.
        n_rows: number of grid rows.
    """
    # Use the explicitly imported pyplot API rather than the bare `figure`
    # name, which only exists when `%pylab inline` has been executed.
    plt.figure(figsize=(3 * n_cols, 3 * n_rows))

    # An empty list converts to a 1-D array; give it a (0, 5) shape so that
    # the column indexing below also works when there are no boxes.
    pred_bboxes = np.array(pred_bboxes, dtype=np.int32)
    if pred_bboxes.size == 0:
        pred_bboxes = pred_bboxes.reshape(0, 5)
    if true_bboxes is not None:
        true_bboxes = np.array(true_bboxes, dtype=np.int32)
        if true_bboxes.size == 0:
            true_bboxes = true_bboxes.reshape(0, 5)

    # Sampling without replacement cannot return more images than exist.
    n_shown = min(n_cols * n_rows, len(images))
    picked = np.random.choice(len(images), size=n_shown, replace=False) if n_shown else []

    for n, i in enumerate(picked):
        ax = plt.subplot(n_rows, n_cols, n + 1)
        plt.axis('off')
        plt.imshow(images[i])

        on_this_image = pred_bboxes[:, 0] == i
        _text = (["{0:0.2f}".format(decision_function[prec]) for prec in np.where(on_this_image)[0]]
                 if decision_function is not None else None)

        show_bboxes(bboxes=pred_bboxes[on_this_image, 1:], ax=ax, color="blue", text=_text)

        if true_bboxes is not None:
            show_bboxes(bboxes=true_bboxes[true_bboxes[:, 0] == i, 1:], ax=ax, color="red")
    plt.show()

In [None]:
# Detect on the validation images and draw predictions (blue) against the
# ground truth (red).
pred_bboxes, decision_function = get_bboxes_and_decision_function(fcnn_model=fcnn_model, images=val_images, image_shapes=val_shapes)

visualize_bboxes(images=val_images,
                 pred_bboxes=pred_bboxes,
                 true_bboxes=val_bboxes,
                 decision_function=decision_function
                )

## Detector score

In [None]:
from scores import best_match, average_precision

def precision_recall_curve(pred_bboxes, true_bboxes, decision_function):
    """Compute the points of the precision-recall curve of the detector.

    Template stub: until the computation is written, both returned lists
    are empty.

    Returns:
        A ``(precision, recall)`` pair of lists.
    """
    precision, recall = [], []

    # Write code here: fill `precision` and `recall`.

    return precision, recall

def show_precision_recall(pred_bboxes, true_bboxes, decision_function):
    """Draw the precision-recall curve with its average precision in the title."""
    precision, recall = precision_recall_curve(pred_bboxes=pred_bboxes,
                                               true_bboxes=true_bboxes,
                                               decision_function=decision_function)
    ap = average_precision(precision=precision, recall=recall)

    # Translucent step curve with the area below it shaded the same way.
    shade = dict(color='b', alpha=0.2)
    plt.step(recall, precision, where='post', **shade)
    plt.fill_between(recall, precision, step='post', **shade)

    # Plain line through the same points.
    plt.plot(recall, precision)

    # Axes: labels, limits and a tick every 0.1 on both axes.
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    ticks = np.arange(0, 1.05, 0.1)
    plt.xticks(ticks)
    plt.yticks(ticks)
    plt.grid(color="white")
    plt.title('Precision-Recall curve: AP={0:0.2f}'.format(ap))

In [None]:
# Precision-recall curve of the detector on the validation split.
show_precision_recall(pred_bboxes=pred_bboxes, true_bboxes=val_bboxes, decision_function=decision_function)

### Threshold

Select threshold for `recall=0.6`.

In [None]:
# Default score threshold used by `detect`. Presumably the value found for
# recall=0.6 on one particular trained model - re-select it for your own model.
THRESHOLD = 3.11776

def detect(fcnn_model, images, image_shapes, threshold=THRESHOLD, return_decision=True):
    """Return the bboxes whose decision function is not less than ``threshold``.

    Template stub: the filtering is not written yet, so the returned lists
    are empty.

    Returns:
        ``(result, result_decision)`` when ``return_decision`` is true,
        otherwise only ``result``.
    """
    pred_bboxes, decision_function = get_bboxes_and_decision_function(fcnn_model, images, image_shapes)
    result, result_decision = [], []

    # Write code here: keep the boxes that pass the threshold.

    if not return_decision:
        return result
    return result, result_decision

In [None]:
# Same evaluation as before, but only with the detections kept by `detect`
# at the default threshold.
pred_bboxes, decision_function = detect(fcnn_model=fcnn_model, images=val_images, image_shapes=val_shapes, return_decision=True)

visualize_bboxes(images=val_images,
                 pred_bboxes=pred_bboxes,
                 true_bboxes=val_bboxes,
                 decision_function=decision_function
                )

show_precision_recall(pred_bboxes=pred_bboxes, true_bboxes=val_bboxes, decision_function=decision_function)

### Test dataset

In [None]:
# Evaluate on the test split; all detections are used here (no threshold).
test_images, test_bboxes, test_shapes = load_dataset("test")
pred_bboxes, decision_function = get_bboxes_and_decision_function(fcnn_model=fcnn_model, images=test_images, image_shapes=test_shapes)
visualize_bboxes(images=test_images,
                 pred_bboxes=pred_bboxes,
                 true_bboxes=test_bboxes,
                 decision_function=decision_function
                )

show_precision_recall(pred_bboxes=pred_bboxes, true_bboxes=test_bboxes, decision_function=decision_function)

## Hard negative mining (optional)

You can upgrade the score with hard negative mining.

In [None]:
def hard_negative(train_images, image_shapes, train_bboxes, X_val, Y_val, base_model, fcnn_model):
    """Hard negative mining (optional task).

    Raises:
        NotImplementedError: always, until the function is implemented.
    """
    raise NotImplementedError


In [None]:
# NOTE: raises NotImplementedError until `hard_negative` is implemented.
hard_negative(train_images=train_images, image_shapes=train_shapes, train_bboxes=train_bboxes, X_val=X_val, Y_val=Y_val, base_model=model, fcnn_model=fcnn_model)

In [None]:
# Replace `...` with the path of the checkpoint to load, then refresh the
# detection model from the classifier and evaluate on validation again.
model.load_weights(...)
copy_weights(base_model=model, fcnn_model=fcnn_model)

pred_bboxes, decision_function = get_bboxes_and_decision_function(fcnn_model=fcnn_model, images=val_images, image_shapes=val_shapes)

visualize_bboxes(images=val_images,
                 pred_bboxes=pred_bboxes,
                 true_bboxes=val_bboxes,
                 decision_function=decision_function
                )

show_precision_recall(pred_bboxes=pred_bboxes, true_bboxes=val_bboxes, decision_function=decision_function)

## Real image dataset

Now we can test our algorithm on original (not scaled) data.

In [None]:
# Evaluate on the original (not scaled) images; all detections are used.
original_images, original_bboxes, original_shapes = load_dataset("original")
pred_bboxes, decision_function = get_bboxes_and_decision_function(fcnn_model=fcnn_model, images=original_images, image_shapes=original_shapes)
visualize_bboxes(images=original_images,
                 pred_bboxes=pred_bboxes,
                 true_bboxes=original_bboxes,
                 decision_function=decision_function
                )

show_precision_recall(pred_bboxes=pred_bboxes, true_bboxes=original_bboxes, decision_function=decision_function)

### Multi scale detector (optional)

Write and test detector with pyramid representation.

In [None]:
def multiscale_detector(fcnn_model, images, image_shapes):
    """Detector with a pyramid representation (optional task).

    Template stub: returns an empty list until it is implemented.
    """
    detections = []
    return detections

### Next steps

The next steps in deep learning detection are the R-CNN, Faster R-CNN and SSD architectures.
Implementing these architectures is quite complex.
For this reason the task doesn't cover them, but you can find the articles on the internet.