### Import Libraries
Keras, Tensorflow, Numpy and sklearn

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import keras
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.metrics import MeanSquaredError
from keras.utils.layer_utils import count_params
from keras.layers import Activation, Conv2D, Input, Add

# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Default figure size (width, height) in inches for every plot in this notebook.
plt.rcParams['figure.figsize'] = [2.5, 2.5]

### Define Constants
Here I define some constants that will be used throughout the notebook.


In [3]:
# Side length of the square input window for which we want to make predictions.
WINDOW_DIM = 80

# Side length of the square window that the model actually predicts.
MODEL_OUTPUT_DIM = 76

# Offset of the prediction window inside the input window: the border that is
# not predicted is split evenly between the two sides.
MODEL_WINDOW_START = (WINDOW_DIM - MODEL_OUTPUT_DIM) // 2

# Important directories, expressed relative to the current working directory.
root_directory = '../'
dataset_directory = root_directory + 'Dataset/'
model_directory = root_directory + 'Model/'

### Load Dataset

In [4]:
# Load the raw arrays as uint8 (np.asarray avoids an extra copy when the files
# are already stored as uint8).
X_dataset = np.asarray(np.load(dataset_directory + 'X_dummy.npy'), dtype=np.uint8)
Y_dataset = np.asarray(np.load(dataset_directory + 'Y_dummy.npy'), dtype=np.uint8)
dataset_length = X_dataset.shape[0]

# Crop every sample with a single vectorized slice instead of copying sample by
# sample in a Python loop:
#   X keeps the top-left WINDOW_DIM x WINDOW_DIM window and all input channels,
#   Y keeps the MODEL_OUTPUT_DIM x MODEL_OUTPUT_DIM prediction window and the
#   first three channels.
window_end = MODEL_WINDOW_START + MODEL_OUTPUT_DIM
X = X_dataset[:, 0:WINDOW_DIM, 0:WINDOW_DIM, :]
Y = Y_dataset[:, MODEL_WINDOW_START:window_end, MODEL_WINDOW_START:window_end, 0:3]

# Fail fast if the files do not have the layout the rest of the notebook expects.
assert X.shape == (dataset_length, WINDOW_DIM, WINDOW_DIM, 6), X.shape
assert Y.shape == (dataset_length, MODEL_OUTPUT_DIM, MODEL_OUTPUT_DIM, 3), Y.shape

# X and Y are views into the loaded arrays; train_test_split builds new arrays
# for the four splits, so the loaded data is released once X and Y are deleted.
del X_dataset, Y_dataset
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.05, random_state=0)
del X, Y

### Measure dataset roughness statistic

Here, I am trying to use the degree of deviation of the middle frame generated via averaging, from the actual middle frame, to model the difficulty of prediction.

In [5]:
def averaging_baseline_mse(X_pairs, Y_middle):
    """Mean squared error of the naive "average the two frames" predictor.

    X_pairs is indexed as (sample, row, column, channel): channels 0:3 are read
    as the first frame and channels 3:6 as the second frame. Both frames are
    cropped to the model's prediction window, averaged with integer floor
    division, and compared against Y_middle.

    The arithmetic is done in a signed 32-bit type. The earlier inline version
    subtracted in uint16, where negative differences wrap around; the result
    was only correct because squaring in uint16 happens to undo the wrap for
    8-bit pixel values.

    Returns the mean over every sample, pixel and channel.
    """
    window = slice(MODEL_WINDOW_START, MODEL_WINDOW_START + MODEL_OUTPUT_DIM)
    first_frame = X_pairs[:, window, window, 0:3].astype(np.int32)
    second_frame = X_pairs[:, window, window, 3:6].astype(np.int32)
    averaged_frame = (first_frame + second_frame) // 2
    return np.mean((averaged_frame - Y_middle) ** 2)


# Train MSE by averaging
print("Averaging Train MSE = {}".format(averaging_baseline_mse(X_train, Y_train)))

# Test MSE by averaging
print("Averaging Test MSE = {}".format(averaging_baseline_mse(X_test, Y_test)))

Averaging Train MSE = 157.86339663216214
Averaging Test MSE = 170.77380540166206


### Define the cost function for our model

Our cost function is going to consist of two parts. The first part simply computes the mean squared error between the actual middle frame and the predicted middle frame. The second part of our cost function tries to capture how well defined are the edges of different objects within the predicted frame, and penalizes the model for blurry and noisy images.

The second part of the cost function defined above may seem slightly redundant, as the mean squared error function is also capable of penalizing the model for producing blurry images. However, adding another cost part, which caters specially to the degree of definition of edges within the output frame, does not hurt, especially when we consider the fact that number of points lying on the edges of an object are much less, when compared to number of points lying within an object.


In [6]:
# Relative weight of the second (edge-definition) part of the cost function.
EDGE_LOSS_WEIGHT = 2.0

In [None]:
def greater(input_tensor, val, temperature=10.0):
    """Differentiable stand-in for ``tf.math.greater(input_tensor, val)``.

    Forward pass: 1.0 where ``input_tensor > val`` and 0.0 elsewhere, exactly as
    before (``x / relu(x)`` with the 0/0 case mapped to 0 by ``divide_no_nan``).

    Backward pass: the hard step is piecewise constant, so on its own its
    gradient is zero everywhere and anything built on top of it cannot train
    the network. To make the function differentiable in a useful sense, the
    gradient of ``sigmoid((input_tensor - val) / temperature)`` is used instead
    (a straight-through estimator); the forward values are unchanged.

    Args:
        input_tensor: float tensor to compare.
        val: Python float threshold.
        temperature: width of the sigmoid used for the gradient; larger values
            spread the gradient over a wider band around ``val``. This is a
            tunable heuristic. Pass ``None`` to get the plain hard step with
            its zero gradient (the previous behaviour).

    Returns:
        A tensor shaped like ``input_tensor`` holding 0.0 / 1.0.
    """
    input_tensor_diff = tf.math.subtract(input_tensor, tf.constant(val))
    hard_step = tf.math.divide_no_nan(input_tensor_diff, tf.nn.relu(input_tensor_diff))
    if temperature is None:
        return hard_step

    soft_step = tf.math.sigmoid(input_tensor_diff / temperature)
    # (soft - stop_gradient(soft)) is exactly zero in the forward pass, so the
    # returned values equal the hard step, while gradients flow through soft_step.
    return tf.stop_gradient(hard_step) + (soft_step - tf.stop_gradient(soft_step))

In [7]:
def _edge_score_map(frames, max_translation, threshold, edge_value):
    """Per-pixel edge score for a batch of frames.

    For every offset (i, j) with |i|, |j| <= max_translation, each pixel is
    compared with the pixel displaced by that offset: the absolute differences
    are summed over the channel axis and thresholded with `greater`. Every
    comparison above `threshold` adds `edge_value` to the pixel's score. Only
    the inner region, max_translation pixels away from each border, is kept so
    that every retained pixel has all of its neighbours.
    """
    dim = MODEL_OUTPUT_DIM
    inner_dim = dim - 2 * max_translation
    edge_map = tf.constant(0.0)
    for i in range(-max_translation, max_translation + 1):
        for j in range(-max_translation, max_translation + 1):
            # Two equally sized views of the frame, displaced from each other
            # by (i, j); each spans dim - |offset| pixels along its axis.
            rows, cols = dim - abs(i), dim - abs(j)
            window_1 = tf.slice(frames, [0, max(0, i), max(0, j), 0], [-1, rows, cols, -1])
            window_2 = tf.slice(frames, [0, max(0, -i), max(0, -j), 0], [-1, rows, cols, -1])

            channel_gap = tf.math.reduce_sum(
                tf.math.abs(tf.math.subtract(window_1, window_2)), axis=3)
            is_edge = tf.cast(greater(channel_gap, threshold), dtype=tf.float32) * edge_value

            # Crop so that position (0, 0) of the crop always corresponds to
            # frame pixel (max_translation, max_translation), whatever the offset.
            edge_map = tf.math.add(edge_map, tf.slice(
                is_edge,
                [0, max_translation - max(0, i), max_translation - max(0, j)],
                [-1, inner_dim, inner_dim]))
    return edge_map


def compute_edge_loss(y_pred, y_true, max_translation, threshold=60.0, edge_value=50):
    """Penalty for a mismatch in overall edge density between two frame batches.

    Models how well defined the edges are by measuring the dissimilarity of
    each pixel from its neighbouring pixels (see `_edge_score_map`), averaging
    that score over the frame, and comparing the averages of the two inputs.

    Args:
        y_pred, y_true: float tensors indexed as (batch, row, column, channel)
            whose spatial size is MODEL_OUTPUT_DIM x MODEL_OUTPUT_DIM.
        max_translation: largest neighbour offset, in pixels, along each axis.
        threshold: float; summed absolute channel difference above which two
            pixels count as dissimilar.
        edge_value: score added for every dissimilar neighbour.

    Returns:
        A tensor with one value per sample: the squared difference between the
        mean edge scores of `y_pred` and `y_true`. The expression is symmetric
        in its two frame arguments.
    """
    pred_density = tf.math.reduce_mean(
        _edge_score_map(y_pred, max_translation, threshold, edge_value), axis=[1, 2])
    true_density = tf.math.reduce_mean(
        _edge_score_map(y_true, max_translation, threshold, edge_value), axis=[1, 2])
    return tf.math.squared_difference(pred_density, true_density)

In [8]:
def edge_fill_loss(y_pred, y_true):
    """Overall cost: per-sample pixel MSE plus the weighted edge-definition term.

    Both inputs are cast to float32. The first term is the mean squared
    difference over rows, columns and channels; the second is
    `compute_edge_loss` with a neighbourhood of one pixel, scaled by
    EDGE_LOSS_WEIGHT. One loss value is returned per sample.

    Note: Keras calls a custom loss as fn(y_true, y_pred), so when this function
    is handed to `model.compile` the first argument actually receives the
    targets. The value is unaffected because both terms are symmetric in their
    two arguments.
    """
    pred_f32 = tf.cast(y_pred, tf.float32)
    true_f32 = tf.cast(y_true, tf.float32)

    pixel_mse = tf.math.reduce_mean(
        tf.math.squared_difference(pred_f32, true_f32), axis=[1, 2, 3])
    edge_penalty = compute_edge_loss(pred_f32, true_f32, 1)

    return tf.math.add(pixel_mse, tf.constant(EDGE_LOSS_WEIGHT) * edge_penalty)

### Define function to retrieve a Keras model
I am using a 13-layer-deep convolutional network for our task. You will notice that I haven't used any downsampling layers, so as to preserve information for frame regeneration. I also tried using ResNets, but they did not seem to provide any appreciable improvement over the current model.

In [14]:
def get_model(reg_const=0):
    """Build the 13-layer fully convolutional frame-interpolation network.

    The input is a WINDOW_DIM x WINDOW_DIM window with 6 channels. The first
    two 3x3 convolutions use 'valid' padding, so each trims one pixel from every
    border; all later layers use 'same' padding and keep the spatial size. The
    last layer is a 7x7 convolution producing 3 channels. Every convolution is
    followed by a ReLU, including the output layer.

    Args:
        reg_const: L2 kernel-regularization factor. It is applied to the first
            eleven convolutions only; the last two layers are unregularized.

    Returns:
        An uncompiled keras Model.
    """
    def conv_relu(tensor, filters, kernel_size=3, padding='same', regularized=True):
        # A fresh regularizer instance is created for every regularized layer.
        extra = {'kernel_regularizer': l2(reg_const)} if regularized else {}
        conv_out = Conv2D(filters, kernel_size=kernel_size, padding=padding,
                          strides=(1, 1), **extra)(tensor)
        return Activation('relu')(conv_out)

    inputs = Input(shape=(WINDOW_DIM, WINDOW_DIM, 6))

    # Two unpadded layers shrink the window down to the prediction size.
    features = conv_relu(inputs, 200, padding='valid')
    features = conv_relu(features, 200, padding='valid')

    # Regularized, size-preserving trunk.
    for filters in (200, 250, 250, 250, 300, 300, 300, 350, 350):
        features = conv_relu(features, filters)

    # Unregularized head.
    features = conv_relu(features, 350, regularized=False)
    outputs = conv_relu(features, 3, kernel_size=7, regularized=False)

    return Model(inputs=inputs, outputs=outputs)

### Train Model
The following section of code retrieves a Keras model instance and trains it over a number of epochs. I'm also using mean squared error as the model evaluation metric throughout the rest of the notebook, instead of the entire two-part cost function defined above. (Using different functions for training and evaluation may seem incorrect, but since the mean squared error is essentially the meat and bones of our cost function, I figured no harm would be done by using MSE for evaluation.)

In [15]:
# Retrieve a Keras model instance (no L2 regularization) and report its size.
model = get_model(reg_const=0.0)
print("Trainable Parameters Count = ", count_params(model.trainable_weights))

Trainable Parameters Count =  7805553


In [None]:
# Checkpoint callback: after an epoch, the weights are written to file only if
# the monitored loss improved (save_best_only), so the file always holds the
# best weights seen so far.
# NOTE(review): 'loss' is the training loss; validation data is supplied to
# fit() below, so 'val_loss' may be the intended criterion - confirm.
# NOTE(review): this file name ends in '_.h5', whereas the loading cell further
# down reads 'model80_76.h5' - confirm the difference is intentional.
checkpoint_filepath = model_directory + 'model80_76_.h5'
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
)

# Train on the two-part cost, but report plain MSE as the metric.
model.compile(
    optimizer=Adam(4e-05),
    loss=edge_fill_loss,
    metrics=[MeanSquaredError()],
)

model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=12,
    callbacks=[model_checkpoint_callback],
    validation_data=(X_test, Y_test),
)

### Evaluate Model
Obtain model MSE for the train and the test set

In [17]:
# Train MSE: mean over every sample, pixel and channel.
Y_pred_train = model.predict(X_train)
print("Model Train MSE = {}".format(np.mean(np.square(Y_pred_train - Y_train))))

# Test MSE, computed the same way on the held-out split.
Y_pred_test = model.predict(X_test)
print("Model Test MSE = {}".format(np.mean(np.square(Y_pred_test - Y_test))))

Model Train MSE = 238.6639862060547
Model Test MSE = 251.4951934814453


### Test Model
The following piece of code obtains a middle frame prediction for a pair of frames, and compares the quality of prediction against the average frame.

In [18]:
# Index of the example to be chosen from the test set (-1 selects the last one).
NUM = -1

In [None]:
# The two input frames of the chosen example, cropped to the region the model
# predicts: channels 0:3 hold the first frame, channels 3:6 the second.
X1 = X_test[NUM][MODEL_WINDOW_START:MODEL_WINDOW_START + MODEL_OUTPUT_DIM,
                 MODEL_WINDOW_START:MODEL_WINDOW_START + MODEL_OUTPUT_DIM, 0:3]
X2 = X_test[NUM][MODEL_WINDOW_START:MODEL_WINDOW_START + MODEL_OUTPUT_DIM,
                 MODEL_WINDOW_START:MODEL_WINDOW_START + MODEL_OUTPUT_DIM, 3:6]

# Predict on a batch of one (the list index keeps the batch axis) and clamp
# the output to the valid 8-bit pixel range.
Y_pred = np.clip(model.predict(X_test[[NUM], 0:WINDOW_DIM, 0:WINDOW_DIM, :])[0], 0, 255)
Y_act = Y_test[NUM]

In [None]:
# Scale to [0, 1] and reverse the channel order before display.
plt.imshow(np.flip(X1 / 255, axis=2))
print('\nThe first frame\n')

In [None]:
# Scale to [0, 1] and reverse the channel order before display.
plt.imshow(np.flip(X2 / 255, axis=2))
print('\nThe second frame\n')

In [None]:
# Truncate the prediction to 8-bit pixels, scale to [0, 1] and reverse the
# channel order before display.
plt.imshow(np.flip(Y_pred.astype(np.uint8) / 255, axis=2))
print('\nThe predicted middle frame\n')

In [None]:
# Scale to [0, 1] and reverse the channel order before display.
plt.imshow(np.flip(Y_act / 255, axis=2))
print('\nThe actual middle frame\n')

The function defined below is essentially the numpy version of the second part of the cost function we defined above. It can be used to gain an insight into what the target edge definition for any middle frame looks like, and how well the model is able to achieve it. 

In [None]:
def edge_detection(img, max_translation, threshold=60, edge_value=50):
    """Plot a per-pixel edge score for a single image.

    NumPy counterpart of the edge term of the cost function. For every offset
    (i, j) with |i|, |j| <= max_translation, each pixel is compared with the
    pixel displaced by that offset; whenever the absolute differences summed
    over the channels exceed `threshold`, `edge_value` is added to that pixel's
    score. Only pixels at least `max_translation` away from every border are
    scored, so the plotted map is smaller than the image by
    2 * max_translation along each axis.

    Args:
        img: array indexed as (row, column, channel). Height and width may
            differ (previously the image had to be square).
        max_translation: largest neighbour offset, in pixels, along each axis.
        threshold: summed absolute channel difference above which two pixels
            count as dissimilar.
        edge_value: score added for every dissimilar neighbour.

    Returns:
        None. The score map is drawn with plt.imshow.
    """
    height, width = img.shape[0], img.shape[1]
    inner_height = height - 2 * max_translation
    inner_width = width - 2 * max_translation

    # Convert once to float so that differences of unsigned pixels cannot wrap.
    pixels = np.asarray(img, dtype=np.float32)
    result = np.zeros((inner_height, inner_width))

    for i in range(-max_translation, max_translation + 1):
        for j in range(-max_translation, max_translation + 1):
            # Two equally sized views of the image, displaced by (i, j).
            img_window_1 = pixels[max(0, i):min(height, height + i),
                                  max(0, j):min(width, width + j), :]
            img_window_2 = pixels[max(0, -i):min(height, height - i),
                                  max(0, -j):min(width, width - j), :]
            img_diff = (np.sum(np.absolute(img_window_1 - img_window_2), axis=2) > threshold) * edge_value

            # Crop so that position (0, 0) of the crop always corresponds to
            # image pixel (max_translation, max_translation).
            upp_index = max_translation - max(0, i)
            left_index = max_translation - max(0, j)
            result += img_diff[upp_index:upp_index + inner_height,
                               left_index:left_index + inner_width]

    plt.imshow(result)

In [None]:
# Target edge definition: edge map of the actual middle frame.
edge_detection(Y_act, max_translation=1)

In [None]:
# Achieved edge definition: edge map of the predicted middle frame.
edge_detection(Y_pred, max_translation=1)

### Save model to file

In [None]:
# Write the JSON description produced by model.to_json() to the model directory.
model_json = model.to_json()
with open(model_directory + 'model80_76.json', mode="w") as json_file:
    json_file.write(model_json)

### Load model from file

In [9]:
from keras.models import model_from_json

# Read the saved JSON description; the context manager closes the file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open(model_directory + 'model80_76.json', 'r') as json_file:
    loaded_model_json = json_file.read()

# Rebuild the model from the JSON description (weights are loaded separately).
model = model_from_json(loaded_model_json)

In [16]:
# Load previously saved weights into the rebuilt model.
# NOTE(review): the training checkpoint above writes 'model80_76_.h5' (with a
# trailing underscore), not this file - confirm which one is intended.
model.load_weights(filepath=model_directory + 'model80_76.h5')

In [12]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 80, 80, 6)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 78, 78, 200)       11000     
_________________________________________________________________
activation_1 (Activation)    (None, 78, 78, 200)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 76, 76, 200)       360200    
_________________________________________________________________
activation_2 (Activation)    (None, 76, 76, 200)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 76, 76, 200)       360200    
_________________________________________________________________
activation_3 (Activation)    (None, 76, 76, 200)       0   