# Deep Homography Net

In [1]:
import glob
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.python.client import device_lib
# Show the compute devices TensorFlow can see on this machine.
local_devices = device_lib.list_local_devices()
print(local_devices)

# Open a session with an unmodified (default) ConfigProto.
# Optional allocator override, currently disabled:
#   config.gpu_options.allocator_type = 'BFC'
config = tf.ConfigProto()
tf.Session(config=config)

  from ._conv import register_converters as _register_converters


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10148439541308051429
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3407900672
locality {
  bus_id: 1
  links {
  }
}
incarnation: 2099962373123739046
physical_device_desc: "device: 0, name: GeForce GTX 970, pci bus id: 0000:01:00.0, compute capability: 5.2"
]


<tensorflow.python.client.session.Session at 0x4cd1c18>

## Data Wrangling

Read the directory

In [20]:
def load_img_paths(target):
    '''
    Retrieve the paths of all .tif images directly inside a directory.

    Parameters
    ----------
    target : str
        Directory to search. Sub-directories are not searched.

    Returns
    -------
    list of str
        One path per matching file, each prefixed with ``target``. The list
        is sorted because ``glob.glob`` yields files in arbitrary order;
        sorting keeps the dataset order identical from run to run. An empty
        list is returned when nothing matches.
    '''
    return sorted(glob.glob(target + '/*.tif'))

data_dir = r'../data'
original_data_dir = data_dir + '/learning'

# Take the paths straight from the helper. Routing them through a DataFrame
# and reading column 0 back raised an opaque `KeyError: 0` whenever the
# directory held no images.
train_paths = load_img_paths(original_data_dir)

# DataFrame view kept under its original name for any cell that still uses it.
all_files = pd.DataFrame(train_paths)

# Fail early with a readable message instead of training on nothing.
if not train_paths:
    raise FileNotFoundError('No .tif images found in ' + original_data_dir)

train_paths[:5]

['../data/learning\\Tp26_Y000_X000_040.tif',
 '../data/learning\\Tp26_Y000_X001_040.tif',
 '../data/learning\\Tp26_Y000_X002_040.tif',
 '../data/learning\\Tp26_Y000_X003_040.tif',
 '../data/learning\\Tp26_Y000_X004_040.tif']

## HomographyNet

Regression variant

> We use 8 convolutional layers with a max pooling layer (2x2, stride 2) after every two convolutions. The 8 convolutional layers have the following number of filters per layer: 64 [x4], 128 [x4]. The convolutional layers are followed by two fully connected layers. The first fully connected layer has 1024 units. Dropout with a probability of 0.5 is applied after the final convolutional layer and the first fully-connected layer.

> The regression network directly produces 8 real-valued
numbers and uses the Euclidean (L2) loss as the final layer
during training.

> using stochastic gradient descent (SGD) with momentum of 0.9. We use a base learning rate of 0.005 and decrease the learning rate by a factor of 10 after every 30,000 iterations.

In [18]:
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, MaxPooling2D, Input, Activation, Dropout
from keras.layers.convolutional import Conv2D
import keras.backend as K

def euclidean_distance(y_true, y_pred):
    '''
    Euclidean (L2) distance between prediction and target over the last axis.

    The summed squared difference is clamped from below at ``K.epsilon()``
    before the square root, so the root is never taken of exactly zero
    (presumably to keep the gradient finite). The reduced axis is kept
    with size 1.
    '''
    squared_diff = K.square(y_pred - y_true)
    squared_norm = K.sum(squared_diff, axis=-1, keepdims=True)
    clamped = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(clamped)

def conv_block(m, filters):
    '''
    Apply two 3x3 'same'-padded ReLU convolutions, then 2x2 max pooling
    with stride 2.

    m       -- input tensor the layers are chained onto
    filters -- number of filters used by both convolutions

    Returns the output tensor of the pooling layer.
    '''
    for _ in range(2):
        m = Conv2D(filters, (3, 3), padding='same', activation='relu')(m)
    pooled = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(m)
    return pooled

def homography_regression_model(input_dims):
    '''
    Build the regression variant of HomographyNet.

    The layout follows the description quoted in this notebook: eight 3x3
    convolutions with 64, 64, 64, 64, 128, 128, 128, 128 filters, a
    1024-unit fully connected layer and an 8-value linear output. Dropout
    (0.5) follows the last convolution and the first fully connected layer.

    The earlier version stacked ten convolutions (six of them with 128
    filters) and left FC_1024 without an activation, which made the two
    dense layers a purely linear head.

    Parameters
    ----------
    input_dims : tuple of int
        (height, width) of the input; a channel axis of size 2 is appended.

    Returns
    -------
    keras.models.Model
        Uncompiled model mapping (height, width, 2) inputs to 8 outputs.
    '''
    input_shape = (*input_dims, 2)

    input_layer = Input(shape=input_shape, name='input_layer')

    # conv1-conv6: three pairs, each followed by 2x2 max pooling.
    x = conv_block(input_layer, 64)
    x = conv_block(x,           64)
    x = conv_block(x,           128)

    # conv7-conv8: final pair, left unpooled as it was in the original code.
    # NOTE(review): the quoted text says pooling follows every two
    # convolutions; confirm against the paper's architecture figure whether
    # a pool belongs after this pair.
    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = Dropout(0.5)(x)

    x = Flatten()(x)

    # The ReLU keeps the head non-linear.
    x = Dense(1024, activation='relu', name='FC_1024')(x)
    x = Dropout(0.5)(x)

    # Linear output: the 8 regressed real values.
    out = Dense(8, name='output')(x)

    return Model(inputs=input_layer, outputs=[out])

In [19]:
from keras.optimizers import SGD
from skimage.io import imread

# Spatial size (height, width) of the network input.
img_size = (128, 128)

# Learning rate and momentum are the values quoted from the paper above.
# `decay` is Keras' per-update learning-rate decay, not the paper's
# divide-by-10 step schedule.
opt = SGD(
    lr=0.005,
    momentum=0.9,
    decay=1e-6,
)

my_model = homography_regression_model(input_dims=img_size)
my_model.compile(loss=euclidean_distance, optimizer=opt)
my_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     (None, 128, 128, 2)       0         
_________________________________________________________________
conv2d_71 (Conv2D)           (None, 128, 128, 64)      1216      
_________________________________________________________________
conv2d_72 (Conv2D)           (None, 128, 128, 64)      36928     
_________________________________________________________________
max_pooling2d_29 (MaxPooling (None, 64, 64, 64)        0         
_________________________________________________________________
conv2d_73 (Conv2D)           (None, 64, 64, 64)        36928     
_________________________________________________________________
conv2d_74 (Conv2D)           (None, 64, 64, 64)        36928     
_________________________________________________________________
max_pooling2d_30 (MaxPooling (None, 32, 32, 64)        0         
__________

## Data Generators

We generate the "seemingly infinite training data" on the fly with a data generator built on Keras' `Sequence` class.

In [4]:
import math
from os import path
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.preprocessing.image import img_to_array, load_img
from keras.utils import Sequence
from skimage.transform import resize, rotate


class LossHistory(Callback):
    '''
    Keras callback that records the loss curves of a training run.

    Attributes (reset at the start of every training run):
        losses: training loss reported at the end of each batch.
        val_losses: validation loss reported at the end of each epoch;
            an entry is None when the epoch logs carry no 'val_loss'.
    '''

    def on_train_begin(self, logs=None):
        self.losses = []
        self.val_losses = []

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        self.losses.append(logs.get('loss'))

    def on_epoch_end(self, epoch, logs=None):
        # Keras reports val_loss only in the epoch-end logs. Reading it in
        # on_batch_end, as before, stored nothing but None.
        logs = logs or {}
        self.val_losses.append(logs.get('val_loss'))
        
class DataGenerator(Sequence):
    '''
    Adapted from https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly.html
    Allows for multiprocessing in the fit generator

    Serves batches of grayscale images read from a list of file paths.

    train_set  -- list of image file paths
    batch_size -- number of images per batch (the last batch may be smaller)
    im_size    -- (height, width) every image is resized to
    '''

    def __init__(self, train_set, batch_size, im_size):
        self.train = train_set
        self.batch_size = batch_size
        self.im_size = im_size

    def __len__(self):
        '''Number of batches needed to cover the whole path list once.'''
        return int(np.ceil(len(self.train) / float(self.batch_size)))

    def read_image(self, fname):
        '''Load one image from disk as a grayscale PIL image.'''
        return load_img(fname, grayscale=True)

    def __getitem__(self, idx):
        '''
        Return batch number `idx` as a tuple (X, y).

        X has shape (n, height, width, 1) and holds the resized grayscale
        images, where n is the number of paths in this batch. y has the
        same shape and is all zeros.
        '''
        batch = self.train[idx * self.batch_size:(idx + 1) * self.batch_size]
        height, width = self.im_size[0], self.im_size[1]

        # Size the arrays from this instance's own batch, not from a global
        # `batch_size` that only exists once the training cell has run.
        X = np.zeros(shape=(len(batch), height, width, 1))
        # NOTE(review): the original never filled y and the intended target
        # is not visible here, so it stays all zeros. TODO: generate targets.
        y = np.zeros(shape=(len(batch), height, width, 1))

        for j, fname in enumerate(batch):
            img = self.read_image(fname)
            # PIL sizes are (width, height); resize only when needed.
            if img.size != (width, height):
                img = img.resize((width, height))
            X[j] = img_to_array(img)

        return (X, y)

# Stop training once `val_loss` has failed to improve for 5 epochs in a row.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=5,
    verbose=1,
)

# ModelCheckpoint writes its weight files into tmp/, so make sure the
# directory is there before training starts.
from os import mkdir
try:
    mkdir('tmp')
except FileExistsError:
    print('tmp directory already exists')

# Loss recorder passed to fit_generator alongside the other callbacks.
history = LossHistory()

## Training

> The networks are trained for 90,000 total iterations using a batch size of 64.

The paper does not say how many epochs this corresponds to.

In [None]:
batch_size = 16
epochs     = 50

# EarlyStopping and ModelCheckpoint(save_best_only=True) both watch
# `val_loss`, which Keras only reports when validation data is supplied.
# Without it early stopping never fires and no checkpoint is written, so a
# reproducible slice of the images is held out for validation.
VALIDATION_FRACTION = 0.1
SPLIT_SEED = 0

if len(train_paths) < 2:
    raise ValueError('Need at least two images to split into training and validation sets')

# Seeded shuffle so the split is identical on every run.
shuffled_paths = np.random.RandomState(SPLIT_SEED).permutation(train_paths).tolist()
n_val = max(1, int(len(shuffled_paths) * VALIDATION_FRACTION))
val_paths = shuffled_paths[:n_val]
fit_paths = shuffled_paths[n_val:]

# DataGenerator takes the same constructor arguments if augmentation is not wanted.
training_generator   = AugmentedDataGenerator(fit_paths, batch_size, img_size)
validation_generator = AugmentedDataGenerator(val_paths, batch_size, img_size)

# descriptive weight file naming
checkpointer = ModelCheckpoint(filepath=('tmp/weights-%d-%d.hdf5' %
                                         (batch_size, img_size[0])),
                               verbose=1, save_best_only=True)

hist = my_model.fit_generator(training_generator,
    validation_data=validation_generator,
    epochs=epochs,
    workers=3,
    verbose=2,
    callbacks=[history, checkpointer, early_stopping]
)