In [44]:
import keras
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras import layers
from keras.models import Model
from keras.preprocessing import image
from sklearn.metrics import roc_auc_score

In [32]:



# Input geometry: square 224x224 images with 3 channels.
w = h = 224
input_shape = (w, h, 3)

# Dropout rate handed to `create_base_vgg` for the fully connected head.
dropout = 0.1


def euclidean_distance(vects):
    """Return the L2 distance between two batches of vectors.

    `vects` is unpacked into exactly two tensors. Their squared differences
    are summed over axis 1 (that axis is kept with size 1), the sum is
    floored at `K.epsilon()`, and the square root of the result is returned.
    """
    left, right = vects
    squared_diff = K.square(left - right)
    squared_norm = K.sum(squared_diff, axis=1, keepdims=True)
    # Floor the sum before the square root; presumably this avoids an
    # undefined gradient when both inputs are identical.
    floored = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(floored)


def eucl_dist_output_shape(shapes):
    """Output shape of the distance layer: (first input's leading dim, 1).

    `shapes` must contain exactly two shape tuples; only the first entry of
    the first shape is used.
    """
    first_shape, _second_shape = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)




# Two image inputs with identical shape, named "inputa" and "inputb".
input_a, input_b = (
    layers.Input(shape=input_shape, name=input_name)
    for input_name in ("inputa", "inputb")
)


# load the VGG pretrained on imagenet
def create_base_vgg(dropout):
    """Build the shared feature extractor: VGG19 conv base + two FC layers.

    The VGG19 backbone is created without its top classifier, with ImageNet
    weights, for the module-level `input_shape`. Its output is flattened and
    passed through Dense(4096, relu) -> Dropout(dropout) -> Dense(4096, relu).

    Args:
        dropout: rate given to the single Dropout layer between `fc1` and
            `fc2`.

    Returns:
        A `Model` named "base_vgg" mapping the backbone input to the `fc2`
        activations.
    """
    backbone = keras.applications.vgg19.VGG19(
        weights='imagenet',
        include_top=False,  # drop the original fully connected classifier
        input_shape=input_shape,
        input_tensor=None,
        pooling=None,
        classes=1000,
    )

    # Fully connected head stacked on top of the convolutional features.
    head = [
        layers.Flatten(name='flatten'),
        layers.Dense(4096, activation='relu', name='fc1'),
        layers.Dropout(dropout),
        layers.Dense(4096, activation='relu', name='fc2'),
    ]

    features = backbone.output
    for head_layer in head:
        features = head_layer(features)

    return Model(inputs=backbone.input, outputs=features, name="base_vgg")

# A single `base_vgg` instance is applied to both inputs, so the two
# branches share one set of weights.
base_vgg = create_base_vgg(dropout)
x1, x2 = base_vgg(input_a), base_vgg(input_b)

# The model output is the Euclidean distance between the two branch outputs.
distance = layers.Lambda(
    euclidean_distance,
    output_shape=eucl_dist_output_shape,
)([x1, x2])

model = Model(inputs=[input_a, input_b], outputs=distance)

model.summary()






__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputa (InputLayer)             (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
inputb (InputLayer)             (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
base_vgg (Model)                (None, 4096)         139570240   inputa[0][0]                     
                                                                 inputb[0][0]                     
__________________________________________________________________________________________________
lambda_4 (Lambda)               (None, 1)            0           base_vgg[1][0]                   
          

In [33]:
# load image
def load_image(file_path):
    """Load one image file and return it as an array.

    Args:
        file_path: path of the image file to read.

    Returns:
        The result of `image.img_to_array` applied to the image loaded with
        `target_size=(w, h)`.
    """
    # NOTE(review): Keras documents `target_size` as (height, width); (w, h)
    # mirrors `input_shape` and is only equivalent while w == h.
    img = image.load_img(file_path, target_size=(w, h))
    return image.img_to_array(img)

In [45]:
# ----------------------metrics---------------------------
# ROC AUC is used as the evaluation metric
# for details, see https://github.com/keras-team/keras/issues/3230
def auc(y_true, y_pred):
    """Approximate the area under the ROC curve over 1000 thresholds.

    `binary_PTA` and `binary_PFA` are evaluated at 1000 evenly spaced
    thresholds in [0, 1]. Each true-alert rate is weighted by the drop in
    false-alert rate since the previous threshold (starting from 1.0), and
    the weighted values are summed.
    """
    thresholds = np.linspace(0, 1, 1000)
    true_alert_rates = tf.stack(
        [binary_PTA(y_true, y_pred, t) for t in thresholds], axis=0)
    false_alert_rates = tf.stack(
        [binary_PFA(y_true, y_pred, t) for t in thresholds], axis=0)

    # Prepend 1.0 so the first bin is measured from a false-alert rate of one.
    padded_rates = tf.concat([tf.ones((1,)), false_alert_rates], axis=0)
    # Bin width = previous false-alert rate minus the current one.
    bin_widths = padded_rates[:-1] - padded_rates[1:]

    return K.sum(true_alert_rates * bin_widths, axis=0)

# PFA, prob false alert for binary classifier
def binary_PFA(y_true, y_pred, threshold=0.5):
    """Probability of false alert (false positive rate) at `threshold`.

    Args:
        y_true: label tensor; negatives are counted as `sum(1 - y_true)`.
        y_pred: score tensor; entries `>= threshold` count as alerts.
        threshold: decision threshold. The default is a plain float, so no
            backend variable is created when the function is defined.

    Returns:
        FP / N as a backend tensor. `K.epsilon()` is added to the
        denominator, so a batch with no negative labels yields 0 rather
        than NaN.
    """
    alerts = K.cast(y_pred >= threshold, 'float32')
    # N = total number of negative labels
    negatives = K.sum(1 - y_true)
    # FP = alerts raised on negative-class samples
    false_alerts = K.sum(alerts - alerts * y_true)
    return false_alerts / (negatives + K.epsilon())

# P_TA prob true alerts for binary classifier
def binary_PTA(y_true, y_pred, threshold=0.5):
    """Probability of true alert (true positive rate) at `threshold`.

    Args:
        y_true: label tensor; positives are counted as `sum(y_true)`.
        y_pred: score tensor; entries `>= threshold` count as alerts.
        threshold: decision threshold. The default is a plain float, so no
            backend variable is created when the function is defined.

    Returns:
        TP / P as a backend tensor. `K.epsilon()` is added to the
        denominator, so a batch with no positive labels yields 0 rather
        than NaN.
    """
    alerts = K.cast(y_pred >= threshold, 'float32')
    # P = total number of positive labels
    positives = K.sum(y_true)
    # TP = alerts raised on positive-class samples
    true_alerts = K.sum(alerts * y_true)
    return true_alerts / (positives + K.epsilon())


# ----------------------loss function---------------------------
# not sure which loss function is better
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss (Hadsell, Chopra & LeCun, 2006).

    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Entries with y_true == 1 contribute y_pred**2; entries with y_true == 0
    contribute max(margin - y_pred, 0)**2, with margin fixed at 1. The mean
    over all entries is returned.
    '''
    margin = 1
    similar_term = y_true * K.square(y_pred)
    hinge = K.maximum(margin - y_pred, 0)
    dissimilar_term = (1 - y_true) * K.square(hinge)
    return K.mean(similar_term + dissimilar_term)

def entropy_loss(y_true, y_pred):
    '''Cross-entropy loss on the sigmoid of the prediction.

    From "Comparative Deep Learning of Hybrid Representations for Image
    Recommendations", https://arxiv.org/pdf/1604.01252.pdf

    Args:
        y_true: label tensor with entries weighting the two terms
            (y_true for -log(p), 1 - y_true for -log(1 - p)).
        y_pred: raw model output; it is squashed with a sigmoid here.

    Returns:
        Mean of y_true * -log(p) + (1 - y_true) * -log(1 - p), where
        p = sigmoid(y_pred).
    '''
    margin = 1
    eps = K.epsilon()
    prob = K.sigmoid(y_pred)
    # Floor both log arguments at epsilon: once the sigmoid saturates to
    # exactly 0 or 1, log(0) would make the loss infinite / NaN.
    positive_term = -K.log(K.maximum(prob, eps))
    negative_term = -K.log(K.maximum(margin - prob, eps))
    return K.mean(y_true * positive_term + (1 - y_true) * negative_term)

# ----------------------optimizer---------------------------
# optimizer: rms or adam?
# NOTE(review): the name `rms` is bound to an Adagrad optimizer, not RMSprop;
# confirm which one is intended before renaming or swapping it.
rms = keras.optimizers.Adagrad(
    lr=0.01,
    epsilon=None,
    decay=0.0,
)
adam = keras.optimizers.Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
    amsgrad=False,
)




In [None]:
# first: train only the top layers (which were randomly initialized)
# freeze all convolutional layers, train fc layers
# Only the last four layers of `base_vgg` stay trainable; every earlier
# layer is frozen.
for index, layer in enumerate(base_vgg.layers):
    layer.trainable = index >= len(base_vgg.layers) - 4

# Trainable flags must be set before compiling for them to take effect.
# NOTE(review): `auc` thresholds predictions over [0, 1], while the model
# outputs an unbounded distance -- confirm the metric is meaningful here.
model.compile(optimizer=rms, loss=contrastive_loss, metrics=[auc])