In [3]:
import keras
from keras.models import Model
from keras import layers
from keras import backend as K
from keras.engine.topology import Layer

from sklearn.metrics import roc_auc_score
import tensorflow as tf
import numpy as np

Using TensorFlow backend.


## build model

In [8]:
# Input image geometry: 224 x 224 pixels with 3 channels.
w = h = 224
input_shape = (w, h, 3)

dropout = 0.2  # dropout rate handed to create_base_vgg
latent_d = 10  # latent dimension

# for test, this should be obtained from the dataset
user_num = 1000


def euclidean_distance(vects):
    """Row-wise L2 distance between the two tensors packed in ``vects``.

    ``vects`` is unpacked into exactly two tensors. Their squared
    differences are summed over axis 1 (the axis is kept with size 1),
    the sum is clamped from below at ``K.epsilon()`` and then
    square-rooted, so the argument of the square root is never zero.
    """
    left, right = vects
    squared_gap = K.square(left - right)
    total = K.sum(squared_gap, axis=1, keepdims=True)
    return K.sqrt(K.maximum(total, K.epsilon()))


def eucl_dist_output_shape(shapes):
    """Output-shape helper paired with ``euclidean_distance``.

    ``shapes`` must hold exactly two shape tuples. The result is
    ``(batch, 1)`` where ``batch`` is the first entry of the first shape.
    """
    first_shape, _second_shape = shapes
    batch_dim = first_shape[0]
    return (batch_dim, 1)




# Symbolic inputs of the network: two item images of shape `input_shape`
# and one integer user index per sample.
input_i = layers.Input(shape=input_shape, name="input_i") # image of item i
input_j = layers.Input(shape=input_shape, name="input_j") # image of item j
input_idx = layers.Input(shape=[1], name="input_user", dtype='int32') # idx of user u


# custom layer, learns the latent matrix theta_u (one row per user)
class ThetaLayer(Layer):
    """Score an item's visual feature against a trainable per-user vector.

    The layer owns a ``(user_num, latent_d)`` weight matrix. It is called
    with a list ``[item, u]`` where ``item`` is the item feature produced by
    the base network (``(batch, latent_d, 1)`` in this notebook) and ``u``
    is the integer user index (``(batch, 1)``). For every sample it returns
    the inner product of that user's row with that item's feature, shaped
    ``(batch, 1)``.
    """

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='theta_u_matrix',
                                      shape=(user_num, latent_d),
                                      initializer='uniform',
                                      trainable=True)
        super(ThetaLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        assert isinstance(x, list)
        item, u = x  # u: user idx; item: visual feature of item
        n_latent = K.int_shape(self.kernel)[1]
        # K.dot on two 3-D tensors follows Theano semantics and would pair
        # every user in the batch with every item in the batch, giving
        # (batch, 1, batch, 1). We need one score per sample, so flatten both
        # operands to (batch, latent_d) and take a row-wise inner product.
        theta_u = K.reshape(K.gather(self.kernel, u), (-1, n_latent))
        item_vec = K.reshape(item, (-1, n_latent))
        return K.sum(theta_u * item_vec, axis=1, keepdims=True)

    def compute_output_shape(self, input_shape):
        # input_shape is [item_shape, user_idx_shape]; keep the batch dimension.
        return (input_shape[0][0], 1)


# load the VGG pretrained on imagenet
def create_base_vgg(dropout):
    """Build the shared image branch: VGG19 backbone plus a small dense head.

    The backbone is VGG19 with ImageNet weights, without its fully connected
    top, using global average pooling and the module-level ``input_shape``.
    On top of it sit Dense(256, relu) -> Dropout(``dropout``) ->
    Dense(``latent_d``, relu), and the result is reshaped to
    ``(latent_d, 1)``.

    Returns a Keras ``Model`` named ``"base_vgg"`` mapping the VGG input to
    that reshaped latent feature.
    """
    backbone = keras.applications.vgg19.VGG19(
        include_top=False,  # whether to include the fc layers
        weights='imagenet',
        input_tensor=None,
        input_shape=input_shape,
        pooling='avg',  # in my experience, global avg works better than flatten, need to check
        classes=1000)

    hidden = layers.Dense(256, activation='relu', name='fc1')(backbone.output)
    hidden = layers.Dropout(dropout)(hidden)
    latent = layers.Dense(latent_d, activation='relu', name='predictions')(hidden)
    latent_column = layers.Reshape(target_shape=(latent_d, 1))(latent)

    return Model(inputs=backbone.input, outputs=latent_column, name="base_vgg")



# Because we re-use the same instances (`base_vgg` and `theta`) for both
# images, the weights of the network are shared across the two branches.
base_vgg = create_base_vgg(dropout)
theta = ThetaLayer(name='theta_layer')

# Branch for item i: image feature, then the user-specific score.
x1 = base_vgg(input_i)
x1 = theta([x1, input_idx])

# Branch for item j: same layers, same user index.
x2 = base_vgg(input_j)
x2 = theta([x2, input_idx])

# Alternative (disabled): compare the two branch outputs by L2 distance.
# distance = layers.Lambda(euclidean_distance,
#                   output_shape=eucl_dist_output_shape)([x1, x2])

# Training output: sigmoid(x1 - x2).
distance = layers.Subtract(name='substract')([x1, x2])
distance = layers.Activation(activation='sigmoid', name='sigmoid')(distance)

model = Model([input_i, input_j, input_idx], distance)

model.summary()

# -----------Evaluation---------------
# Evaluation differs from training:
# input of training:   [user u, item i, item j];
# input of evaluation: [user u, item i]
# The evaluation model reuses the item-i branch, so it shares all weights
# with `model`; its output is sigmoid(x1).
predict_score = layers.Activation(activation='sigmoid')(x1)
evaluation_model = Model([input_i, input_idx], predict_score)
evaluation_model.summary()

input shape [(None, 10, 1), (None, 1)]
input shape [(None, 10, 1), (None, 1)]
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_i (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_j (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
base_vgg (Model)                (None, 10, 1)        20158282    input_i[0][0]                    
                                                                 input_j[0][0]                    
__________________________________________________________________________________________________
input_user (InputLayer)        

## load data

In [20]:
# load the partitioned dataset

dataset_name = 'AmazonFashion6ImgPartitioned.npy'
dataset_dir = '../dataset/amazon/'

# The .npy file holds a pickled object array (six heterogeneous entries), so
# NumPy >= 1.16.3 refuses to read it unless allow_pickle=True is passed.
# encoding='latin1' is what lets a pickle written by Python 2 be read here.
# SECURITY: unpickling can execute arbitrary code -- only load this file
# from a trusted source.
dataset = np.load(dataset_dir + dataset_name, encoding='latin1', allow_pickle=True)

# Unpack the six entries stored in the file.
[user_train, user_validation, user_test, Item, usernum, itemnum] = dataset


def load_image(file_path):
    """Read an image file and return it as a numpy array.

    The image is resized to the module-level model input size while loading.

    Args:
        file_path: path of the image file to read.

    Returns:
        The array produced by ``keras.preprocessing.image.img_to_array``.
    """
    # Function-scope import: the notebook's import cell does not bring
    # `image` into scope, so the original body could not resolve it.
    from keras.preprocessing import image

    # Keras expects target_size as (height, width). The original passed
    # (w, h), which only worked because both are 224. It also referenced an
    # undefined name `f_path` instead of the `file_path` parameter.
    img = image.load_img(file_path, target_size=(h, w))
    return image.img_to_array(img)

{'title': "Cheap Monday Women's Tight Fit Jean, Hard Coated, 27", 'salesRank': {'Clothing': 586687}, 'imgs': 'ÿØÿà\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00ÿÛ\x00C\x00\x05\x03\x04\x04\x04\x03\x05\x04\x04\x04\x05\x05\x05\x06\x07\x0c\x08\x07\x07\x07\x07\x0f\x0b\x0b\t\x0c\x11\x0f\x12\x12\x11\x0f\x11\x11\x13\x16\x1c\x17\x13\x14\x1a\x15\x11\x11\x18!\x18\x1a\x1d\x1d\x1f\x1f\x1f\x13\x17"$"\x1e$\x1c\x1e\x1f\x1eÿÛ\x00C\x01\x05\x05\x05\x07\x06\x07\x0e\x08\x08\x0e\x1e\x14\x11\x14\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1eÿÀ\x00\x11\x08\x01¼\x01V\x03\x01"\x00\x02\x11\x01\x03\x11\x01ÿÄ\x00\x1f\x00\x00\x01\x05\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0bÿÄ\x00µ\x10\x00\x02\x01\x03\x03\x02\x04\x03\x05\x05\x04\x04\x00\x00\x01}\x01\x02\x03\x00\x04\x11\x05\x12!1A\x06\x13Qa\x07"q\x142\x81\x

In [None]:
# ----------------------metrics---------------------------
# ROC AUC is used as the evaluation metric.
# For details on implementing AUC in Keras, see https://github.com/keras-team/keras/issues/3230
def auc(y_true, y_pred):
    """Approximate the area under the ROC curve with a 1000-threshold sum.

    For 1000 evenly spaced thresholds in [0, 1] the true-alert rate
    (``binary_PTA``) and false-alert rate (``binary_PFA``) are evaluated.
    A leading 1 is prepended to the false-alert rates, consecutive
    differences give the bin widths, and the true-alert rates weighted by
    those widths are summed.
    """
    thresholds = np.linspace(0, 1, 1000)

    true_alert_rates = tf.stack(
        [binary_PTA(y_true, y_pred, t) for t in thresholds], axis=0)
    false_alert_rates = tf.stack(
        [binary_PFA(y_true, y_pred, t) for t in thresholds], axis=0)

    # Prepend 1 so that there is one bin width for every threshold.
    false_alert_rates = tf.concat([tf.ones((1,)), false_alert_rates], axis=0)
    bin_widths = -(false_alert_rates[1:] - false_alert_rates[:-1])

    return K.sum(true_alert_rates * bin_widths, axis=0)

# PFA: probability of false alert (false-positive rate) for a binary classifier
def binary_PFA(y_true, y_pred, threshold=0.5):
    """Fraction of negative labels that are predicted positive.

    Args:
        y_true: label tensor; assumed to hold 0/1 values -- TODO confirm
            against the training data.
        y_pred: score tensor; entries ``>= threshold`` count as alerts.
        threshold: decision threshold. A plain float default is used instead
            of a backend variable created at definition time, which would be
            tied to whichever graph/session existed when the cell ran.

    Returns:
        Scalar tensor FP / N. When there are no negative labels the result
        is 0 rather than NaN.
    """
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # N = total number of negative labels
    N = K.sum(1 - y_true)
    # FP = total number of false alerts, alerts from the negative class labels
    FP = K.sum(y_pred - y_pred * y_true)
    # Clamp the denominator: unchanged for N >= 1, avoids 0/0 for N == 0.
    return FP / K.maximum(N, K.epsilon())

# PTA: probability of true alert (true-positive rate) for a binary classifier
def binary_PTA(y_true, y_pred, threshold=0.5):
    """Fraction of positive labels that are predicted positive.

    Args:
        y_true: label tensor; assumed to hold 0/1 values -- TODO confirm
            against the training data.
        y_pred: score tensor; entries ``>= threshold`` count as alerts.
        threshold: decision threshold. A plain float default is used instead
            of a backend variable created at definition time, which would be
            tied to whichever graph/session existed when the cell ran.

    Returns:
        Scalar tensor TP / P. When there are no positive labels the result
        is 0 rather than NaN.
    """
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # P = total number of positive labels
    P = K.sum(y_true)
    # TP = total number of correct alerts, alerts from the positive class labels
    TP = K.sum(y_pred * y_true)
    # Clamp the denominator: unchanged for P >= 1, avoids 0/0 for P == 0.
    return TP / K.maximum(P, K.epsilon())


# ----------------------loss function---------------------------
# not sure which loss function is better
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Computes mean(y_true * y_pred^2 + (1 - y_true) * max(margin - y_pred, 0)^2)
    with margin fixed at 1.
    '''
    margin = 1
    # Term weighted by y_true: squared prediction.
    label_one_term = y_true * K.square(y_pred)
    # Term weighted by (1 - y_true): squared hinge on the margin.
    hinge = K.maximum(margin - y_pred, 0)
    label_zero_term = (1 - y_true) * K.square(hinge)
    return K.mean(label_one_term + label_zero_term)

def entropy_loss(y_true, y_pred):
    ''' from Comparative Deep Learning of Hybrid Representations for Image Recommendations
    https://arxiv.org/pdf/1604.01252.pdf
    use cross entropy as the loss

    Computes mean(-y_true * log(p) - (1 - y_true) * log(1 - p)) where p is
    y_pred clipped to [eps, 1 - eps]. Without the clip, a saturated
    prediction (p == 0 or p >= 1) evaluates log(0) and the loss becomes
    inf/NaN.
    '''
    eps = K.epsilon()
    y_pred = K.clip(y_pred, eps, 1 - eps)
    positive_term = -K.log(y_pred)
    negative_term = -K.log(1 - y_pred)
    return K.mean(y_true * positive_term + (1 - y_true) * negative_term)

# ----------------------optimizer---------------------------
# optimizer: rms or adam?
# NOTE(review): despite its name, `rms` is an Adagrad optimizer, not RMSprop.
# Confirm which one is intended; the name is used by the compile calls below.
rms = keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)




In [None]:
batch_size = 128
epochs = 30


def _train_only_last_layers(network, n_trainable):
    """Freeze every layer of ``network`` except its last ``n_trainable``.

    Args:
        network: an object exposing a ``layers`` list whose items have a
            ``trainable`` attribute.
        n_trainable: number of trailing layers to leave trainable; must be
            positive, because a slice ending at ``-0`` would be empty.

    Raises:
        ValueError: if ``n_trainable`` is not positive.
    """
    if n_trainable <= 0:
        raise ValueError("n_trainable must be a positive integer")
    for layer in network.layers[:-n_trainable]:
        layer.trainable = False
    for layer in network.layers[-n_trainable:]:
        layer.trainable = True


# first: freeze all convolutional layers, only train fc layers (which were randomly initialized)
_train_only_last_layers(base_vgg, 4)

# set trainable layers before model compile
model.compile(optimizer=rms, loss=contrastive_loss)
# model.fit(x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None)
# evaluation_model.evaluate(x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None)

# second: train the last conv block
_train_only_last_layers(base_vgg, 10)

# set trainable layers before model compile
model.compile(optimizer=rms, loss=contrastive_loss)
# model.fit(x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None)
# evaluation_model.evaluate(x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None)
