In [None]:
%matplotlib inline

In [None]:
# import default scientific libraries
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

In [None]:
# import pandas for easy data management, keras for deep learning and scikit for feature generation
import pandas as pd
import keras
import keras.models as km
import keras.layers as kl
import keras.optimizers as ko
import mca
from sklearn import metrics, model_selection, manifold, decomposition

In [None]:
# item count; net1 sizes the item embedding as n_i + 1 and the feature lookups
# use `Item - 1`, so item ids are presumably 1-based (1..n_i) -- TODO confirm
n_i = 1000

In [None]:
# user count; net1 sizes the user embedding as n_u + 1 and the feature lookups
# use `User - 1`, so user ids are presumably 1-based (1..n_u) -- TODO confirm
n_u = 10000

In [None]:
# training ratings; later cells read its Item, User and Prediction columns
train_x = pd.read_csv('train_x.csv')

In [None]:
# held-out ratings (same columns as train_x); passed as validation_data when fitting
test_x = pd.read_csv('test_x.csv')

In [None]:
# one-hot encode the train ratings: rating r selects row r - 1 of the 5x5
# identity matrix, e.g. a rating of 2 gives [0, 1, 0, 0, 0]
train_y = np.eye(5)[train_x.Prediction.values - 1]
train_y.shape

In [None]:
# one-hot encode the test ratings: rating r selects row r - 1 of the 5x5
# identity matrix, e.g. a rating of 2 gives [0, 1, 0, 0, 0]
test_y = np.eye(5)[test_x.Prediction.values - 1]
test_y.shape

In [None]:
# load needed features

In [None]:
# precomputed feature matrix; the fit/predict cells look rows up with
# `User - 1`, so despite the name it holds one row per user
items = np.load('items.npy')
items.shape

In [None]:
# second precomputed feature matrix, also looked up with `User - 1`
# (one row per user)
items2 = np.load('items2.npy')
items2.shape

In [None]:
# third precomputed feature matrix, also looked up with `User - 1`
# (one row per user)
items3 = np.load('items3.npy')
items3.shape

In [None]:
# precomputed feature matrix; the fit/predict cells look rows up with
# `Item - 1`, so despite the name it holds one row per item
users = np.load('users.npy')
users.shape

In [None]:
# second precomputed feature matrix, also looked up with `Item - 1`
# (one row per item)
users2 = np.load('users2.npy')
users2.shape

In [None]:
# third precomputed feature matrix, also looked up with `Item - 1`
# (one row per item)
users3 = np.load('users3.npy')
users3.shape

In [None]:
# reset the Keras backend session before building the nets, so that re-running
# the notebook does not keep piling up state from earlier models
keras.backend.clear_session()

In [None]:
# build net 1
def net1():
    """Build and compile the first net: a 5-way rating classifier.

    The model takes eight inputs, in this order:

      1. item id              (width 1)
      2. user id              (width 1)
      3. item context, set 1  (a row of ``users``,  looked up by item)
      4. user context, set 1  (a row of ``items``,  looked up by user)
      5. item context, set 2  (a row of ``users2``, looked up by item)
      6. user context, set 2  (a row of ``items2``, looked up by user)
      7. item context, set 3  (a row of ``users3``, looked up by item)
      8. user context, set 3  (a row of ``items3``, looked up by user)

    The width of every context input is taken from the array that is
    actually fed into it. Previously the first two item/user context inputs
    were sized from the opposite family of arrays, which only works when
    both families happen to have the same number of columns.

    Reads the module-level ``n_i``, ``n_u``, ``items*`` and ``users*``.

    Returns:
        A compiled Keras model whose output is a softmax over the 5 ratings.
    """
    # batch normalization is used to stabilise training
    # dropout fights overfitting by randomly zeroing activations while training
    # relu was the best activation function found
    # the dense layers shrink step by step towards the output layer

    # size of the item / user id embeddings
    features = 48

    # item id input; the embedding output has shape (1, features), so it is
    # flattened to a plain vector (ids go up to n_i, hence n_i + 1 rows)
    input_i = kl.Input(shape=[1])
    i = kl.Embedding(n_i + 1, features)(input_i)
    i = kl.Flatten()(i)
    i = kl.normalization.BatchNormalization()(i)

    # user id input, handled exactly like the item id
    input_u = kl.Input(shape=[1])
    u = kl.Embedding(n_u + 1, features)(input_u)
    u = kl.Flatten()(u)
    u = kl.normalization.BatchNormalization()(u)

    # input layers for item contexts: fed with users*[Item - 1]
    input_im = kl.Input(shape=[users.shape[1]])
    im = kl.normalization.BatchNormalization()(input_im)
    input_im2 = kl.Input(shape=[users2.shape[1]])
    im2 = kl.normalization.BatchNormalization()(input_im2)
    input_im3 = kl.Input(shape=[users3.shape[1]])
    im3 = kl.normalization.BatchNormalization()(input_im3)

    # input layers for user contexts: fed with items*[User - 1]
    input_um = kl.Input(shape=[items.shape[1]])
    um = kl.normalization.BatchNormalization()(input_um)
    input_um2 = kl.Input(shape=[items2.shape[1]])
    um2 = kl.normalization.BatchNormalization()(input_um2)
    input_um3 = kl.Input(shape=[items3.shape[1]])
    um3 = kl.normalization.BatchNormalization()(input_um3)

    # merge everything together
    nn = kl.merge([i, u, im, um, im2, um2, im3, um3], mode='concat')

    # densely connected layers
    nn = kl.Dense(1024, activation='relu')(nn)
    nn = kl.Dropout(0.5)(nn)
    nn = kl.normalization.BatchNormalization()(nn)
    nn = kl.Dense(512, activation='relu')(nn)
    nn = kl.Dropout(0.5)(nn)
    nn = kl.normalization.BatchNormalization()(nn)
    nn = kl.Dense(256, activation='relu')(nn)
    nn = kl.Dropout(0.5)(nn)
    nn = kl.normalization.BatchNormalization()(nn)
    nn = kl.Dense(128, activation='relu')(nn)

    # last layer uses softmax to obtain one confidence per rating
    output = kl.Dense(5, activation='softmax')(nn)

    # optimize with adam and categorical cross entropy over the one-hot labels
    model = km.Model([input_i, input_u, input_im, input_um, input_im2, input_um2, input_im3, input_um3], output)
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model

# instantiate net 1; the training, saving and prediction cells use `model`
model = net1()

In [None]:
# training
# inputs follow the order of net1's input list: item id, user id, then the
# three (item context, user context) pairs; item contexts are rows of users*
# picked by `Item - 1`, user contexts are rows of items* picked by `User - 1`
# test_x / test_y serve as the validation set
# NOTE(review): `nb_epoch` looks like the Keras 1.x name of this argument --
# confirm against the installed Keras version
epochs = model.fit(
    [train_x.Item, train_x.User, users[train_x.Item - 1], items[train_x.User - 1], users2[train_x.Item - 1], items2[train_x.User - 1], users3[train_x.Item - 1], items3[train_x.User - 1]], train_y,
    validation_data=([test_x.Item, test_x.User, users[test_x.Item - 1], items[test_x.User - 1], users2[test_x.Item - 1], items2[test_x.User - 1], users3[test_x.Item - 1], items3[test_x.User - 1]], test_y),
    batch_size=4096,
    nb_epoch=6
)

In [None]:
# training loss vs validation loss, one curve per recorded history key
for curve in ('loss', 'val_loss'):
    plt.plot(epochs.history[curve], label=curve)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()

In [None]:
# save net 1 to disk as model1.h5
model.save('model1.h5')

In [None]:
# compute prediction for train set
# same input order as in the fit call; each output row holds the 5 softmax
# confidences of net 1
trained = model.predict(
    [train_x.Item, train_x.User, users[train_x.Item - 1], items[train_x.User - 1], users2[train_x.Item - 1], items2[train_x.User - 1], users3[train_x.Item - 1], items3[train_x.User - 1]], 
    batch_size=4096
)

In [None]:
# compute prediction for validation set
# same input order as in the fit call; each output row holds the 5 softmax
# confidences of net 1
validated = model.predict(
    [test_x.Item, test_x.User, users[test_x.Item - 1], items[test_x.User - 1], users2[test_x.Item - 1], items2[test_x.User - 1], users3[test_x.Item - 1], items3[test_x.User - 1]], 
    batch_size=4096
)

In [None]:
# validation rmse when predicting the rating with the highest confidence;
# argmax yields a column index 0..4, so + 1 maps it back to a rating 1..5
np.sqrt(metrics.mean_squared_error(np.argmax(validated, 1) + 1, test_x.Prediction))

In [None]:
# compute, for each row, the mean of the ratings weighted by their predicted confidence
def weighted_mean(preds):
    """Turn per-rating confidences into a confidence-weighted mean rating.

    Column ``k`` of ``preds`` is taken as the confidence of rating ``k + 1``.
    For every row the result is ``sum(k * p_k) / sum(p_k) + 1``, so rows do
    not have to be normalised to sum to one.

    The computation is vectorised over all rows and works for any number of
    rating columns. No sorting of the confidences is needed, because a sum
    over all columns does not depend on their order.

    Args:
        preds: array-like of shape (n_samples, n_ratings).

    Returns:
        1-D float array of length n_samples. A row whose confidences sum to
        zero yields nan.
    """
    preds = np.asarray(preds, dtype=np.float64)
    # zero-based rating index of every column
    index = np.arange(preds.shape[1])
    return preds @ index / preds.sum(axis=1) + 1

In [None]:
# abandoned net 2

In [None]:
# build net 2
def net2():
    """Build and compile the second net: a regressor from 5 values to a rating.

    The single input has width 5 (the output of the previous net); the single
    output unit is trained with mean squared error.

    Returns:
        The compiled Keras model.
    """
    # input layer from the previous net
    input2 = kl.Input(shape=[5])

    # three identical stages of dense (relu) -> dropout -> batch normalization;
    # normalization is there for stability, dropout against overfitting
    hidden = input2
    for _ in range(3):
        hidden = kl.Dense(128, activation='relu')(hidden)
        hidden = kl.Dropout(0.2)(hidden)
        hidden = kl.normalization.BatchNormalization()(hidden)
    hidden = kl.Dense(128, activation='relu')(hidden)

    # output layer is the rating
    # NOTE(review): a relu output unit has zero gradient whenever its
    # pre-activation is negative -- confirm this is intended for a regression
    rating = kl.Dense(1, activation='relu')(hidden)

    # adam with mse between predicted and true ratings; the original author
    # found the learning-rate decay necessary for convergence
    regressor = km.Model([input2], rating)
    regressor.compile(optimizer=ko.Adam(decay=0.0025), loss='mean_squared_error')
    return regressor
    
# instantiate net 2; the training, prediction and saving cells use `model2`
model2 = net2()

In [None]:
# training
# net 2 maps net 1's confidences to the raw rating: it is fitted on net 1's
# train-set predictions and validated on net 1's test-set predictions
# NOTE(review): `trained` was predicted on the very rows net 1 was fitted on,
# so it may look more confident than `validated` -- confirm this is acceptable
epochs2 = model2.fit(
    [trained], train_x.Prediction,
    validation_data=([validated], test_x.Prediction),
    batch_size=4096,
    nb_epoch=3
)

In [None]:
# compute net 2's prediction for the train set (from net 1's train-set output)
trained2 = model2.predict([trained], batch_size=4096)

In [None]:
# compute net 2's prediction for the validation set (from net 1's test-set output)
validated2 = model2.predict([validated], batch_size=4096)

In [None]:
# validation rmse of net 2's predicted ratings against the true test ratings
np.sqrt(metrics.mean_squared_error(validated2, test_x.Prediction))

In [None]:
# save net 2 to disk as model2.h5
model2.save('model2.h5')