# Test the NCF module in the [cf_ec2](../cf_ec2) folder on the ml-1m dataset and save the best model (using the integrated modules with compile and fit components, with GMF and MLP pretraining)

#### 4/20/2020, test with original paper's dataset

In [1]:
import numpy as np 
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import (
    Adam,
    Adamax,
    Adagrad,
    SGD,
    RMSprop
)
from tensorflow.keras.layers import (
    Embedding, 
    Input,
    Flatten, 
    Multiply, 
    Concatenate,
    Dense
)

import sys
sys.path.append('../')
from cf_ec2 import (
    GMF,
    MLP,
    NCF,
    Data,
    evaluation,
    evaluation_grouped
)

#### check original paper's dataset

In [2]:
!tree ../metadata/original_dataset/

[01;34m../metadata/original_dataset/[00m
├── item_input
├── labels
├── testNegatives
├── testRatings
├── train
└── user_input

0 directories, 6 files


In [3]:
import pickle

In [4]:
def _unpickle(path):
    """Read a single pickled object from ``path``.

    ``encoding='latin1'`` is kept from the original loader (presumably the
    files were pickled under Python 2 -- TODO confirm).
    NOTE: ``pickle.load`` can execute arbitrary code; only use it on trusted files.
    """
    with open(path, 'rb') as fp:
        return pickle.load(fp, encoding='latin1')


# Training interactions plus the held-out positives and per-user negatives.
train = _unpickle('../metadata/original_dataset/train')
testRatings = _unpickle('../metadata/original_dataset/testRatings')
testNegatives = _unpickle('../metadata/original_dataset/testNegatives')

In [5]:
type(train), type(testRatings), type(testNegatives)

(scipy.sparse.dok.dok_matrix, list, list)

In [6]:
testRatings[:5]

[[0, 25], [1, 133], [2, 207], [3, 208], [4, 222]]

In [7]:
len(testRatings), len(testNegatives)

(6040, 6040)

In [8]:
# Load the pre-built training triples (user ids, item ids, labels) in one pass.
# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
_triple_paths = (
    '../metadata/original_dataset/user_input',
    '../metadata/original_dataset/item_input',
    '../metadata/original_dataset/labels',
)
_triples = []
for _triple_path in _triple_paths:
    with open(_triple_path, 'rb') as fp:
        _triples.append(pickle.load(fp, encoding='latin1'))
user_input, item_input, labels = _triples

In [9]:
type(user_input), type(item_input), type(labels)

(list, list, list)

#### reformat the test dataset

In [10]:
# Preview the first five test rows as "row: [user, positive item] //first five negatives".
# Slicing avoids walking all test ratings just to print five of them.
for idx, value in enumerate(testRatings[:5]):
    print('{}: {} //{}'.format(idx, value, testNegatives[idx][:5]))

0: [0, 25] //[1064, 174, 2791, 3373, 269]
1: [1, 133] //[1072, 3154, 3368, 3644, 549]
2: [2, 207] //[2216, 209, 2347, 3, 1652]
3: [3, 208] //[3023, 1489, 1916, 1706, 1221]
4: [4, 222] //[1794, 3535, 108, 593, 466]


In [11]:
# Flatten the test set into three parallel lists: for every test rating the
# positive item comes first (label 1), followed by that row's negatives (label 0),
# with the user id repeated once per candidate item.
user_test, item_test, labels_test = [], [], []
for idx, rating in enumerate(testRatings):
    negatives = testNegatives[idx]
    user_test += [rating[0]] * (len(negatives) + 1)
    item_test += [rating[1], *negatives]
    labels_test += [1] + [0] * len(negatives)

In [12]:
len(user_test)

604000

## step 1: create the model architecture and fit model with dataset from original paper

In [13]:
# The shape of the sparse training matrix supplies the user and item counts
# that are passed to the NCF constructor below.
num_users, num_items = train.shape
num_users, num_items

(6040, 3706)

In [14]:
# ---- Hyper-parameters for the NCF model --------------------------------------
n_users = num_users
n_items = num_items

# GMF branch: latent dimension; MLP branch: widths of the dense tower.
# (In the model summary the two MLP embeddings are 32-d each, i.e. half of layers_mlp[0].)
# A smaller alternative configuration is n_factors_gmf = 4, layers_mlp = [16, 8, 4].
n_factors_gmf = 8
layers_mlp = [64, 32, 16, 8]

# L2 regularisation strengths (one value per MLP layer); all disabled here.
reg_gmf = 0.
reg_layers_mlp = [0., 0., 0., 0.]

learning_rate = 0.001

# Pre-training settings. They are not used by the cells shown in this notebook.
# The original cell assigned `filepath_mlp_pretrain` twice; the second assignment
# is taken to be a copy-paste slip for the GMF path, so both names are defined.
flg_pretrain = ''
filepath = ''
filepath_gmf_pretrain = ''
filepath_mlp_pretrain = ''

# Training schedule, consumed by the fit cell.
num_epochs = 20
batch_size = 256

# Build the Keras graph for the model.
ncf = NCF(
    n_users=n_users,
    n_items=n_items,
    n_factors_gmf=n_factors_gmf,
    layers_mlp=layers_mlp,
    reg_gmf=reg_gmf,
    reg_layers_mlp=reg_layers_mlp
)
ncf.create_model()

In [15]:
ncf.compile(learning_rate=learning_rate)

In [16]:
ncf.model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_mlp_User (Embedding)  (None, 1, 32)        193280      user_input[0][0]                 
__________________________________________________________________________________________________
embedding_mlp_Item (Embedding)  (None, 1, 32)        118592      item_input[0][0]                 
______________________________________________________________________________________________

In [18]:
# Train the NCF model with checkpointing, CSV logging, early stopping and
# learning-rate decay. `batch_size` and `num_epochs` come from the
# hyper-parameter cell (256 and 20) instead of being re-declared here, so there
# is a single place to tune them.

# Artefacts are written to the same project-relative folder that the later cells
# read from ('../metadata/ncf4/...'), not to a machine-specific absolute path.
model_dir = '../metadata/ncf4'
path_model_weights = model_dir + '/ncf-weights-improvement-{epoch:02d}-{val_loss:.4f}.hdf5'
path_csvlog = model_dir + '/ncf_log.csv'

# NOTE(review): the validation data below is the test set, and checkpoint
# selection, early stopping and LR decay all monitor its loss. Metrics reported
# on the same test set are therefore optimistically biased; consider carving a
# separate validation split out of the training data.

## create the callback metrics
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=path_model_weights,
    monitor='val_loss',
    verbose=1,
    save_best_only=True  # only write a file when val_loss improves
)
csvlog = tf.keras.callbacks.CSVLogger(
    filename=path_csvlog,
    separator=',',
    append=False  # overwrite the log on every run
)
# 'val_loss' is the Keras default monitor; spelled out to match the other callbacks.
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=12)
lrreduce = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.3,
    patience=4,
    verbose=1
)
metrics2 = evaluation_grouped.metricsCallback(batch_size=batch_size, log_steps=100)

## fit the model
train_inputs = [np.array(user_input), np.array(item_input)]
test_inputs = [np.array(user_test), np.array(item_test)]
hist = ncf.model.fit(
    x=train_inputs,
    y=np.array(labels),
    batch_size=batch_size,
    epochs=num_epochs,
    verbose=2,
    shuffle=True,
    callbacks=[metrics2, checkpoint, csvlog, earlystop, lrreduce],
    validation_data=(test_inputs, np.array(labels_test))
)

Train on 4970845 samples, validate on 604000 samples
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.16958, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/ncf4/ncf-weights-improvement-01-0.1696.hdf5
4970845/4970845 - 57s - loss: 0.3219 - accuracy: 0.8570 - val_loss: 0.1696 - val_accuracy: 0.9391
Epoch 2/20

Epoch 00002: val_loss improved from 0.16958 to 0.15503, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/ncf4/ncf-weights-improvement-02-0.1550.hdf5
4970845/4970845 - 56s - loss: 0.2744 - accuracy: 0.8802 - val_loss: 0.1550 - val_accuracy: 0.9410
Epoch 3/20

Epoch 00003: val_loss improved from 0.15503 to 0.15225, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/ncf4/ncf-weights-improvement-03-0.1523.hdf5
4970845/4970845 - 53s - loss: 0.2627 - accuracy: 0.8858 - val_loss: 0.1523 - val_accuracy: 0.9412
Epoch 4/20

Epoch 00004: val_loss improved from 0.15225 to 0.14039, saving model to /Users/xyin/Documents/wo

#### try to load the parameters from the best model

In [19]:
# Restore the epoch-4 checkpoint (val_loss 0.1404 in the training log). The log shown
# above is truncated, so this is presumably the lowest-val_loss file -- confirm against ncf_log.csv.
ncf.model.load_weights('../metadata/ncf4/ncf-weights-improvement-04-0.1404.hdf5')

In [21]:
%%time 

scores = ncf.model.evaluate(
    x = [
        np.array(user_test),
        np.array(item_test)
    ],
    y = np.array(labels_test),
    verbose=0
)

CPU times: user 16.8 s, sys: 2.44 s, total: 19.2 s
Wall time: 12.3 s


In [22]:
scores

[0.14038968149246955, 0.945404]

In [23]:
ncf.model.save('../metadata/ncf4/ncf-best.hdf5')

In [24]:
model4 = keras.models.load_model('../metadata/ncf4/ncf-best.hdf5')

In [26]:
%%time 

# Wrap the reloaded model in the project's grouped evaluator.
# NOTE(review): `users`/`items` are the *training* id lists; how metricsEval uses
# them is not visible here -- confirm against cf_ec2.evaluation_grouped.
evaluator = evaluation_grouped.metricsEval(
    model=model4,
    users=user_input,
    items=item_input
)
# Slow step: the recorded progress bar covers 6040 iterations and takes about 9 minutes.
evaluator.getRecs()
# Overlap-based metrics on the labelled test triples; unpacked as (rmse, auc, logloss).
# In the recorded output the logloss matches the loss from `model.evaluate` (~0.1404).
rmse,auc,logloss = evaluator.getOverlapBasedMetrics(
    user_test,
    item_test,
    labels_test
)
rmse,auc,logloss

100%|██████████| 6040/6040 [09:03<00:00, 11.12it/s]


CPU times: user 11min 20s, sys: 1min 22s, total: 12min 43s
Wall time: 9min 29s


(0.2016294613947125, 0.8940297085975318, 0.1403896816127934)

In [28]:
%%time

# Rank-based metrics from the same evaluator; unpacked as (recall, precision, ndcg, map2).
# NOTE(review): in the recorded output precision is exactly recall / 10, which with one
# positive per user suggests a top-10 cutoff -- presumably k=10, confirm in evaluation_grouped.
recall,precision,ndcg,map2 = evaluator.getRankBasedMetrics(
    user_test,
    item_test,
    labels_test
)
recall,precision,ndcg,map2

CPU times: user 50.7 s, sys: 8.58 s, total: 59.3 s
Wall time: 1min 1s


(0.029635761589403974,
 0.002963576158940397,
 0.013684980463253551,
 0.008958596131609377)

#### The metrics are still an order of magnitude lower than the numbers reported in the paper