# Test the NCF modules in the [cf_ec2](../cf_ec2) folder

In [1]:
import numpy as np 
import pandas as pd
import keras
from keras import Model
from keras.regularizers import l2
from keras.optimizers import (
    Adam,
    Adamax,
    Adagrad,
    SGD,
    RMSprop
)
from keras.layers import (
    Embedding, 
    Input,
    Flatten, 
    Multiply, 
    Concatenate,
    Dense
)

import sys
sys.path.append('../')
from cf_ec2 import (
    GMF,
    MLP,
    NCF,
    Data
)

Using TensorFlow backend.


## step 1: set the hyperparameters and file paths

In [12]:
#### dataset dimensions (they match the unique user/item counts of the training file)
n_users, n_items = 6040, 3704
#### GMF settings, passed to GMF/NCF as n_factors_gmf and reg_gmf
n_factors_gmf = 32
reg_gmf = 0.0
#### MLP settings: layers_mlp plus a same-length list of regularization values
layers_mlp = [64, 32, 16, 8]
reg_layers_mlp = [0.0] * len(layers_mlp)
#### optimizer / training settings
learning_rate = 1e-3
num_epochs = 2
batch_size = 100
#### pretrain flag and file paths; the pretrain step treats '' as "not set"
#### NOTE(review): flg_pretrain and filepath are not referenced by the cells shown here
flg_pretrain = ''
filepath = filepath_gmf_pretrain = filepath_mlp_pretrain = ''

## step 2: build the model

In [13]:
#### build the NCF model object from the hyperparameters set in the config cell
ncf = NCF(
    n_users=n_users, n_items=n_items,
    n_factors_gmf=n_factors_gmf, reg_gmf=reg_gmf,
    layers_mlp=layers_mlp, reg_layers_mlp=reg_layers_mlp,
)
model = ncf.create_model()
#### compile with the Adam optimizer and a binary cross-entropy loss
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=learning_rate))

## step 3: load pretrained model

In [14]:
#### pretrained weights are used only when BOTH weight-file paths are set
#### NOTE(review): if only one of the two paths is set, this step is skipped silently
if not (filepath_gmf_pretrain == '' or filepath_mlp_pretrain == ''):
    #### stand-alone GMF and MLP models built with the same hyperparameters as the NCF model
    model_gmf = GMF(
        n_users=n_users, n_items=n_items,
        n_factors_gmf=n_factors_gmf, reg_gmf=reg_gmf,
    ).create_model()
    model_mlp = MLP(
        n_users=n_users, n_items=n_items,
        layers_mlp=layers_mlp, reg_layers_mlp=reg_layers_mlp,
    ).create_model()
    #### restore the saved weights of each model
    model_gmf.load_weights(filepath_gmf_pretrain)
    model_mlp.load_weights(filepath_mlp_pretrain)
    #### hand both models to the NCF helper, which returns the combined model
    model = ncf.load_pretrain_model(
        model=model,
        model_gmf=model_gmf,
        model_mlp=model_mlp,
        num_layers_mlp=len(layers_mlp),
    )

## step 4: load the training data and train the model

In [15]:
#### read the tab-separated, header-less training ratings file into a DataFrame
train = pd.read_csv(
    '../data/ml-1m.train.rating',
    names=['user', 'item', 'rating', 'timestamp'],
    header=None,
    sep='\t',
)

In [16]:
#### preview the first three rows of the training ratings
train.head(3)

Unnamed: 0,user,item,rating,timestamp
0,0,32,4,978824330
1,0,34,4,978824330
2,0,4,5,978824291


In [17]:
#### distinct user / item counts; these should agree with n_users / n_items in the config cell
#### (bracket access is used because `item` is a fragile attribute name on pandas objects)
tuple(train[col].nunique() for col in ('user', 'item'))

(6040, 3704)

In [18]:
#### wrap the ratings in the project's Data helper (binary=True) and prepare the DNN training inputs
dataset = Data(
    train=train,
    col_user='user', col_item='item', col_rating='rating',
    binary=True,
)
dataset.prepTrainDNN()
#### pull out the user / item / label sequences that are later fed to model.fit
users_input, items_input, labels = dataset.users, dataset.items, dataset.ratings

In [19]:
#### preview the first five entries of each training sequence
tuple(seq[:5] for seq in (users_input, items_input, labels))

(array([0, 0, 0, 0, 0]), array([0, 1, 2, 3, 4]), array([1., 1., 1., 1., 1.]))

In [20]:
#### sanity check: look up the entry stored for internal item index 0
#### (the output is 32, the raw item id of the first row shown by train.head)
dataset.id2item[0]

32

In [21]:
#### length of the user input sequence
#### NOTE(review): if this equals len(train), no negative samples were added by prepTrainDNN -- confirm
len(users_input)

994169

In [22]:
#### first training pass: one epoch with the configured batch size
#### NOTE(review): epochs is hard-coded to 1 here; num_epochs from the config cell is not used
hist = model.fit(
    x=[np.array(seq) for seq in (users_input, items_input)],
    y=np.array(labels),
    epochs=1,
    batch_size=batch_size,
    shuffle=True,
    verbose=2,
)

Epoch 1/1
 - 46s - loss: 0.0054


In [23]:
#### second training pass on the same `model` object: two epochs with a hard-coded batch size of 10
#### NOTE(review): labels[:5] shown earlier are all 1 and the loss printed below collapses to
#### ~1.19e-07 -- presumably the training set holds only positive labels; confirm whether
#### negative sampling is expected before trusting this model
hist = model.fit(
    x=list(map(np.array, (users_input, items_input))),
    y=np.array(labels),
    epochs=2,
    batch_size=10,
    shuffle=True,
    verbose=2,
)

Epoch 1/2
 - 445s - loss: 1.1943e-07
Epoch 2/2
 - 442s - loss: 1.1921e-07
