# DeepFM

<img src="../../img/deep_fm.png" >

In [1]:
import pandas as pd
import numpy as np
import json
import torch
from torch import nn
from torch.utils.data.dataset import Dataset
from collections import OrderedDict

In [2]:
learning_rate = 0.0001
batch_size = 64
num_epochs = 5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
with open('../data/movielens/feature_map.json') as obj:
    feature_map = json.load(obj)
feature_map

{'dataset_id': 'movielens',
 'num_fields': 26,
 'feature_specs': {'movieId': {'source': 'item',
   'type': 'categorical',
   'vocab_size': 935,
   'index': 0},
  'userId': {'source': 'user',
   'type': 'categorical',
   'vocab_size': 22540,
   'index': 1},
  'rating': {'source': 'user',
   'type': 'numerical',
   'vocab_size': 1,
   'index': 2},
  'timestamp': {'source': 'user',
   'type': 'numerical',
   'vocab_size': 1,
   'index': 3},
  'releaseYear': {'source': 'item',
   'type': 'numerical',
   'vocab_size': 1,
   'index': 4},
  'movieGenre1': {'source': 'item',
   'type': 'categorical',
   'vocab_size': 18,
   'index': 5},
  'movieGenre2': {'source': 'item',
   'type': 'categorical',
   'vocab_size': 18,
   'index': 6},
  'movieGenre3': {'source': 'item',
   'type': 'categorical',
   'vocab_size': 15,
   'index': 7},
  'movieRatingCount': {'source': 'item',
   'type': 'numerical',
   'vocab_size': 1,
   'index': 8},
  'movieAvgRating': {'source': 'item',
   'type': 'numerical',
 

In [4]:
class MovielensDataset(Dataset):
    def __init__(self, url):
        self.df = pd.read_csv(url)
    
    def __getitem__(self, idx):
        x, y = self.df.iloc[idx, :-1].values.astype(np.float32), self.df.iloc[idx, -1].astype(np.float32)
        return x, y
    
    def __len__(self):
        return self.df.shape[0]

In [5]:
train_dataset = MovielensDataset('../data/movielens/data_for_train.csv')
test_dataset = MovielensDataset('../data/movielens/data_for_test.csv')

In [6]:
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [7]:
class DeepFM(nn.Module):
    def __init__(self,
                 feature_map,
                 embedding_dim=10,
                 hidden_units=[256, 128, 64]):
        super(DeepFM, self).__init__()
        self.feature_map = feature_map
        # Embedding
        self.embedding = nn.ModuleDict()
        for feature, feature_spec in feature_map['feature_specs'].items():
            if feature_spec['type'] == 'numerical':
                self.embedding[feature] = nn.Linear(1, embedding_dim, bias=False)
            elif feature_spec['type'] == 'categorical':
                padding_idx = feature_spec.get('padding_idx', None)
                self.embedding[feature] = nn.Embedding(feature_spec['vocab_size'],
                                                       embedding_dim,
                                                       padding_idx=padding_idx)
        # FM
        self.batch_norm = nn.BatchNorm1d(feature_map['num_fields'])
        self.fm_layer = FM(feature_map['num_fields'])
        # DNN
        input_dim = feature_map['num_fields'] * embedding_dim
        hidden_units = [input_dim] + hidden_units
        hidden_layers = []
        for i in range(len(hidden_units) - 1):
            hidden_layers.append(nn.Linear(hidden_units[i], hidden_units[i + 1]))
            hidden_layers.append(nn.ReLU())
        hidden_layers.append(nn.Linear(hidden_units[-1], 1))
        self.dense_layer = nn.Sequential(*hidden_layers)
        # Sigmoid
        self.output_activation = nn.Sigmoid()

    def forward(self, X):
        feature_emb_list = []
        for feature, feature_spec in self.feature_map['feature_specs'].items():
            if feature_spec['type'] == 'numerical':
                raw_feature = X[:, feature_spec['index']].float().view(-1, 1)
            elif feature_spec['type'] == 'categorical':
                raw_feature = X[:, feature_spec['index']].long()
            embedding_vec = self.embedding[feature](raw_feature)
            feature_emb_list.append(embedding_vec)
        feature_emb = torch.stack(feature_emb_list, dim=1)
        out = self.fm_layer(self.batch_norm(X), feature_emb)
        out += self.dense_layer(feature_emb.flatten(start_dim=1))
        y_pred = self.output_activation(out).squeeze(1)
        return y_pred

class FM(nn.Module):
    def __init__(self, input_dim):
        super(FM, self).__init__()
        self.lr_layer = nn.Linear(input_dim, 1)
        self.product_layer = InnerProduct()

    def forward(self, X, feature_emb):
        out = self.lr_layer(X)
        out += self.product_layer(feature_emb)
        return out
    
class InnerProduct(nn.Module):
    def __init__(self):
        super(InnerProduct, self).__init__()
    
    def forward(self, X):
        sum_of_square = torch.sum(X, dim=1) ** 2 # sum then square
        square_of_sum = torch.sum(X ** 2, dim=1) # square then sum
        cross_term = sum_of_square - square_of_sum
        return cross_term.sum(dim=-1, keepdim=True) * 0.5

In [8]:
model = DeepFM(feature_map).to(device)
# Loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (X, y) in enumerate(train_loader):
        X = X.to(device)
        y = y.to(device)

        # Forward pass
        output = model(X)
        loss = criterion(output, y)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 300 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}".format(
                epoch + 1, num_epochs, i + 1, total_step, loss.item()))

Epoch [1/5], Step [300/1388] Loss: 30.3384
Epoch [1/5], Step [600/1388] Loss: 25.1653
Epoch [1/5], Step [900/1388] Loss: 26.0267
Epoch [1/5], Step [1200/1388] Loss: 21.2643
Epoch [2/5], Step [300/1388] Loss: 5.1713
Epoch [2/5], Step [600/1388] Loss: 4.4765
Epoch [2/5], Step [900/1388] Loss: 1.7232
Epoch [2/5], Step [1200/1388] Loss: 0.0996
Epoch [3/5], Step [300/1388] Loss: 1.7092
Epoch [3/5], Step [600/1388] Loss: 3.6385
Epoch [3/5], Step [900/1388] Loss: 1.6641
Epoch [3/5], Step [1200/1388] Loss: 0.2319
Epoch [4/5], Step [300/1388] Loss: 1.5725
Epoch [4/5], Step [600/1388] Loss: 3.1596
Epoch [4/5], Step [900/1388] Loss: 1.5932
Epoch [4/5], Step [1200/1388] Loss: 1.5625
Epoch [5/5], Step [300/1388] Loss: 1.6399
Epoch [5/5], Step [600/1388] Loss: 0.0005
Epoch [5/5], Step [900/1388] Loss: 3.1250
Epoch [5/5], Step [1200/1388] Loss: 0.0000


In [9]:
# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for X, y in test_loader:
        X = X.to(device)
        y = y.to(device).bool()
        output = model(X)
        y_pred = output > 0.5
        total += y.shape[0]
        correct += (y_pred == y).sum().item()

    print('Accuracy of the model on the test images: {:.2f} %'.format(
        100 * correct / total))

Accuracy of the model on the test images: 98.82 %


## 读取数据

In [1]:
import tensorflow as tf

In [2]:
# Training samples path, change to your local path
training_samples_file_path = tf.keras.utils.get_file(
    'trainingSamples.csv', '../data/trainingSamples.csv')
# Test samples path, change to your local path
test_samples_file_path = tf.keras.utils.get_file('testSamples.csv',
                                                 '../data/testSamples.csv')


# load sample as tf dataset
def get_dataset(file_path):
    dataset = tf.data.experimental.make_csv_dataset(file_path,
                                                    batch_size=12,
                                                    label_name='label',
                                                    na_value="0",
                                                    num_epochs=1,
                                                    ignore_errors=True)
    return dataset


# split as test dataset and training dataset
train_data = get_dataset(training_samples_file_path)
test_data = get_dataset(test_samples_file_path)

Downloading data from ../data/trainingSamples.csv


ValueError: unknown url type: '../data/trainingSamples.csv'

## 特征工程

In [None]:
# movie id embedding feature
movie_col = tf.feature_column.categorical_column_with_identity(
    key='movieId', num_buckets=1001)
movie_emb_col = tf.feature_column.embedding_column(movie_col, 10)
movie_ind_col = tf.feature_column.indicator_column(
    movie_col)  # movid id indicator columns

# user id embedding feature
user_col = tf.feature_column.categorical_column_with_identity(
    key='userId', num_buckets=30001)
user_emb_col = tf.feature_column.embedding_column(user_col, 10)
user_ind_col = tf.feature_column.indicator_column(
    user_col)  # user id indicator columns

# genre features vocabulary
genre_vocab = [
    'Film-Noir', 'Action', 'Adventure', 'Horror', 'Romance', 'War', 'Comedy',
    'Western', 'Documentary', 'Sci-Fi', 'Drama', 'Thriller', 'Crime',
    'Fantasy', 'Animation', 'IMAX', 'Mystery', 'Children', 'Musical'
]
# user genre embedding feature
user_genre_col = tf.feature_column.categorical_column_with_vocabulary_list(
    key="userGenre1", vocabulary_list=genre_vocab)
user_genre_emb_col = tf.feature_column.embedding_column(user_genre_col, 10)
user_genre_ind_col = tf.feature_column.indicator_column(
    user_genre_col)  # user genre indicator columns
# item genre embedding feature
item_genre_col = tf.feature_column.categorical_column_with_vocabulary_list(
    key="movieGenre1", vocabulary_list=genre_vocab)
item_genre_emb_col = tf.feature_column.embedding_column(item_genre_col, 10)
item_genre_ind_col = tf.feature_column.indicator_column(
    item_genre_col)  # item genre indicator columns

# fm first-order term columns: without embedding and concatenate to the output layer directly
fm_first_order_columns = [
    movie_ind_col, user_ind_col, user_genre_ind_col, item_genre_ind_col
]

deep_feature_columns = [
    tf.feature_column.numeric_column('releaseYear'),
    tf.feature_column.numeric_column('movieRatingCount'),
    tf.feature_column.numeric_column('movieAvgRating'),
    tf.feature_column.numeric_column('movieRatingStddev'),
    tf.feature_column.numeric_column('userRatingCount'),
    tf.feature_column.numeric_column('userAvgRating'),
    tf.feature_column.numeric_column('userRatingStddev'), movie_emb_col,
    user_emb_col
]

## 模型训练和预测

In [None]:
# define input for keras model
inputs = {
    'movieAvgRating':
    tf.keras.layers.Input(name='movieAvgRating', shape=(), dtype='float32'),
    'movieRatingStddev':
    tf.keras.layers.Input(name='movieRatingStddev', shape=(), dtype='float32'),
    'movieRatingCount':
    tf.keras.layers.Input(name='movieRatingCount', shape=(), dtype='int32'),
    'userAvgRating':
    tf.keras.layers.Input(name='userAvgRating', shape=(), dtype='float32'),
    'userRatingStddev':
    tf.keras.layers.Input(name='userRatingStddev', shape=(), dtype='float32'),
    'userRatingCount':
    tf.keras.layers.Input(name='userRatingCount', shape=(), dtype='int32'),
    'releaseYear':
    tf.keras.layers.Input(name='releaseYear', shape=(), dtype='int32'),
    'movieId':
    tf.keras.layers.Input(name='movieId', shape=(), dtype='int32'),
    'userId':
    tf.keras.layers.Input(name='userId', shape=(), dtype='int32'),
    'userRatedMovie1':
    tf.keras.layers.Input(name='userRatedMovie1', shape=(), dtype='int32'),
    'userGenre1':
    tf.keras.layers.Input(name='userGenre1', shape=(), dtype='string'),
    'userGenre2':
    tf.keras.layers.Input(name='userGenre2', shape=(), dtype='string'),
    'userGenre3':
    tf.keras.layers.Input(name='userGenre3', shape=(), dtype='string'),
    'userGenre4':
    tf.keras.layers.Input(name='userGenre4', shape=(), dtype='string'),
    'userGenre5':
    tf.keras.layers.Input(name='userGenre5', shape=(), dtype='string'),
    'movieGenre1':
    tf.keras.layers.Input(name='movieGenre1', shape=(), dtype='string'),
    'movieGenre2':
    tf.keras.layers.Input(name='movieGenre2', shape=(), dtype='string'),
    'movieGenre3':
    tf.keras.layers.Input(name='movieGenre3', shape=(), dtype='string'),
}

item_emb_layer = tf.keras.layers.DenseFeatures([movie_emb_col])(inputs)
user_emb_layer = tf.keras.layers.DenseFeatures([user_emb_col])(inputs)
item_genre_emb_layer = tf.keras.layers.DenseFeatures([item_genre_emb_col
                                                      ])(inputs)
user_genre_emb_layer = tf.keras.layers.DenseFeatures([user_genre_emb_col
                                                      ])(inputs)

# The first-order term in the FM layer
fm_first_order_layer = tf.keras.layers.DenseFeatures(fm_first_order_columns)(
    inputs)

# FM part, cross different categorical feature embeddings
product_layer_item_user = tf.keras.layers.Dot(axes=1)(
    [item_emb_layer, user_emb_layer])
product_layer_item_genre_user_genre = tf.keras.layers.Dot(axes=1)(
    [item_genre_emb_layer, user_genre_emb_layer])
product_layer_item_genre_user = tf.keras.layers.Dot(axes=1)(
    [item_genre_emb_layer, user_emb_layer])
product_layer_user_genre_item = tf.keras.layers.Dot(axes=1)(
    [item_emb_layer, user_genre_emb_layer])

# deep part, MLP to generalize all input features
deep = tf.keras.layers.DenseFeatures(deep_feature_columns)(inputs)
deep = tf.keras.layers.Dense(64, activation='relu')(deep)
deep = tf.keras.layers.Dense(64, activation='relu')(deep)

# concatenate fm part and deep part
concat_layer = tf.keras.layers.concatenate([
    fm_first_order_layer, product_layer_item_user,
    product_layer_item_genre_user_genre, product_layer_item_genre_user,
    product_layer_user_genre_item, deep
], axis=1)
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')(concat_layer)

model = tf.keras.Model(inputs, output_layer)
# compile the model, set loss function, optimizer and evaluation metrics
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[
                  'accuracy',
                  tf.keras.metrics.AUC(curve='ROC'),
                  tf.keras.metrics.AUC(curve='PR')
              ])

# train the model
model.fit(train_data, epochs=5)

# evaluate the model
test_loss, test_accuracy, test_roc_auc, test_pr_auc = model.evaluate(
    test_data)
print('\n\nTest Loss {}, Test Accuracy {}, Test ROC AUC {}, Test PR AUC {}'.
      format(test_loss, test_accuracy, test_roc_auc, test_pr_auc))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Test Loss 0.7350901961326599, Test Accuracy 0.6508021354675293, Test ROC AUC 0.6991999745368958, Test PR AUC 0.7295635938644409


In [None]:
# print some predict results
predictions = model.predict(test_data)
for prediction, goodRating in zip(predictions[:12],
                                  list(test_data)[0][1][:12]):
    print("Predicted good rating: {:.2%}".format(prediction[0]),
          " | Actual rating label: ",
          ("Good Rating" if bool(goodRating) else "Bad Rating"))

Predicted good rating: 38.51%  | Actual rating label:  Bad Rating
Predicted good rating: 14.12%  | Actual rating label:  Good Rating
Predicted good rating: 26.67%  | Actual rating label:  Good Rating
Predicted good rating: 83.84%  | Actual rating label:  Bad Rating
Predicted good rating: 70.61%  | Actual rating label:  Good Rating
Predicted good rating: 96.47%  | Actual rating label:  Good Rating
Predicted good rating: 48.75%  | Actual rating label:  Good Rating
Predicted good rating: 91.97%  | Actual rating label:  Good Rating
Predicted good rating: 89.56%  | Actual rating label:  Good Rating
Predicted good rating: 57.76%  | Actual rating label:  Bad Rating
Predicted good rating: 18.68%  | Actual rating label:  Good Rating
Predicted good rating: 34.84%  | Actual rating label:  Bad Rating


In [None]:
# DeepFM_v2

## 读取数据

import tensorflow as tf

"""
Diff with DeepFM:
    1. separate categorical features from dense features when processing first order features and second order features
    2. modify original fm part with a fully crossed fm part
"""

# Training samples path, change to your local path
training_samples_file_path = tf.keras.utils.get_file(
    'trainingSamples.csv', '../data/trainingSamples.csv')
# Test samples path, change to your local path
test_samples_file_path = tf.keras.utils.get_file('testSamples.csv',
                                                 '../data/testSamples.csv')


# load sample as tf dataset
def get_dataset(file_path):
    dataset = tf.data.experimental.make_csv_dataset(file_path,
                                                    batch_size=12,
                                                    label_name='label',
                                                    na_value="0",
                                                    num_epochs=1,
                                                    ignore_errors=True)
    return dataset


# split as test dataset and training dataset
train_data = get_dataset(training_samples_file_path)
test_data = get_dataset(test_samples_file_path)

## 特征工程

# define input for keras model
inputs = {
    'movieAvgRating':
    tf.keras.layers.Input(name='movieAvgRating', shape=(), dtype='float32'),
    'movieRatingStddev':
    tf.keras.layers.Input(name='movieRatingStddev', shape=(), dtype='float32'),
    'movieRatingCount':
    tf.keras.layers.Input(name='movieRatingCount', shape=(), dtype='int32'),
    'userAvgRating':
    tf.keras.layers.Input(name='userAvgRating', shape=(), dtype='float32'),
    'userRatingStddev':
    tf.keras.layers.Input(name='userRatingStddev', shape=(), dtype='float32'),
    'userRatingCount':
    tf.keras.layers.Input(name='userRatingCount', shape=(), dtype='int32'),
    'releaseYear':
    tf.keras.layers.Input(name='releaseYear', shape=(), dtype='int32'),
    'movieId':
    tf.keras.layers.Input(name='movieId', shape=(), dtype='int32'),
    'userId':
    tf.keras.layers.Input(name='userId', shape=(), dtype='int32'),
    'userRatedMovie1':
    tf.keras.layers.Input(name='userRatedMovie1', shape=(), dtype='int32'),
    'userGenre1':
    tf.keras.layers.Input(name='userGenre1', shape=(), dtype='string'),
    'userGenre2':
    tf.keras.layers.Input(name='userGenre2', shape=(), dtype='string'),
    'userGenre3':
    tf.keras.layers.Input(name='userGenre3', shape=(), dtype='string'),
    'userGenre4':
    tf.keras.layers.Input(name='userGenre4', shape=(), dtype='string'),
    'userGenre5':
    tf.keras.layers.Input(name='userGenre5', shape=(), dtype='string'),
    'movieGenre1':
    tf.keras.layers.Input(name='movieGenre1', shape=(), dtype='string'),
    'movieGenre2':
    tf.keras.layers.Input(name='movieGenre2', shape=(), dtype='string'),
    'movieGenre3':
    tf.keras.layers.Input(name='movieGenre3', shape=(), dtype='string'),
}

# movie id embedding feature
movie_col = tf.feature_column.categorical_column_with_identity(
    key='movieId', num_buckets=1001)
movie_emb_col = tf.feature_column.embedding_column(movie_col, 10)
movie_ind_col = tf.feature_column.indicator_column(
    movie_col)  # movid id indicator columns

# user id embedding feature
user_col = tf.feature_column.categorical_column_with_identity(
    key='userId', num_buckets=30001)
user_emb_col = tf.feature_column.embedding_column(user_col, 10)
user_ind_col = tf.feature_column.indicator_column(
    user_col)  # user id indicator columns

# genre features vocabulary
genre_vocab = [
    'Film-Noir', 'Action', 'Adventure', 'Horror', 'Romance', 'War', 'Comedy',
    'Western', 'Documentary', 'Sci-Fi', 'Drama', 'Thriller', 'Crime',
    'Fantasy', 'Animation', 'IMAX', 'Mystery', 'Children', 'Musical'
]

# user genre embedding feature
user_genre_col = tf.feature_column.categorical_column_with_vocabulary_list(
    key="userGenre1", vocabulary_list=genre_vocab)
user_genre_ind_col = tf.feature_column.indicator_column(user_genre_col)
user_genre_emb_col = tf.feature_column.embedding_column(user_genre_col, 10)

# item genre embedding feature
item_genre_col = tf.feature_column.categorical_column_with_vocabulary_list(
    key="movieGenre1", vocabulary_list=genre_vocab)
item_genre_ind_col = tf.feature_column.indicator_column(item_genre_col)
item_genre_emb_col = tf.feature_column.embedding_column(item_genre_col, 10)

# fm first-order categorical items
cat_columns = [
    movie_ind_col, user_ind_col, user_genre_ind_col, item_genre_ind_col
]

deep_columns = [
    tf.feature_column.numeric_column('releaseYear'),
    tf.feature_column.numeric_column('movieRatingCount'),
    tf.feature_column.numeric_column('movieAvgRating'),
    tf.feature_column.numeric_column('movieRatingStddev'),
    tf.feature_column.numeric_column('userRatingCount'),
    tf.feature_column.numeric_column('userAvgRating'),
    tf.feature_column.numeric_column('userRatingStddev')
]

## 模型训练与预测

first_order_cat_feature = tf.keras.layers.DenseFeatures(cat_columns)(inputs)
first_order_cat_feature = tf.keras.layers.Dense(
    1, activation=None)(first_order_cat_feature)
first_order_deep_feature = tf.keras.layers.DenseFeatures(deep_columns)(inputs)
first_order_deep_feature = tf.keras.layers.Dense(
    1, activation=None)(first_order_deep_feature)
## first order feature

first_order_feature = tf.keras.layers.Add()(
    [first_order_cat_feature, first_order_deep_feature])

second_order_cat_columns_emb = [
    tf.keras.layers.DenseFeatures([item_genre_emb_col])(inputs),
    tf.keras.layers.DenseFeatures([movie_emb_col])(inputs),
    tf.keras.layers.DenseFeatures([user_genre_emb_col])(inputs),
    tf.keras.layers.DenseFeatures([user_emb_col])(inputs)
]

second_order_cat_columns = []
for feature_emb in second_order_cat_columns_emb:
    feature = tf.keras.layers.Dense(64, activation=None)(feature_emb)
    feature = tf.keras.layers.Reshape((-1, 64))(feature)
    second_order_cat_columns.append(feature)

second_order_deep_columns = tf.keras.layers.DenseFeatures(deep_columns)(inputs)
second_order_deep_columns = tf.keras.layers.Dense(
    64, activation=None)(second_order_deep_columns)
second_order_deep_columns = tf.keras.layers.Reshape(
    (-1, 64))(second_order_deep_columns)
second_order_fm_feature = tf.keras.layers.Concatenate(
    axis=1)(second_order_cat_columns + [second_order_deep_columns])

## second_order_deep_feature
deep_feature = tf.keras.layers.Flatten()(second_order_fm_feature)
deep_feature = tf.keras.layers.Dense(32, activation='relu')(deep_feature)
deep_feature = tf.keras.layers.Dense(16, activation='relu')(deep_feature)


class ReduceLayer(tf.keras.layers.Layer):
    def __init__(self, axis, op='sum', **kwargs):
        super().__init__()
        self.axis = axis
        self.op = op
        assert self.op in ['sum', 'mean']

    def build(self, input_shape):
        pass

    def call(self, input, **kwargs):
        if self.op == 'sum':
            return tf.reduce_sum(input, axis=self.axis)
        elif self.op == 'mean':
            return tf.reduce_mean(input, axis=self.axis)
        return tf.reduce_sum(input, axis=self.axis)


second_order_sum_feature = ReduceLayer(1)(second_order_fm_feature)
second_order_sum_square_feature = tf.keras.layers.multiply(
    [second_order_sum_feature, second_order_sum_feature])
second_order_square_feature = tf.keras.layers.multiply(
    [second_order_fm_feature, second_order_fm_feature])
second_order_square_sum_feature = ReduceLayer(1)(second_order_square_feature)
## second_order_fm_feature
second_order_fm_feature = tf.keras.layers.subtract(
    [second_order_sum_square_feature, second_order_square_sum_feature])

concatenated_outputs = tf.keras.layers.Concatenate(axis=1)(
    [first_order_feature, second_order_fm_feature, deep_feature])
output_layer = tf.keras.layers.Dense(
    1, activation='sigmoid')(concatenated_outputs)

model = tf.keras.Model(inputs, output_layer)
# compile the model, set loss function, optimizer and evaluation metrics
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[
                  'accuracy',
                  tf.keras.metrics.AUC(curve='ROC'),
                  tf.keras.metrics.AUC(curve='PR')
              ])

# train the model
model.fit(train_data, epochs=5)

# evaluate the model
test_loss, test_accuracy, test_roc_auc, test_pr_auc = model.evaluate(test_data)
print('\n\nTest Loss {}, Test Accuracy {}, Test ROC AUC {}, Test PR AUC {}'.
      format(test_loss, test_accuracy, test_roc_auc, test_pr_auc))

# print some predict results
predictions = model.predict(test_data)
for prediction, goodRating in zip(predictions[:12],
                                  list(test_data)[0][1][:12]):
    print("Predicted good rating: {:.2%}".format(prediction[0]),
          " | Actual rating label: ",
          ("Good Rating" if bool(goodRating) else "Bad Rating"))