In [1]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.layers import *
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import *
from sklearn.preprocessing import LabelEncoder
# from tensorflow.keras.constraints import *

%matplotlib inline

# 准备数据

In [2]:
path = '/disk/share/criteo/'
data = pd.read_csv(path+'criteo_sampled_data.csv')
cols = data.columns.values

dense_feats = [f for f in cols if f[0] == "I"]
sparse_feats = [f for f in cols if f[0] == "C"]

def process_dense_feats(data, feats):
    d = data.copy()
    d = d[feats].fillna(0.0)
    for f in feats:
        d[f] = d[f].apply(lambda x: np.log(x+1) if x > -1 else -1)
    
    return d

data_dense = process_dense_feats(data, dense_feats)

vocab_sizes = {}
def process_sparse_feats(data, feats):
    d = data.copy()
    d = d[feats].fillna("-1")
    for f in feats:
        label_encoder = LabelEncoder()
        d[f] = label_encoder.fit_transform(d[f])
        vocab_sizes[f] = d[f].nunique() + 1
    return d

data_sparse = process_sparse_feats(data, sparse_feats)
total_data = pd.concat([data_dense, data_sparse], axis=1)
total_data['label'] = data['label']

# 自定义层

In [3]:
class SparseEmbedding(Layer):
    def __init__(self, sparse_feats, vocab_sizes, embed_dims=8):
        super().__init__()
        # 离散特征嵌入矩阵
        self.sparse_embeds_mat = []
        for idx, feat in enumerate(sparse_feats):
            # reg = tf.keras.regularizers.l2(0.5)
            emb = Embedding(input_dim=vocab_sizes[feat],
                            output_dim=embed_dims,
                            # embeddings_regularizer=reg,
                            name=f'{feat}_emb')
            self.sparse_embeds_mat.append(emb)
        
    def call(self, sparse_inputs):
        # FM 部分
        sparse_embeds = []
        for idx, emb_mat in enumerate(self.sparse_embeds_mat):
            emb = emb_mat(sparse_inputs[idx])
            sparse_embeds.append(emb)
        concat_sparse_embeds = Concatenate(axis=1)(sparse_embeds)
        return concat_sparse_embeds

In [4]:
class Linear(Layer):
    def __init__(self, sparse_feats, vocab_sizes):
        super().__init__()
        
        # 离散特1d征嵌入矩阵
        self.sparse_1d_embeds = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=1)
        
        self.fc_dense = Dense(1)
        self.fc_sparse = Dense(1)
        
    def call(self, inputs):
        dense_inputs, sparse_inputs = inputs[0], inputs[1]
        # 线性部分
        concat_dense_inputs = Concatenate(axis=1)(dense_inputs)
        first_order_dense_layer = self.fc_dense(concat_dense_inputs)
        
        concat_sparse_embeds_1d = self.sparse_1d_embeds(sparse_inputs) 
        flat_sparse_embeds_1d = Flatten()(concat_sparse_embeds_1d)
        first_order_sparse_layer = self.fc_sparse(flat_sparse_embeds_1d)
        
        linear_output = Add()([first_order_dense_layer, first_order_sparse_layer])
        return linear_output

In [5]:
class FM(Layer):
    def __init__(self):
        super().__init__()
        
    def call(self, concat_sparse_embeds):
        # 先求和再求平方
        sum_embeds = tf.reduce_sum(concat_sparse_embeds, axis=1)
        square_sum_embeds = Multiply()([sum_embeds, sum_embeds])
        # 先平方再求和
        square_embeds = Multiply()([concat_sparse_embeds, concat_sparse_embeds])
        sum_square_embeds = tf.reduce_sum(square_embeds, axis=1)
        # 相减除以2
        sub =  0.5 * Subtract()([square_sum_embeds, sum_square_embeds])
        # 相加
        snd_order_sparse_output = tf.reduce_sum(sub, axis=1, keepdims=True)
        return snd_order_sparse_output

In [6]:
class DNN(Layer):
    def __init__(self, hid_units=[256,256,256], use_dropout=True):
        super().__init__()
        self.use_dropout = use_dropout
        self.Dropout = Dropout(0.3)
        self.dense_layers = []
        for unit in hid_units:
            self.dense_layers.append(Dense(unit, activation='relu'))
        self.dense_layers.append(Dense(1))
        
    def call(self, concat_sparse_embeds):
        flat_sparse_embed = Flatten()(concat_sparse_embeds)
        
        x = self.dense_layers[0](flat_sparse_embed)
        for dense in self.dense_layers[1:]:
            x = dense(x)
            if self.use_dropout:
                x = self.Dropout(x)
        return x

# 构建模型 (keras函数式)

In [84]:
class DeepFM:
    def __init__(self, dense_feats, sparse_feats, vocab_sizes, embed_dims=8):
        
        # 连续特征
        self.dense_inputs = []
        for feat in dense_feats:
            self.dense_inputs.append(Input(shape=1, name=feat))
            
        # 离散特征
        self.sparse_inputs = []
        for feat in sparse_feats:
            self.sparse_inputs.append(Input(shape=1, name=feat))
        
        self.Linear = Linear(sparse_feats, vocab_sizes)
        self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)
        self.FM = FM()
        self.DNN = DNN()
        
    def bulid_model(self):
        all_inputs = [self.dense_inputs, self.sparse_inputs]
        
        linear_output = self.Linear(all_inputs)
        concat_sparse_embeds = self.SparseEmbedding(self.sparse_inputs)
        snd_order_sparse_output = self.FM(concat_sparse_embeds)
        fc_layer_output = self.DNN(concat_sparse_embeds)
        
        # 输出部分
        output = Add()([linear_output, snd_order_sparse_output, fc_layer_output])
        output = Activation('sigmoid')(output)
        
        model = Model(inputs=all_inputs, outputs=output)
        return model

In [3]:
train_data = total_data.loc[:500000-1]
valid_data = total_data.loc[500000:]

train_dense_x_all = [train_data[f].values for f in dense_feats]
train_sparse_x_all = [train_data[f].values for f in sparse_feats]
train_label_all = train_data[['label']].values

val_dense_x_all = [valid_data[f].values for f in dense_feats]
val_sparse_x_all = [valid_data[f].values for f in sparse_feats]
val_label_all = valid_data[['label']].values

In [14]:
model = DeepFM(dense_feats, sparse_feats, vocab_sizes).bulid_model()
model.compile(optimizer='rmsprop', loss='binary_crossentropy', 
              metrics=['binary_crossentropy', tf.keras.metrics.AUC()])

os.makedirs('checkpoints/model.h5', exist_ok=True)
checkpoints = ModelCheckpoint('checkpoints', monitor='val_auc', 
                              mode='max', save_weights_only=True)# , save_best_only=True
early_stopping = EarlyStopping(monitor='val_auc', min_delta=0.0001, patience=5)
def scheduler(epoch):
    thred = 10
    if epoch < thred:
        return 0.001
    else:
        return 0.001 * tf.math.exp(0.1 * (thred - epoch))
lr_schedule = LearningRateScheduler(scheduler)
callbacks = [checkpoints, early_stopping, lr_schedule]


model.fit([train_dense_x_all, train_sparse_x_all], train_label_all, batch_size=256,
         validation_data=([val_dense_x_all, val_sparse_x_all], val_label_all),
         callbacks=callbacks, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x7fc618863450>

# 附：继承 Model 的模型构建方法

In [7]:
class DeepFM(tf.keras.Model):
    def __init__(self, dense_feats, sparse_feats, vocab_sizes, embed_dims=8):
        super().__init__()
        self.dense_feats = dense_feats
        self.sparse_feats = sparse_feats
        self.vocab_sizes = vocab_sizes
        self.embed_dims = embed_dims
        
        self.Linear = Linear(sparse_feats, vocab_sizes)
        self.SparseEmbedding = SparseEmbedding(sparse_feats, vocab_sizes, embed_dims=8)
        self.FM = FM()
        self.DNN = DNN()
        
    
    def call(self, inputs, training=True):
        dense_inputs, sparse_inputs = inputs[0], inputs[1]
        
        linear_output = self.Linear(inputs)
        concat_sparse_embeds = self.SparseEmbedding(sparse_inputs)
        snd_order_sparse_output = self.FM(concat_sparse_embeds)
        fc_layer_output = self.DNN(concat_sparse_embeds)
        
        # 输出部分
        output = Add()([linear_output, snd_order_sparse_output, fc_layer_output])
        output = Activation('sigmoid')(output)
        return output

In [53]:
train_data = total_data.loc[:500000-1]
valid_data = total_data.loc[500000:]

train_dense_x_all = np.array([train_data[[f]].values for f in dense_feats])
train_sparse_x_all = np.array([train_data[[f]].values for f in sparse_feats])
train_label_all = train_data[['label']].values

val_dense_x_all = np.array([valid_data[[f]].values for f in dense_feats])
val_sparse_x_all = np.array([valid_data[[f]].values for f in sparse_feats])
val_label_all = valid_data[['label']].values


model = DeepFM(dense_feats, sparse_feats, vocab_sizes)
opt = tf.keras.optimizers.Adam(learning_rate=1e-3)
batch_size = 256
for i in range(total_data.shape[0]//batch_size):
    train_dense_x = list(train_dense_x_all[:,i*batch_size:(i+1)*batch_size,:])
    train_sparse_x = list(train_sparse_x_all[:,i*batch_size:(i+1)*batch_size,:])
    train_label = train_label_all[i*batch_size:(i+1)*batch_size]
    
    with tf.GradientTape() as tape:
        pred = model([train_dense_x, train_sparse_x])
        loss = tf.keras.losses.binary_crossentropy(train_label, pred)
        loss = tf.reduce_mean(loss)
    grads = tape.gradient(loss, model.variables)
    opt.apply_gradients(grads_and_vars=zip(grads, model.variables))
    
    
    if i%10 ==0:
        val_dense_x = list(val_dense_x_all)
        val_sparse_x = list(val_sparse_x_all)
        val_label = val_label_all

        pred = model([val_dense_x, val_sparse_x])
        val_loss = tf.keras.losses.binary_crossentropy(val_label, pred)
        val_loss = tf.reduce_mean(val_loss)
        print('train_loss', loss.numpy(), 'val_loss', val_loss.numpy())



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

train_loss 2.4098854 val_loss 2.483604
train_loss 1.3211236 val_loss 1.3194331
train_loss 1.169803 val_loss 1.0407462
train_loss 0.99219334 val_loss 1.059052
train_loss 0.8957213 val_loss 0.92486346
train_loss 0.9512948 val_loss 0.9190863
train_loss 0.95848316 val_loss 0.8585176
train_loss 0.8645132 val_loss 0.83732563
train_loss 0.7037978 val_loss 0.805529
train_loss 0.67349327 val_loss 0.8104939
train_loss 0.7041616 val_loss 0.7876595
train_loss 0.6040318 val_loss 0.7762838
train_loss 0.6661148 val_loss 0.7730615
train_loss 0.62362516 val_loss 0.7610869
train_loss 0.7019628 val_loss 0.7555212
train_loss 0.66617835 val_loss 0.7523162
train_loss 0.7438489 val_loss 0.7429388
train_loss 0.659

KeyboardInterrupt: 