In [None]:
class DeepFM(object):
    """DeepFM model: FM first/second-order terms combined with a dense tower.

    All trainable weights are created once in ``__init__`` so that repeated
    forward passes reuse the same variables and gradients can be taken with
    respect to ``trainable_variables``.
    """

    def __init__(self, cfg):
        """Read hyper-parameters from ``cfg`` and create the model weights.

        Args:
            cfg: Mapping providing the keys ``feature_size``, ``field_size``,
                ``embed_size``, ``dropout_fm``, ``deep_layers`` and
                ``dropout_deep``.
        """
        self.feature_size = cfg['feature_size']
        self.field_size = cfg['field_size']
        self.embed_size = cfg['embed_size']
        # NOTE(review): the dropout settings are stored but not applied in the
        # forward pass; their semantics (rate vs. keep probability) are not
        # visible here.
        self.dropout_fm = cfg['dropout_fm']
        self.deep_nn = cfg['deep_layers']
        self.dropout_deep = cfg['dropout_deep']

        # FM weights: one embedding vector and one scalar weight per feature.
        self.w_feature_embed = tf.Variable(tf.random.normal(
            shape=[self.feature_size, self.embed_size], mean=0.0, stddev=0.01))
        self.w_feature_bias = tf.Variable(tf.random.normal(
            shape=[self.feature_size, 1], mean=0.0, stddev=0.01))

        # Dense tower weights, keyed by layer index.
        self.initializer = tf.keras.initializers.GlorotNormal()
        self.deep_w = dict()
        self.deep_b = dict()
        input_size = self.field_size * self.embed_size
        for layer, units in enumerate(self.deep_nn):
            self.deep_w[layer] = tf.Variable(self.initializer([input_size, units]))
            self.deep_b[layer] = tf.Variable(self.initializer([1, units]))
            input_size = units

        # Output layer over [first-order | second-order | dense output].
        # ``input_size`` is now the dense tower's output width (or the flat
        # embedding width when ``deep_layers`` is empty).
        concat_size = self.field_size + self.embed_size + input_size
        self.concat_w = tf.Variable(self.initializer([concat_size, 1]))
        self.concat_b = tf.Variable(0.01)

    @property
    def trainable_variables(self):
        """Return every weight of the model as a flat list.

        The order is: embedding table, first-order weights, then ``(w, b)``
        per dense layer, then the output layer weight and bias.
        """
        variables = [self.w_feature_embed, self.w_feature_bias]
        for layer in range(len(self.deep_nn)):
            variables.append(self.deep_w[layer])
            variables.append(self.deep_b[layer])
        variables.extend([self.concat_w, self.concat_b])
        return variables

    def __call__(self, feature_idx, feature_val):
        """Run the forward pass and return sigmoid probabilities.

        Args:
            feature_idx: Integer feature ids used to index the weight tables;
                presumably shaped ``[batch, field_size]`` -- confirm with caller.
            feature_val: Feature values; reshaped to ``[batch, field_size, 1]``.

        Returns:
            Tensor of shape ``[batch, 1]`` holding the sigmoid of the output
            layer (i.e. probabilities, not logits).
        """
        # Cast so integer/float64 inputs can be multiplied with the weights.
        values = tf.reshape(
            tf.cast(feature_val, self.w_feature_embed.dtype),
            shape=[-1, self.field_size, 1])

        # FM first-order term: per-field weight * value.
        first_order = tf.nn.embedding_lookup(self.w_feature_bias, feature_idx)
        fm_first_order = tf.reduce_sum(tf.multiply(first_order, values), 2)

        # FM second-order term: 0.5 * ((sum v)^2 - sum(v^2)) over the fields.
        embeddings = tf.nn.embedding_lookup(self.w_feature_embed, feature_idx)
        weighted = tf.multiply(embeddings, values)
        square_of_sum = tf.square(tf.reduce_sum(weighted, 1))
        sum_of_square = tf.reduce_sum(tf.square(weighted), 1)
        fm_second_order = 0.5 * tf.subtract(square_of_sum, sum_of_square)

        # Dense tower over the flattened (un-weighted) embeddings.
        y_deep = tf.reshape(
            embeddings, shape=[-1, self.field_size * self.embed_size])
        for layer in range(len(self.deep_nn)):
            y_deep = tf.add(
                tf.matmul(y_deep, self.deep_w[layer]), self.deep_b[layer])
            y_deep = tf.nn.relu(y_deep)

        # Combine the three parts and project to a single probability.
        concat = tf.concat([fm_first_order, fm_second_order, y_deep], axis=1)
        out = tf.nn.sigmoid(
            tf.add(tf.matmul(concat, self.concat_w), self.concat_b))
        return out

In [None]:
def loss(model, x_idx, x_val, y):
    """Return the binary cross-entropy between ``y`` and the model output.

    Args:
        model: Callable taking ``(x_idx, x_val)`` and returning probabilities.
        x_idx: Feature indices passed through to ``model``.
        x_val: Feature values passed through to ``model``.
        y: Ground-truth binary labels.

    Returns:
        Scalar loss tensor.
    """
    y_ = model(x_idx, x_val)
    # The model output has already been passed through a sigmoid, so it is a
    # probability; from_logits=True would apply the sigmoid a second time.
    loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    return loss_object(y_true=y, y_pred=y_)


In [None]:
def grad(model, x_idx, x_val, labels):
    """Compute the loss and its gradients w.r.t. the model's variables.

    Args:
        model: Model exposing ``trainable_variables`` and accepted by ``loss``.
        x_idx: Feature indices for the batch.
        x_val: Feature values for the batch.
        labels: Ground-truth labels for the batch.

    Returns:
        Tuple ``(loss_val, gradients)`` where ``gradients`` is aligned with
        ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        loss_val = loss(model, x_idx, x_val, labels)
    # Return the locally computed loss; the previous code referenced the
    # undefined/global name ``loss_value`` here.
    return loss_val, tape.gradient(loss_val, model.trainable_variables)

In [None]:
# Adam optimizer with a learning rate of 0.01.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [None]:
# Tiny demo batch built from the first three rows of the feature data.
# NOTE(review): `feature_idx` / `feature_val` are defined outside this view;
# the `.values` access suggests pandas objects -- confirm.
demo_index = tf.constant(feature_idx[:3].values)
# Values are stored as float32 tensors.
demo_val = tf.constant(feature_val[:3].values, dtype=tf.float32)


In [None]:
# Evaluate the loss and gradients once on the demo batch; the gradients are
# not applied in this cell.
# NOTE(review): `model` and `labels` are defined outside this view; the demo
# tensors hold three rows, so `labels` presumably needs three matching entries
# -- confirm.
loss_value, grads = grad(model, demo_index, demo_val, labels)
# Report the optimizer's iteration counter as the step alongside the loss.
print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(), loss_value.numpy()))