## DSSM

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from collections import namedtuple, OrderedDict
from copy import copy
from itertools import chain
from collections import defaultdict

from sklearn.metrics import log_loss, roc_auc_score



### moivelens 1M 数据集处理

In [None]:
input_dir = "../../data/ml-1m/"
output_dir = "../../data/ml-1m/dssm/"

In [None]:
users_path = os.path.join(input_dir, "users.dat")
movies_path = os.path.join(input_dir, "movies.dat")
ratings_path = os.path.join(input_dir, "ratings.dat")

users = pd.read_csv(users_path, sep="::", header=None, engine="python",encoding="latin1", names="UserID::Gender::Age::Occupation::Zip-code".split("::"))
movies = pd.read_csv(movies_path, sep="::", header=None, engine="python",encoding="latin1", names="MovieID::Title::Genres".split("::"))
ratings = pd.read_csv(ratings_path, sep="::", header=None, engine="python",encoding="latin1", names="UserID::MovieID::Rating::Timestamp".split("::"))


In [None]:
print("users:", type(users), users.shape, '\n', users.head(5))
print("movies:", type(movies), movies.shape, '\n', movies.head(5))
print("ratings:", type(ratings), ratings.shape, '\n', ratings.head(5))

### 广告数据处理

In [2]:
ad_data_file = "../../data/tx_ad_data/train_sample.csv"
ad_data = pd.read_csv(ad_data_file)

In [3]:
ad_data['label'] = ad_data['label'].replace(-1, 0)
print('ad_data:', ad_data.shape, '\n', ad_data.head(5))

ad_data: (100000, 3) 
     aid       uid  label
0   411  40083340      0
1  1119  28450328      0
2   875  13700924      0
3  1566  45588256      0
4  1749  18791606      0


In [4]:
sparse_features = ['aid', 'uid']
dense_features = []

user_features=['aid']
item_features = ['uid']
target = ['label']

ad_data[sparse_features] = ad_data[sparse_features].fillna('-1',)
ad_data[dense_features] = ad_data[dense_features].fillna(0, )


In [5]:
# Negative_Sample
from collections import OrderedDict, Counter
def Negative_Sample(data, user_col, item_col, label_col, ratio, method_id=2):
    """
    :param data: training data
    :param user_col: user column name
    :param item_col: item column name for negative sampling
    :param label_col: label column name
    :param ratio: negative sample ratio, >= 1
    :param method_id: {0 : "random sampling", 1: "sampling method used in word2vec", 2: "tencent RALM sampling"}
    :return: new_dataframe, (user_id, item_id, label)
    """
    if not isinstance(ratio, int) or ratio < 1:
        raise ValueError("ratio means neg/pos, it should be greater than or equal to 1")
    items_cnt = Counter(data[item_col])
    items_cnt_order = OrderedDict(sorted((items_cnt.items()), key=lambda x:x[1], reverse=True))
    #print(items_cnt_order)
    user_pos_item = data[data[label_col]==1].drop(label_col, axis=1).groupby(user_col).agg(list).reset_index()
    
    if method_id == 0:
        def sample(row):
            neg_items = np.random.choice(list(items_cnt.keys()), size=ratio, replace=False)
            neg_items = [neg for neg in neg_items if neg not in row[item_col]]
            return neg_items
        user_pos_item['neg_' + item_col] = user_pos_item.apply(sample, axis=1)
    elif method_id == 1:
        items_cnt_freq = {item: count/len(items_cnt) for item,count in items_cnt_order.items()}
        p_sel = {item: np.sqrt(1e-5/items_cnt_freq[item]) for item in items_cnt_order}
        p_value = np.array(list(p_sel.values()))/sum(p_sel.values())
        def sample(row):
            neg_items = np.random.choice(list(items_cnt.keys()), size=ratio, replace=False, p=p_value)
            neg_items = [neg for neg in neg_items if neg not in row[item_col]]
            return neg_items
        user_pos_item['neg_' + item_col] = user_pos_item.apply(sample, axis=1)
    elif method_id == 2:
        p_sel = {item: (np.log(k + 2) - np.log(k + 1) / np.log(len(items_cnt_order) + 1)) for item, k in
                 items_cnt_order.items()}
        p_value = np.array(list(p_sel.values())) / sum(p_sel.values())
        def sample(row):
            neg_items = np.random.choice(list(items_cnt.keys()), size=ratio, replace=False, p=p_value)
            neg_items = [neg for neg in neg_items if neg not in row[item_col]]
            return neg_items
        user_pos_item['neg_'+item_col] = user_pos_item.apply(sample, axis=1)
    else:
        raise ValueError("method id should in (0,1,2)")
        
    #print(user_pos_item)

    neg_data = pd.DataFrame({user_col: user_pos_item[user_col], 'neg_'+item_col: user_pos_item['neg_'+item_col]})
    #print('neg_data', neg_data)
    neg_data = neg_data.rename(columns={'neg_' + item_col: item_col}, inplace=False)
    #print('neg_data', neg_data)

    pos_data = pd.DataFrame({user_col: user_pos_item[user_col], item_col: user_pos_item[item_col]})
    #print('pos_data', pos_data)

    pos_data[label_col] = 1
    neg_data[label_col] = 0
    neg_data = neg_data.explode('uid')
    
    pos_data = pos_data.explode('uid')
    #print('neg_data', neg_data)
    #print('pos_data', pos_data)

    return pd.concat([pos_data, neg_data])

def Cosine_Similarity(query, candidate, gamma=1, axis=-1):
    query_norm = tf.norm(query, axis=axis)
    candidate_norm = tf.norm(candidate, axis=axis)
    cosine_score = tf.reduce_sum(tf.multiply(query, candidate), -1)
    cosine_score = tf.divide(cosine_score, query_norm*candidate_norm+1e-8)
    cosine_score = tf.clip_by_value(cosine_score, -1, 1.0)*gamma
    return cosine_score

In [6]:
def get_inputs_list(inputs):
    return list(chain(*list(map(lambda x: x.values(), filter(lambda x: x is not None, inputs)))))


def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, l2_reg,
                          prefix='sparse_', seq_mask_zero=True):
    sparse_embedding = {}
    for feat in sparse_feature_columns:
        emb = tf.keras.layers.Embedding(feat.vocabulary_size, feat.embedding_dim,
                        embeddings_initializer=feat.embeddings_initializer,
                        embeddings_regularizer=l2(l2_reg),
                        name=prefix + '_emb_' + feat.embedding_name)
        emb.trainable = feat.trainable
        sparse_embedding[feat.embedding_name] = emb

    if varlen_sparse_feature_columns and len(varlen_sparse_feature_columns) > 0:
        for feat in varlen_sparse_feature_columns:
            # if feat.name not in sparse_embedding:
            emb = tf.keras.layers.Embedding(feat.vocabulary_size, feat.embedding_dim,
                            embeddings_initializer=feat.embeddings_initializer,
                            embeddings_regularizer=l2(
                                l2_reg),
                            name=prefix + '_seq_emb_' + feat.name,
                            mask_zero=seq_mask_zero)
            emb.trainable = feat.trainable
            sparse_embedding[feat.embedding_name] = emb
    return sparse_embedding

def get_embedding_vec_list(embedding_dict, input_dict, sparse_feature_columns, return_feat_list=(), mask_feat_list=()):
    embedding_vec_list = []
    for fg in sparse_feature_columns:
        feat_name = fg.name
        if len(return_feat_list) == 0 or feat_name in return_feat_list:
            if fg.use_hash:
                lookup_idx = Hash(fg.vocabulary_size, mask_zero=(feat_name in mask_feat_list), vocabulary_path=fg.vocabulary_path)(input_dict[feat_name])
            else:
                lookup_idx = input_dict[feat_name]

            embedding_vec_list.append(embedding_dict[feat_name](lookup_idx))

    return embedding_vec_list


def create_embedding_matrix(feature_columns, l2_reg, seed, prefix="", seq_mask_zero=True):

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if feature_columns else []
    
    sparse_emb_dict = create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed,
                                            l2_reg, prefix=str(prefix) + "sparse", seq_mask_zero=seq_mask_zero)
    return sparse_emb_dict


def embedding_lookup(sparse_embedding_dict, sparse_input_dict, sparse_feature_columns, return_feat_list=(),
                     mask_feat_list=(), to_list=False):
    group_embedding_dict = defaultdict(list)
    for fc in sparse_feature_columns:
        feature_name = fc.name
        embedding_name = fc.embedding_name
        if (len(return_feat_list) == 0 or feature_name in return_feat_list):
            if fc.use_hash:
                lookup_idx = Hash(fc.vocabulary_size, mask_zero=(feature_name in mask_feat_list), vocabulary_path=fc.vocabulary_path)(
                    sparse_input_dict[feature_name])
            else:
                lookup_idx = sparse_input_dict[feature_name]

            group_embedding_dict[fc.group_name].append(sparse_embedding_dict[embedding_name](lookup_idx))
    if to_list:
        return list(chain.from_iterable(group_embedding_dict.values()))
    return group_embedding_dict


def varlen_embedding_lookup(embedding_dict, sequence_input_dict, varlen_sparse_feature_columns):
    varlen_embedding_vec_dict = {}
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        embedding_name = fc.embedding_name
        if fc.use_hash:
            lookup_idx = Hash(fc.vocabulary_size, mask_zero=True, vocabulary_path=fc.vocabulary_path)(sequence_input_dict[feature_name])
        else:
            lookup_idx = sequence_input_dict[feature_name]
        varlen_embedding_vec_dict[feature_name] = embedding_dict[embedding_name](lookup_idx)
    return varlen_embedding_vec_dict


def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_columns, to_list=False):
    pooling_vec_list = defaultdict(list)
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        combiner = fc.combiner
        feature_length_name = fc.length_name
        if feature_length_name is not None:
            if fc.weight_name is not None:
                seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm)(
                    [embedding_dict[feature_name], features[feature_length_name], features[fc.weight_name]])
            else:
                seq_input = embedding_dict[feature_name]
            vec = SequencePoolingLayer(combiner, supports_masking=False)(
                [seq_input, features[feature_length_name]])
        else:
            if fc.weight_name is not None:
                seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm, supports_masking=True)(
                    [embedding_dict[feature_name], features[fc.weight_name]])
            else:
                seq_input = embedding_dict[feature_name]
            vec = SequencePoolingLayer(combiner, supports_masking=True)(
                seq_input)
        pooling_vec_list[fc.group_name].append(vec)
    if to_list:
        return chain.from_iterable(pooling_vec_list.values())
    return pooling_vec_list


def get_dense_input(features, feature_columns):
    
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), feature_columns)) if feature_columns else []
    dense_input_list = []
    for fc in dense_feature_columns:
        if fc.transform_fn is None:
            dense_input_list.append(features[fc.name])
        else:
            transform_result = tf.keras.initializers.Lambda(fc.transform_fn)(features[fc.name])
            dense_input_list.append(transform_result)
    return dense_input_list


def mergeDict(a, b):
    c = defaultdict(list)
    for k, v in a.items():
        c[k].extend(v)
    for k, v in b.items():
        c[k].extend(v)
    return c

In [7]:
DEFAULT_GROUP_NAME = "default_group"


class SparseFeat(namedtuple('SparseFeat',
                            ['name', 'vocabulary_size', 'embedding_dim', 'use_hash', 'vocabulary_path', 'dtype', 'embeddings_initializer',
                             'embedding_name',
                             'group_name', 'trainable'])):
    __slots__ = ()

    def __new__(cls, name, vocabulary_size, embedding_dim=4, use_hash=False, vocabulary_path=None, dtype="int32", embeddings_initializer=None,
                embedding_name=None,
                group_name=DEFAULT_GROUP_NAME, trainable=True):

        if embedding_dim == "auto":
            embedding_dim = 6 * int(pow(vocabulary_size, 0.25))
        if embeddings_initializer is None:
            embeddings_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.0001, seed=2020)

        if embedding_name is None:
            embedding_name = name

        return super(SparseFeat, cls).__new__(cls, name, vocabulary_size, embedding_dim, use_hash, vocabulary_path, dtype,
                                              embeddings_initializer,
                                              embedding_name, group_name, trainable)

    def __hash__(self):
        return self.name.__hash__()


class VarLenSparseFeat(namedtuple('VarLenSparseFeat',
                                  ['sparsefeat', 'maxlen', 'combiner', 'length_name', 'weight_name', 'weight_norm'])):
    __slots__ = ()

    def __new__(cls, sparsefeat, maxlen, combiner="mean", length_name=None, weight_name=None, weight_norm=True):
        return super(VarLenSparseFeat, cls).__new__(cls, sparsefeat, maxlen, combiner, length_name, weight_name,
                                                    weight_norm)

    @property
    def name(self):
        return self.sparsefeat.name

    @property
    def vocabulary_size(self):
        return self.sparsefeat.vocabulary_size

    @property
    def embedding_dim(self):
        return self.sparsefeat.embedding_dim

    @property
    def use_hash(self):
        return self.sparsefeat.use_hash

    @property
    def vocabulary_path(self):
        return self.sparsefeat.vocabulary_path

    @property
    def dtype(self):
        return self.sparsefeat.dtype

    @property
    def embeddings_initializer(self):
        return self.sparsefeat.embeddings_initializer

    @property
    def embedding_name(self):
        return self.sparsefeat.embedding_name

    @property
    def group_name(self):
        return self.sparsefeat.group_name

    @property
    def trainable(self):
        return self.sparsefeat.trainable

    def __hash__(self):
        return self.name.__hash__()


class DenseFeat(namedtuple('DenseFeat', ['name', 'dimension', 'dtype', 'transform_fn'])):
    """ Dense feature
    Args:
        name: feature name,
        dimension: dimension of the feature, default = 1.
        dtype: dtype of the feature, default="float32".
        transform_fn: If not `None` , a function that can be used to transform
        values of the feature.  the function takes the input Tensor as its
        argument, and returns the output Tensor.
        (e.g. lambda x: (x - 3.0) / 4.2).
    """
    __slots__ = ()

    def __new__(cls, name, dimension=1, dtype="float32", transform_fn=None):
        return super(DenseFeat, cls).__new__(cls, name, dimension, dtype, transform_fn)

    def __hash__(self):
        return self.name.__hash__()

    # def __eq__(self, other):
    #     if self.name == other.name:
    #         return True
    #     return False

    # def __repr__(self):
    #     return 'DenseFeat:'+self.name


def get_feature_names(feature_columns):
    features = build_input_features(feature_columns)
    return list(features.keys())


def build_input_features(feature_columns, prefix=''):
    input_features = OrderedDict()
    for fc in feature_columns:
        if isinstance(fc, SparseFeat):
            input_features[fc.name] = tf.keras.Input(
                shape=(1,), name=prefix + fc.name, dtype=fc.dtype)
        elif isinstance(fc, DenseFeat):
            input_features[fc.name] = tf.keras.Input(
                shape=(fc.dimension,), name=prefix + fc.name, dtype=fc.dtype)
        elif isinstance(fc, VarLenSparseFeat):
            input_features[fc.name] = tf.keras.Input(shape=(fc.maxlen,), name=prefix + fc.name,
                                            dtype=fc.dtype)
            if fc.weight_name is not None:
                input_features[fc.weight_name] = tf.keras.Input(shape=(fc.maxlen, 1), name=prefix + fc.weight_name,
                                                       dtype="float32")
            if fc.length_name is not None:
                input_features[fc.length_name] = tf.keras.Input((1,), name=prefix + fc.length_name, dtype='int32')

        else:
            raise TypeError("Invalid feature column type,got", type(fc))

    return input_features


def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear',
                     l2_reg=0, sparse_feat_refine_weight=None):
    linear_feature_columns = copy(feature_columns)
    for i in range(len(linear_feature_columns)):
        if isinstance(linear_feature_columns[i], SparseFeat):
            linear_feature_columns[i] = linear_feature_columns[i]._replace(embedding_dim=1,
                                                                           embeddings_initializer=tf.keras.initializers.Zeros())
        if isinstance(linear_feature_columns[i], VarLenSparseFeat):
            linear_feature_columns[i] = linear_feature_columns[i]._replace(
                sparsefeat=linear_feature_columns[i].sparsefeat._replace(embedding_dim=1,
                                                                         embeddings_initializer=tf.keras.initializers.Zeros()))

    linear_emb_list = [input_from_feature_columns(features, linear_feature_columns, l2_reg, seed,
                                                  prefix=prefix + str(i))[0] for i in range(units)]
    _, dense_input_list = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix)

    linear_logit_list = []
    for i in range(units):

        if len(linear_emb_list[i]) > 0 and len(dense_input_list) > 0:
            sparse_input = concat_func(linear_emb_list[i])
            dense_input = concat_func(dense_input_list)
            if sparse_feat_refine_weight is not None:
                sparse_input = tf.keras.initializers.Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))(
                    [sparse_input, sparse_feat_refine_weight])
            linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)([sparse_input, dense_input])
        elif len(linear_emb_list[i]) > 0:
            sparse_input = concat_func(linear_emb_list[i])
            if sparse_feat_refine_weight is not None:
                sparse_input = tf.keras.initializers.Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))(
                    [sparse_input, sparse_feat_refine_weight])
            linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input)
        elif len(dense_input_list) > 0:
            dense_input = concat_func(dense_input_list)
            linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias, seed=seed)(dense_input)
        else:   #empty feature_columns
            return tf.keras.initializers.Lambda(lambda x: tf.constant([[0.0]]))(list(features.values())[0])
        linear_logit_list.append(linear_logit)

    return concat_func(linear_logit_list)


def input_from_feature_columns(features, feature_columns, l2_reg, seed, prefix='', seq_mask_zero=True,
                               support_dense=True, support_group=False):
    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if feature_columns else []

    embedding_matrix_dict = create_embedding_matrix(feature_columns, l2_reg, seed, prefix=prefix,
                                                    seq_mask_zero=seq_mask_zero)
    group_sparse_embedding_dict = embedding_lookup(embedding_matrix_dict, features, sparse_feature_columns)
    dense_value_list = get_dense_input(features, feature_columns)
    if not support_dense and len(dense_value_list) > 0:
        raise ValueError("DenseFeat is not supported in dnn_feature_columns")

    sequence_embed_dict = varlen_embedding_lookup(embedding_matrix_dict, features, varlen_sparse_feature_columns)
    group_varlen_sparse_embedding_dict = get_varlen_pooling_list(sequence_embed_dict, features,
                                                                 varlen_sparse_feature_columns)
    group_embedding_dict = mergeDict(group_sparse_embedding_dict, group_varlen_sparse_embedding_dict)
    if not support_group:
        group_embedding_dict = list(chain.from_iterable(group_embedding_dict.values()))
    return group_embedding_dict, dense_value_list

In [8]:
import tensorflow as tf

try:
    from tensorflow.python.ops.init_ops import Zeros
except ImportError:
    from tensorflow.python.ops.init_ops_v2 import Zeros
from tensorflow.python.keras.layers import Layer, Activation

try:
    from tensorflow.python.keras.layers import BatchNormalization
except ImportError:
    BatchNormalization = tf.keras.layers.BatchNormalization

try:
    unicode
except NameError:
    unicode = str


class Dice(Layer):
    """The Data Adaptive Activation Function in DIN,which can be viewed as a generalization of PReLu and can adaptively adjust the rectified point according to distribution of input data.
      Input shape
        - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.
      Output shape
        - Same shape as the input.
      Arguments
        - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis).
        - **epsilon** : Small float added to variance to avoid dividing by zero.
      References
        - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
    """

    def __init__(self, axis=-1, epsilon=1e-9, **kwargs):
        self.axis = axis
        self.epsilon = epsilon
        super(Dice, self).__init__(**kwargs)

    def build(self, input_shape):
        self.bn = BatchNormalization(
            axis=self.axis, epsilon=self.epsilon, center=False, scale=False)
        self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros(
        ), dtype=tf.float32, name='dice_alpha')  # name='alpha_'+self.name
        super(Dice, self).build(input_shape)  # Be sure to call this somewhere!
        self.uses_learning_phase = True

    def call(self, inputs, training=None, **kwargs):
        inputs_normed = self.bn(inputs, training=training)
        # tf.layers.batch_normalization(
        # inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False)
        x_p = tf.sigmoid(inputs_normed)
        return self.alphas * (1.0 - x_p) * inputs + x_p * inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self, ):
        config = {'axis': self.axis, 'epsilon': self.epsilon}
        base_config = super(Dice, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


def activation_layer(activation):
    print('activation_layer',activation)
    if activation in ("dice", "Dice"):
        act_layer = Dice()
    elif isinstance(activation, (str, unicode)):
        act_layer = Activation(activation)
    elif issubclass(activation, Layer):
        act_layer = activation()
    else:
        raise ValueError(
            "Invalid activation,found %s.You should use a str or a Activation Layer Class." % (activation))
    return act_layer

In [9]:
import tensorflow as tf
from tensorflow.keras import backend as K

try:
    from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal
except ImportError:
    from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal

from tensorflow.keras.layers import Layer, Dropout


from tensorflow.keras.layers import BatchNormalization

from tensorflow.python.keras.regularizers import l2

class DNN(Layer):
    """The Multi Layer Percetron
      Input shape
        - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``.
      Output shape
        - nD tensor with shape: ``(batch_size, ..., hidden_size[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_size[-1])``.
      Arguments
        - **hidden_units**:list of positive integer, the layer number and units in each layer.
        - **activation**: Activation function to use.
        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix.
        - **dropout_rate**: float in [0,1). Fraction of the units to dropout.
        - **use_bn**: bool. Whether use BatchNormalization before activation or not.
        - **output_activation**: Activation function to use in the last layer.If ``None``,it will be same as ``activation``.
        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, output_activation=None,
                 seed=1024, **kwargs):
        self.hidden_units = hidden_units
        self.activation = activation
        self.l2_reg = l2_reg
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn
        self.output_activation = output_activation
        self.seed = seed

        super(DNN, self).__init__(**kwargs)

    def build(self, input_shape):
        # if len(self.hidden_units) == 0:
        #     raise ValueError("hidden_units is empty")
        input_size = input_shape[-1]
        hidden_units = [int(input_size)] + list(self.hidden_units)
        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=(
                                            hidden_units[i], hidden_units[i + 1]),
                                        initializer=glorot_normal(
                                            seed=self.seed),
                                        regularizer=l2(self.l2_reg),
                                        trainable=True) for i in range(len(self.hidden_units))]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(self.hidden_units[i],),
                                     initializer=Zeros(),
                                     trainable=True) for i in range(len(self.hidden_units))]
        if self.use_bn:
            self.bn_layers = [BatchNormalization() for _ in range(len(self.hidden_units))]

        self.dropout_layers = [Dropout(self.dropout_rate, seed=self.seed + i) for i in
                               range(len(self.hidden_units))]

        self.activation_layers = [activation_layer(self.activation) for _ in range(len(self.hidden_units))]

        if self.output_activation:
            self.activation_layers[-1] = activation_layer(self.output_activation)

        super(DNN, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, training=None, **kwargs):

        deep_input = inputs

        for i in range(len(self.hidden_units)):
            fc = tf.nn.bias_add(tf.tensordot(
                deep_input, self.kernels[i], axes=(-1, 0)), self.bias[i])

            if self.use_bn:
                fc = self.bn_layers[i](fc, training=training)
            try:
                fc = self.activation_layers[i](fc, training=training)
            except TypeError as e:  # TypeError: call() got an unexpected keyword argument 'training'
                print("make sure the activation function use training flag properly", e)
                fc = self.activation_layers[i](fc)

            fc = self.dropout_layers[i](fc, training=training)
            deep_input = fc

        return deep_input

    def compute_output_shape(self, input_shape):
        if len(self.hidden_units) > 0:
            shape = input_shape[:-1] + (self.hidden_units[-1],)
        else:
            shape = input_shape

        return tuple(shape)

    def get_config(self, ):
        config = {'activation': self.activation, 'hidden_units': self.hidden_units,
                  'l2_reg': self.l2_reg, 'use_bn': self.use_bn, 'dropout_rate': self.dropout_rate,
                  'output_activation': self.output_activation, 'seed': self.seed}
        base_config = super(DNN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class PredictionLayer(Layer):
    """
      Arguments
         - **task**: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
         - **use_bias**: bool.Whether add bias term or not.
    """

    def __init__(self, task='binary', use_bias=True, **kwargs):
        if task not in ["binary", "multiclass", "regression"]:
            raise ValueError("task must be binary,multiclass or regression")
        self.task = task
        self.use_bias = use_bias
        super(PredictionLayer, self).__init__(**kwargs)

    def build(self, input_shape):

        if self.use_bias:
            self.global_bias = self.add_weight(
                shape=(1,), initializer=Zeros(), name="global_bias")

        # Be sure to call this somewhere!
        super(PredictionLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        x = inputs
        if self.use_bias:
            x = tf.nn.bias_add(x, self.global_bias, data_format='NHWC')
        if self.task == "binary":
            x = tf.sigmoid(x)

        output = tf.reshape(x, (-1, 1))

        return output

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def get_config(self, ):
        config = {'task': self.task, 'use_bias': self.use_bias}
        base_config = super(PredictionLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [10]:
from tensorflow.keras.layers import Flatten, Concatenate, Layer, Add
class NoMask(Layer):
    def __init__(self, **kwargs):
        super(NoMask, self).__init__(**kwargs)

    def build(self, input_shape):
        # Be sure to call this somewhere!
        super(NoMask, self).build(input_shape)

    def call(self, x, mask=None, **kwargs):
        return x

    def compute_mask(self, inputs, mask):
        return None

def concat_func(inputs, axis=-1, mask=False):
    print(len(inputs))
    if not mask:
        inputs = list(map(NoMask(), inputs))
    if len(inputs) == 1:
        return inputs[0]
    else:
        return Concatenate(axis=axis)(inputs)
    
def combined_dnn_input(sparse_embedding_list, dense_value_list):
    if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
        sparse_dnn_input = Flatten()(concat_func(sparse_embedding_list))
        dense_dnn_input = Flatten()(concat_func(dense_value_list))
        return concat_func([sparse_dnn_input, dense_dnn_input])
    elif len(sparse_embedding_list) > 0:
        return Flatten()(concat_func(sparse_embedding_list))
    elif len(dense_value_list) > 0:
        return Flatten()(concat_func(dense_value_list))
    else:
        raise NotImplementedError("dnn_feature_columns can not be empty list")

In [11]:
# Do Negative Sampler
data = Negative_Sample(ad_data, 'aid', 'uid', 'label', 10, method_id=1)

In [12]:
# 1.Label Encoding for sparse features,and do simple Transformation for dense features
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])
    
# 2.count #unique features for each sparse field,and record dense feature field name
user_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=20) 
                        for i, feat in enumerate(user_features)]
item_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=20)
                           for i, feat in enumerate(item_features)]
# 3.generate input data for model
train, test = train_test_split(data, test_size=0.2)
train_model_input = {name: train[name] for name in sparse_features}
test_model_input = {name: test[name] for name in sparse_features}

In [13]:
print(data.shape, data.head(5))
print(type(user_feature_columns), len(user_feature_columns), user_feature_columns)


(6480, 3)    aid   uid  label
0    0  2880      1
0    0   254      1
0    0  1719      1
0    0  6355      1
0    0  2042      1
<class 'list'> 1 [SparseFeat(name='aid', vocabulary_size=172, embedding_dim=20, use_hash=False, vocabulary_path=None, dtype='int32', embeddings_initializer=<keras.initializers.initializers_v2.RandomNormal object at 0x7f125d36ba50>, embedding_name='aid', group_name='default_group', trainable=True)]


In [14]:
from tensorflow.keras.models import Model



def DSSM(user_dnn_feature_columns, item_dnn_feature_columns, gamma=1, dnn_use_bn=True, dnn_hidden_units=(300, 300, 128), dnn_activation='tanh',
         l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):

    user_features = build_input_features(user_dnn_feature_columns)
    user_inputs_list = list(user_features.values())
    user_sparse_embedding_list, user_dense_value_list = input_from_feature_columns(user_features, user_dnn_feature_columns,
                                                                         l2_reg_embedding, init_std, seed)
    user_dnn_input = combined_dnn_input(user_sparse_embedding_list, user_dense_value_list)
    #print(user_dnn_input)
    
    item_features = build_input_features(item_dnn_feature_columns)
    item_inputs_list = list(item_features.values())
    item_sparse_embedding_list, item_dense_value_list = input_from_feature_columns(item_features, item_dnn_feature_columns,
                                                                         l2_reg_embedding, init_std, seed)
    item_dnn_input = combined_dnn_input(item_sparse_embedding_list, item_dense_value_list)

    user_dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed=seed, name="user_embedding")(user_dnn_input)
    
    item_dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed=seed, name="item_embedding")(item_dnn_input)

    score = Cosine_Similarity(user_dnn_out, item_dnn_out, gamma=gamma)

    output = PredictionLayer(task, False)(score)

    model = Model(inputs=user_inputs_list+item_inputs_list, outputs=output)

    return model

In [15]:
# 4.Define Model,train,predict and evaluate
model = DSSM(user_feature_columns, item_feature_columns, task='binary')

1


2022-08-24 12:14:14.467961: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib:/usr/local/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-9.0/lib64:/usr/local/cuda-9.0/extras/CUPTI/lib64:/usr/local/cuda-8.0/lib64:/usr/local/cuda-8.0/extras/CUPTI/lib64:/usr/lib:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64/:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib:/usr/local/lib:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64/:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib:/usr/local/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64/:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib:/usr/local/lib:/usr/local/cuda-11.0/lib64:/usr/local/cuda-1

1
activation_layer tanh
activation_layer tanh
activation_layer tanh
activation_layer tanh
activation_layer tanh
activation_layer tanh


In [16]:
model.summary()
model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], )
history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2,)
model.save_weights('../../data/saved_model/dssm.ckpt')
pred_ans = model.predict(test_model_input, batch_size=256)
print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
aid (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
uid (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
1024sparse_emb_aid (Embedding)  (None, 1, 20)        3440        aid[0][0]                        
__________________________________________________________________________________________________
1024sparse_emb_uid (Embedding)  (None, 1, 20)        127940      uid[0][0]                        
______________________________________________________________________________________________

2022-08-24 12:14:15.164688: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


17/17 - 4s - loss: 0.7046 - binary_crossentropy: 0.7046 - val_loss: 0.8380 - val_binary_crossentropy: 0.8380
Epoch 2/10
17/17 - 1s - loss: 0.4414 - binary_crossentropy: 0.4414 - val_loss: 0.8031 - val_binary_crossentropy: 0.8031
Epoch 3/10
17/17 - 1s - loss: 0.3791 - binary_crossentropy: 0.3791 - val_loss: 0.8234 - val_binary_crossentropy: 0.8234
Epoch 4/10
17/17 - 1s - loss: 0.3576 - binary_crossentropy: 0.3576 - val_loss: 0.8302 - val_binary_crossentropy: 0.8302
Epoch 5/10
17/17 - 1s - loss: 0.3436 - binary_crossentropy: 0.3436 - val_loss: 0.8264 - val_binary_crossentropy: 0.8264
Epoch 6/10
17/17 - 1s - loss: 0.3346 - binary_crossentropy: 0.3346 - val_loss: 0.8114 - val_binary_crossentropy: 0.8114
Epoch 7/10
17/17 - 1s - loss: 0.3291 - binary_crossentropy: 0.3291 - val_loss: 0.7982 - val_binary_crossentropy: 0.7982
Epoch 8/10
17/17 - 1s - loss: 0.3254 - binary_crossentropy: 0.3254 - val_loss: 0.7844 - val_binary_crossentropy: 0.7844
Epoch 9/10
17/17 - 1s - loss: 0.3227 - binary_cross

In [17]:
user_embedding_model = Model(inputs=model.input, outputs=model.get_layer("user_embedding").output)
item_embedding_model = Model(inputs=model.input, outputs=model.get_layer("item_embedding").output)
user_embedding = user_embedding_model.predict(test_model_input)
item_embedding = item_embedding_model.predict(test_model_input)

print("user embedding shape: ", user_embedding.shape)
print("item embedding shape: ", item_embedding.shape)

np.save('../../data/saved_model/user_embedding.npy', user_embedding)
np.save('../../data/saved_model/item_embedding.npy', item_embedding)

user embedding shape:  (1296, 128)
item embedding shape:  (1296, 128)
