In [1]:
import pandas as pd
import os
import numpy as np
import tensorflow as tf
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
train_file = '../data/train.csv'
test_file = '../data/test.csv'
NUMERIC_COLS = [
    "ps_reg_01", "ps_reg_02", "ps_reg_03", "ps_car_12", "ps_car_13",
    "ps_car_14", "ps_car_15"
]
IGNORE_COLS = [
    "id", "target", "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04",
    "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08", "ps_calc_09",
    "ps_calc_10", "ps_calc_11", "ps_calc_12", "ps_calc_13", "ps_calc_14",
    "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin", "ps_calc_18_bin",
    "ps_calc_19_bin", "ps_calc_20_bin"
]
dfTrain = pd.read_csv(train_file)
dfTest = pd.read_csv(test_file)

In [3]:
df = pd.concat([dfTrain, dfTest])
# 特征字典，key是每一列，即每个field，value是每个值对应的feature_id
feature_dict = {}
# 特征总数量
total_feature = 0
for col in df.columns:
    if col in IGNORE_COLS:
        continue
    elif col in NUMERIC_COLS:
        # 数字类型列，作为一个特征
        feature_dict[col] = total_feature
        total_feature += 1
    else:
        # 查看这一列有多少个unique的值
        unique_val = df[col].unique()
        feature_dict[col] = dict(
            zip(unique_val,
                range(total_feature,
                      len(unique_val) + total_feature)))
        total_feature += len(unique_val)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [4]:
train_y = dfTrain[['target']].values.tolist()
dfTrain.drop(['target', 'id'], axis=1, inplace=True)
train_feature_index = dfTrain.copy()
train_feature_value = dfTrain.copy()
for col in train_feature_index.columns:
    if col in IGNORE_COLS:
        train_feature_index.drop(col, axis=1, inplace=True)
        train_feature_value.drop(col, axis=1, inplace=True)
        continue
    elif col in NUMERIC_COLS:
        train_feature_index[col] = feature_dict[col]
    else:
        train_feature_index[col] = train_feature_index[col].map(
            feature_dict[col])
        train_feature_value[col] = 1
train_y = np.reshape(np.array(train_y), (-1, 1))

In [5]:
afm_params = {
    "embedding_size": 8,
    "attention_size": 10,
    "deep_layers": [32, 32],
    "dropout_deep": [0.5, 0.5, 0.5],
    "deep_layer_activation": tf.nn.relu,
    "epoch": 30,
    "batch_size": 1024,
    "learning_rate": 0.001,
    "optimizer": "adam",
    "batch_norm": 1,
    "batch_norm_decay": 0.995,
    "verbose": True,
    "random_seed": 0,
    "deep_init_size": 50,
    "use_inner": False
}
afm_params['feature_size'] = total_feature
afm_params['field_size'] = len(train_feature_index.columns)

In [6]:
weights = dict()
weights['feature_embeddings'] = tf.Variable(tf.random_normal(
    [afm_params['feature_size'], afm_params['embedding_size']],
    mean=0.0,
    stddev=0.01),
                                            name='feauter_embeddings')
weights['feature_bias'] = tf.Variable(tf.random_normal(
    [afm_params['feature_size'], 1], mean=0.0, stddev=0.01),
                                      name='feature_bias')
weights['bias'] = tf.Variable(tf.constant(0.1), name='bias')

glorot = 2.0 / (afm_params['attention_size'] + afm_params['embedding_size'])
weights['attention_w'] = tf.Variable(tf.random_normal(
    [afm_params['embedding_size'], afm_params['attention_size']],
    mean=0.0,
    stddev=glorot),
                                     name='attention_w')
weights['attention_b'] = tf.Variable(tf.random_normal([
    afm_params['attention_size'],
],
                                                      mean=0.0,
                                                      stddev=glorot),
                                     name='attention_b')
weights['attention_h'] = tf.Variable(tf.random_normal([
    afm_params['attention_size'],
],
                                                      mean=0.0,
                                                      stddev=glorot),
                                     name='attention_b')
weights['attention_p'] = tf.Variable(tf.random_normal(
    [afm_params['embedding_size'], 1], mean=0.0, stddev=glorot),
                                     name='attention_b')

In [7]:
feat_index = tf.placeholder(tf.int32, shape=[None, None], name='feat_index')
feat_value = tf.placeholder(tf.float32, shape=[None, None], name='feat_value')
label = tf.placeholder(tf.float32, shape=[None, 1], name='label')

In [8]:
embeddings = tf.nn.embedding_lookup(weights['feature_embeddings'], feat_index)
reshaped_feat_value = tf.reshape(feat_value, [-1, afm_params['field_size'], 1])
embeddings = tf.multiply(embeddings, reshaped_feat_value)

In [9]:
# first order part
y_first_order = tf.nn.embedding_lookup(weights['feature_bias'], feat_index)
y_first_order_output = tf.reduce_sum(tf.multiply(y_first_order, reshaped_feat_value),2)

In [10]:
# element wise-> vi * vj
element_wise_product_list = []
for i in range(0, afm_params['field_size'] - 1):
    for j in range(i + 1, afm_params['field_size']):
        element_wise_product_list.append(
            tf.multiply(embeddings[:, i, :], embeddings[:, j, :]))
# (666, ?, 8)
element_wise_product = tf.stack(element_wise_product_list)
# (?, 666, 8)
element_wise_product = tf.transpose(element_wise_product,
                                    perm=[1, 0, 2],
                                    name='element_wise_product')

# attention part
num_iteractions = int(afm_params['field_size'] *
                      (afm_params['field_size'] - 1) / 2)
# (?, 666, 10)
attention_wx_plus_b = tf.reshape(
    tf.matmul(
        tf.reshape(element_wise_product_list,
                   shape=(-1, afm_params['embedding_size'])),
        weights['attention_w']) + weights['attention_b'],
    [-1, num_iteractions, afm_params['attention_size']])

# (?, 666, 1)
attention_exp = tf.exp(
    tf.reduce_sum(tf.multiply(tf.nn.relu(attention_wx_plus_b),
                              weights['attention_h']),
                  axis=2,
                  keep_dims=True))
# (?, 1, 1)
attention_exp_sum = tf.reduce_sum(attention_exp, axis=1, keep_dims=True)
# (?, 666, 1)
attention_out = tf.div(attention_exp, attention_exp_sum, name='attention_out')
# (?, 8)
attention_x_product = tf.reduce_sum(tf.multiply(attention_out,
                                                element_wise_product),
                                    axis=1,
                                    name='afm')
attention_part_sum = tf.matmul(attention_x_product, weights['attention_p'])

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [11]:
y_bias = weights['bias'] * tf.ones_like(label)
out = tf.add_n([tf.reduce_sum(y_first_order_output, axis=1, keep_dims=True),
               attention_part_sum, y_bias])
out = tf.nn.sigmoid(out)
loss = tf.losses.log_loss(label, out)
optimizer = tf.train.AdamOptimizer(learning_rate=afm_params['learning_rate'],
                                   beta1=0.9,
                                   beta2=0.999,
                                   epsilon=1e-8).minimize(loss)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [12]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_size = int(len(train_feature_index)/afm_params['batch_size'])
    for i in range(afm_params['epoch']):
        for j in range(batch_size):
            start = i * afm_params['batch_size']
            end = (i+1) * afm_params['batch_size']
            end = end if end<len(train_feature_index) else len(train_feature_index)
            feat_index_batch = train_feature_index[start:end]
            feat_value_batch = train_feature_value[start:end]
            label_batch = train_y[start:end]
            feed_dict = {
                feat_index:feat_index_batch,
                feat_value:feat_value_batch,
                label:label_batch
            }
            l,o = sess.run([loss,optimizer], feed_dict)
            print(l)

0.7524369
0.73426753
0.7164515
0.69899505
0.68190384
0.665183
0.6488371
0.63286984
0.61728424
0.60315657
0.5885144
0.57426316
0.560402
0.5469296
0.5338442
0.5211432
0.50882363
0.4968817
0.48174083
0.4702792
0.45916504
0.4483965
0.4379704
0.4278827
0.41812855
0.40870252
0.39959845
0.39354745
0.38513884
0.37702805
0.36920774
0.3616704
0.3544082
0.34741324
0.34067762
0.33419308
0.32002747
0.31384528
0.30787823
0.30212092
0.29656765
0.29121262
0.2860498
0.28107333
0.27627707
0.28264448
0.27843407
0.27439252
0.27051222
0.26678607
0.2632072
0.259769
0.2564652
0.25328982
0.25130227
0.24841407
0.24563567
0.24296224
0.24038917
0.23791203
0.23552662
0.2332289
0.23101512
0.2354325
0.23346788
0.23157686
0.22975603
0.22800203
0.2263118
0.2246824
0.22311103
0.22159505
0.2297241
0.22841144
0.22714776
0.2259306
0.22475767
0.22362678
0.2225359
0.22148313
0.22046667
0.202573
0.20153496
0.20051938
0.1995263
0.19855571
0.1976075
0.19668147
0.19577745
0.19489516
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
