In [1]:
import tensorflow as tf
import numpy as np
from rnn import dynamic_rnn
from tensorflow.python.ops.rnn_cell import *
# from tensorflow.contrib.rnn.python.ops.core_rnn_cell import _linear as _Linear
# from tensorflow.contrib.rnn.python.ops.core_rnn_cell import _Linear

from tensorflow import keras
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variable_scope as vs
from keras import backend as K
from tensorflow.python.ops.rnn_cell import RNNCell, GRUCell

Using TensorFlow backend.


In [2]:
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import rnn_cell_impl
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest

In [3]:
RNNCell = rnn_cell_impl.RNNCell
_WEIGHTS_VARIABLE_NAME = rnn_cell_impl._WEIGHTS_VARIABLE_NAME
_BIAS_VARIABLE_NAME = rnn_cell_impl._BIAS_VARIABLE_NAME

class _Linear(object):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Args:
    args: a 2D Tensor or a list of 2D, batch, n, Tensors.
    output_size: int, second dimension of weight variable.
    dtype: data type for variables.
    build_bias: boolean, whether to build a bias variable.
    bias_initializer: starting value to initialize the bias
      (default is all zeros).
    kernel_initializer: starting value to initialize the weight.

    Raises:
    ValueError: if inputs_shape is wrong.
    """

    def __init__(self,
               args,
               output_size,
               build_bias,
               bias_initializer=None,
               kernel_initializer=None):
        self._build_bias = build_bias

        if args is None or (nest.is_sequence(args) and not args):
            raise ValueError("`args` must be specified")
        if not nest.is_sequence(args):
            args = [args]
            self._is_sequence = False
        else:
            self._is_sequence = True

        # Calculate the total size of arguments on dimension 1.
        total_arg_size = 0
        shapes = [a.get_shape() for a in args]
        for shape in shapes:
            if shape.ndims != 2:
                raise ValueError("linear is expecting 2D arguments: %s" % shapes)
            if shape.dims[1].value is None:
                raise ValueError("linear expects shape[1] to be provided for shape %s, "
                             "but saw %s" % (shape, shape[1]))
            else:
                total_arg_size += shape.dims[1].value

        dtype = [a.dtype for a in args][0]

        scope = vs.get_variable_scope()
        with vs.variable_scope(scope) as outer_scope:
            self._weights = vs.get_variable(
              _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size],
              dtype=dtype,
              initializer=kernel_initializer)
            if build_bias:
                with vs.variable_scope(outer_scope) as inner_scope:
                    inner_scope.set_partitioner(None)
                if bias_initializer is None:
                    bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
                self._biases = vs.get_variable(
                  _BIAS_VARIABLE_NAME, [output_size],
                  dtype=dtype,
                  initializer=bias_initializer)

    def __call__(self, args):
        if not self._is_sequence:
            args = [args]

        if len(args) == 1:
            res = math_ops.matmul(args[0], self._weights)
        else:
          # Explicitly creating a one for a minor performance improvement.
            one = constant_op.constant(1, dtype=dtypes.int32)
            res = math_ops.matmul(array_ops.concat(args, one), self._weights)
        if self._build_bias:
            res = nn_ops.bias_add(res, self._biases)
        return res

In [41]:
class VecAttGRUCell(RNNCell):
    """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
    Args:
      num_units: int, The number of units in the GRU cell.
      activation: Nonlinearity to use.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
       in an existing scope.  If not `True`, and the existing scope already has
       the given variables, an error is raised.
      kernel_initializer: (optional) The initializer to use for the weight and
      projection matrices.
      bias_initializer: (optional) The initializer to use for the bias.
    """

    def __init__(self,
                 num_units,
                 activation=None,
                 reuse=None,
                 kernel_initializer=None,
                 bias_initializer=None):
        super(VecAttGRUCell, self).__init__(_reuse=reuse)
        self._num_units = num_units
        self._activation = activation or math_ops.tanh
        self._kernel_initializer = kernel_initializer
        self._bias_initializer = bias_initializer
        self._gate_linear = None
        self._candidate_linear = None

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, att_score):
        return self.call(inputs, state, att_score)

    def call(self, inputs, state, att_score=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(
                    1.0, dtype=inputs.dtype)
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        #value = math_ops.sigmoid(self._gate_linear)
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
#         print(self._candidate_linear)
        c = self._activation(self._candidate_linear([inputs, r_state]))
#         c = self._activation(self._candidate_linear)
        u = (1.0 - att_score) * u
        new_h = u * state + (1 - u) * c
        return new_h, new_h

    
def din_fcn_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, forCnn=False):
    if isinstance(facts, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        facts = tf.concat(facts, 2)
    if len(facts.get_shape().as_list()) == 2:
        facts = tf.expand_dims(facts, 1)

    if time_major:
        # (T,B,D) => (B,T,D)
        facts = tf.array_ops.transpose(facts, [1, 0, 2])
    # Trainable parameters
    mask = tf.equal(mask, tf.ones_like(mask))
    facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
    querry_size = query.get_shape().as_list()[-1]
    query = tf.layers.dense(query, facts_size, activation=None, name='f1' + stag)
    query = prelu(query)
    queries = tf.tile(query, [1, tf.shape(facts)[1]])
    queries = tf.reshape(queries, tf.shape(facts))
    din_all = tf.concat([queries, facts, queries-facts, queries*facts], axis=-1)
    d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag)
    d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag)
    d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag)
    d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]])
    scores = d_layer_3_all
    # Mask
    # key_masks = tf.sequence_mask(facts_length, tf.shape(facts)[1])   # [B, T]
    key_masks = tf.expand_dims(mask, 1) # [B, 1, T]
    paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
    if not forCnn:
        scores = tf.where(key_masks, scores, paddings)  # [B, 1, T]

    # Scale
    # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5)

    # Activation
    if softmax_stag:
        scores = tf.nn.softmax(scores)  # [B, 1, T]

    # Weighted sum
    if mode == 'SUM':
        output = tf.matmul(scores, facts)  # [B, 1, H]
        # output = tf.reshape(output, [-1, tf.shape(facts)[-1]])
    else:
        scores = tf.reshape(scores, [-1, tf.shape(facts)[1]])
        output = facts * tf.expand_dims(scores, -1) # tf.expand_dims(scores, -1) => [B, T, 1], facts => [B,T,H]
        output = tf.reshape(output, tf.shape(facts)) # [B, T, H]
    if return_alphas:
        return output, scores
    return output


def prelu(_x, scope=''):
    """parametric ReLU activation"""
    with tf.variable_scope(name_or_scope=scope, default_name="prelu"):
        _alpha = tf.get_variable("prelu_"+scope, shape=_x.get_shape()[-1],
                                 dtype=_x.dtype, initializer=tf.constant_initializer(0.1))
        return tf.maximum(0.0, _x) + _alpha * tf.minimum(0.0, _x)
    

def auxiliary_net(input_x, stag='auxiliary_net', reuse=tf.AUTO_REUSE):
    with tf.variable_scope("aux", reuse=reuse):
        bn1 = tf.layers.batch_normalization(inputs=input_x, name='bn1' + stag, reuse=tf.AUTO_REUSE, training=True)
        dnn1 = tf.layers.dense(bn1, 32, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE)
        dnn1 = tf.nn.relu(dnn1)
        dnn2 = tf.layers.dense(dnn1, 1, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE)
        y_hat = tf.nn.sigmoid(dnn2)
    return y_hat


def auxiliary_loss(h_states, click_label, mask, stag=None):
    #h_states = tf.placeholder(tf.float32, [None, 10, 32])
    #mask = tf.placeholder(tf.float32, [None, 10, 1])
    # click_label = tf.placeholder(tf.float32, [None, 10, 1])
    # 每一个 用户状态序列中，如果长度不足，则将 click_prob 中根据补零数据预测出来的部分转化成0，同时label中不足长度的部分标记为0，以此来做处理
    click_prob = auxiliary_net(h_states) * mask
    loss=tf.reduce_mean(-tf.reduce_mean(1.0*click_label*tf.log(click_prob) + (1-click_label)*tf.log(1-click_prob), reduction_indices=1))
    return loss

In [42]:
SEQ_LENGTH=10
FEA_NUM=40
HIDDEN_SIZE=32

tf.reset_default_graph()

hist_item_emb = tf.placeholder(tf.float32, [None, SEQ_LENGTH, FEA_NUM])
seq_length = tf.placeholder(tf.int32, [None])
target_item_emb = tf.placeholder(tf.float32,[None, FEA_NUM])
mask = tf.placeholder(tf.float32, [None, None]) # [B, T]
click_label = tf.placeholder(tf.float32, [None, SEQ_LENGTH, 1])
aux_mask = tf.expand_dims(mask, 2) # [B, T, 1]

print(aux_mask)

# RNN for history sequence
with tf.name_scope('rnn_hist'):
    rnn_outputs, _ = dynamic_rnn(GRUCell(HIDDEN_SIZE), 
                                 inputs=hist_item_emb,
                                 sequence_length=seq_length, 
                                 dtype=tf.float32,
                                 scope="gru_hist")
#     tf.summary.histogram('GRU_output', rnn_outputs)

with tf.name_scope('auxiliary_net_loss'):
    aux_loss = auxiliary_loss(rnn_outputs, click_label, aux_mask)


# Attention layer
with tf.name_scope('Attention_layer'):
    att_outputs, alphas = din_fcn_attention(target_item_emb, 
                                            rnn_outputs, 
                                            HIDDEN_SIZE, 
                                            mask=mask,
                                            softmax_stag=1, 
                                            stag='1_1', 
                                            mode='LIST', 
                                            return_alphas=True)
    
#     tf.summary.histogram('alpha_output', alphas)
    
# RNN for top history sequence
with tf.name_scope('rnn_top'):
    rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), 
                                             inputs=rnn_outputs,
                                             att_scores =tf.expand_dims(alphas, -1),
                                             sequence_length=seq_length,
                                             dtype=tf.float32,
                                             scope="gru2")
    
with tf.name_scope('fc'):
    o1 = tf.layers.dense(final_state2, 16, activation=None)
    o2 = tf.nn.relu(o1)
    o3 = tf.layers.dense(o2, 1, activation=None)
    pred = tf.nn.sigmoid(o3)

Tensor("ExpandDims:0", shape=(?, ?, 1), dtype=float32)


In [43]:
rnn_outputs, auxiliary_loss, att_outputs, alphas, rnn_outputs2, final_state2, pred

(<tf.Tensor 'rnn_hist/gru_hist/transpose:0' shape=(?, 10, 32) dtype=float32>,
 <function __main__.auxiliary_loss(h_states, click_label, mask, stag=None)>,
 <tf.Tensor 'Attention_layer/Reshape_3:0' shape=(?, 10, 32) dtype=float32>,
 <tf.Tensor 'Attention_layer/Reshape_2:0' shape=(?, ?) dtype=float32>,
 <tf.Tensor 'rnn_top/gru2/transpose:0' shape=(?, 10, 32) dtype=float32>,
 <tf.Tensor 'rnn_top/gru2/while/Exit_2:0' shape=(?, 32) dtype=float32>,
 <tf.Tensor 'fc/Sigmoid:0' shape=(?, 1) dtype=float32>)

In [44]:
params = [param for param in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)]

In [45]:
params

[<tf.Variable 'gru_hist/gru_cell/gates/kernel:0' shape=(72, 64) dtype=float32_ref>,
 <tf.Variable 'gru_hist/gru_cell/gates/bias:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'gru_hist/gru_cell/candidate/kernel:0' shape=(72, 32) dtype=float32_ref>,
 <tf.Variable 'gru_hist/gru_cell/candidate/bias:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'aux/bn1auxiliary_net/gamma:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'aux/bn1auxiliary_net/beta:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'aux/bn1auxiliary_net/moving_mean:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'aux/bn1auxiliary_net/moving_variance:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'aux/f1auxiliary_net/kernel:0' shape=(32, 32) dtype=float32_ref>,
 <tf.Variable 'aux/f1auxiliary_net/bias:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'aux/f2auxiliary_net/kernel:0' shape=(32, 1) dtype=float32_ref>,
 <tf.Variable 'aux/f2auxiliary_net/bias:0' shape=(1,) dtype=float32_ref>,
 <tf.Variable 'f11_1/kernel:0' shap

In [46]:
y_true = tf.placeholder(tf.float32, [None, 1])
loss_train = tf.reduce_mean(-tf.reduce_sum(1.0*y_true*tf.log(pred) + (1-y_true)*tf.log(1-pred), reduction_indices=1))

In [47]:
y_true

<tf.Tensor 'Placeholder_5:0' shape=(?, 1) dtype=float32>

In [48]:
loss = loss_train + aux_loss
loss

<tf.Tensor 'add_1:0' shape=() dtype=float32>

In [49]:
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, var_list = params) 

In [50]:
train_op

<tf.Operation 'Adam' type=NoOp>

In [73]:
sess = tf.Session()

In [74]:
sess.run(tf.global_variables_initializer()) #初始化变量    

In [85]:
# hist_item_emb = tf.placeholder(tf.float32, [None, SEQ_LENGTH, FEA_NUM])
# seq_length = tf.placeholder(tf.int32, [None])
# target_item_emb = tf.placeholder(tf.float32,[None, FEA_NUM])
# mask = tf.placeholder(tf.float32, [None, None]) # [B, T]
# click_label = tf.placeholder(tf.float32, [None, SEQ_LENGTH, 1])
# aux_mask = tf.expand_dims(mask, 2) # [B, T, 1]

hist_item_emb_ = np.random.rand(5120,10,40)
seq_length_ = np.random.randint(1,10, (5120))
target_item_emb_ = np.random.rand(5120, 40)
mask_ = np.random.rand(5120, 10)
click_label_ = np.random.randint(0,2, (5120, 10, 1))
# aux_mask = tf.expand_dims(mask, 2) 
y_true_ = np.random.randint(0,2,(5120, 1))

In [86]:
y_true_.shape

(5120, 1)

In [89]:
for _ in range(100):
    
    _ = sess.run([train_op], {
                            hist_item_emb:hist_item_emb_,
                            seq_length:seq_length_,
                            target_item_emb:target_item_emb_,
                            mask:mask_,
                            click_label:click_label_,
                            y_true:y_true_
                           })
    hist_item_emb_ = np.random.rand(5120,10,40)
    seq_length_ = np.random.randint(1,10, (5120))
    target_item_emb_ = np.random.rand(5120, 40)
    mask_ = np.random.rand(5120, 10)
    click_label_ = np.random.randint(0,1, (5120, 10, 1))
    y_true_ = np.random.randint(0,1,(5120, 1))
    
    loss1, loss2 = sess.run([loss_train, aux_loss], {
                            hist_item_emb:hist_item_emb_,
                            seq_length:seq_length_,
                            target_item_emb:target_item_emb_,
                            mask:mask_,
                            click_label:click_label_,
                            y_true:y_true_
                           })
    
    print(loss1, loss2)

0.00059676846 0.0021557226
0.0006191259 0.0020848073
0.00058878335 0.0021654188
0.0006305096 0.0020741227
0.0006068589 0.0019889178
0.0005889498 0.0019987815
0.0006311506 0.0019951523
0.0005913933 0.0019470481
0.00056619116 0.0019450344
0.0005399228 0.0019892529
0.0005686432 0.0018641523
0.0005417549 0.0019100204
0.00055691536 0.0018468474
0.0005571193 0.0018342842
0.0005238411 0.0018153994
0.0005319339 0.0018153858
0.00054773287 0.0017534752
0.0005331985 0.0017464008
0.00056321593 0.0016933263
0.0005456777 0.0017470482
0.00052895735 0.001698118
0.0004876079 0.0016912373
0.0005102125 0.001669775
0.0005172413 0.0016325644
0.0004925204 0.0016179007
0.00049863616 0.0016043857
0.0004910901 0.0016223409
0.00048507997 0.0016142877
0.00046554144 0.0015594846
0.0004885109 0.00155946
0.00045252306 0.0015332955
0.00047434625 0.0015017588
0.00046157464 0.0015112818
0.0004771597 0.0015021015
0.00046482505 0.0014795323
0.00046771168 0.001465661
0.0004593689 0.0014384973
0.0004598759 0.0014326426
0.