In [1]:
import import_ipynb
from tower_layer import tower_layer
import os
from typing import List, Tuple, Any
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column as fc

importing Jupyter notebook from tower_layer.ipynb


# Model parameters

In [2]:
# Training parameters
model_dir = ".model\\model_dir"  # Directory where model parameters, graph, etc. are saved
output_dir = ".model\\output_dir"  # Directory where pb files are saved

train_data = "E:/Deep Learning/dataset/wechat_bigdata/tfrecord/train.tfrecord"  # Path to the train data
eval_data = "E:/Deep Learning/dataset/wechat_bigdata/tfrecord/test.tfrecord"  # Path to the evaluation data
vocabulary_dir = "E:/Deep Learning/dataset/wechat_bigdata/vocabulary/"  # Folder where the vocabulary files are stored
num_epochs = 10  # Number of epochs in the training phase
train_steps = 100000  # Number of (global) training steps to perform
shuffle_buffer_size = 10000  # Dataset shuffle buffer size
num_parallel_readers = -1  # Number of parallel readers for training data
save_checkpoints_steps = 1000  # Save a checkpoint every this many steps

# Model parameters
batch_size = 1024  # Training batch size
learning_rate = 0.005  # Learning rate
hidden_units = [512, 256, 128]  # Number of units in each hidden layer of the final output part
# Must be a plain bool: a trailing comma here would silently turn it into the tuple (True,).
batch_norm = True  # Perform batch normalization (True or False)
dropout_rate = 0.1  # Dropout rate
num_experts = 3  # Number of experts
expert_hidden_units = 512  # Expert module output dimension
num_tasks = 3  # Number of tasks, i.e. the number of gates
task_names = ["read_comment", "like", "click_avatar"]  # Task names; each must be a key in the tfrecord file


In [3]:
# Returns a 3-tuple of feature-column lists.
def create_feature_columns() -> Tuple[list, list, list]:
    """Build the feature columns for the model inputs and the labels.

    Reads the module-level ``vocabulary_dir`` (folder holding the vocabulary
    files) and ``task_names`` (one label column is created per task name).

    Returns:
        dense_feature_columns (list): numeric columns for the continuous features.
        category_feature_columns (list): embedding columns for the categorical features.
        label_feature_columns (list): numeric columns for the labels.
    """
    # Continuous features: every key becomes a numeric column defaulting to 0.0.
    dense_keys = [
        'videoplayseconds',
        # user-side 7-day sums
        'u_read_comment_7d_sum', 'u_like_7d_sum', 'u_click_avatar_7d_sum', 'u_forward_7d_sum',
        'u_comment_7d_sum', 'u_follow_7d_sum', 'u_favorite_7d_sum',
        # item-side 7-day sums
        'i_read_comment_7d_sum', 'i_like_7d_sum', 'i_click_avatar_7d_sum', 'i_forward_7d_sum',
        'i_comment_7d_sum', 'i_follow_7d_sum', 'i_favorite_7d_sum',
        # user x author cross feature
        'c_user_author_read_comment_7d_sum',
    ]
    dense_feature_columns = [fc.numeric_column(key, default_value=0.0) for key in dense_keys]

    # Categorical features as (feature key, vocabulary file name, embedding dimension).
    # Note that 'manual_tag_list' is looked up in 'manual_tag_id.txt'.
    # No column is built for 'his_read_comment_7d_seq': it was never part of the
    # returned lists, so constructing it only created an unused feedid.txt lookup.
    category_specs = [
        ('userid', 'userid.txt', 16),
        ('device', 'device.txt', 2),
        ('authorid', 'authorid.txt', 4),
        ('bgm_song_id', 'bgm_song_id.txt', 4),
        ('bgm_singer_id', 'bgm_singer_id.txt', 4),
        ('manual_tag_list', 'manual_tag_id.txt', 4),
        ('feedid', 'feedid.txt', 16),
    ]
    category_feature_columns = []
    for key, vocab_file, dimension in category_specs:
        categorical = fc.categorical_column_with_vocabulary_file(key, os.path.join(vocabulary_dir, vocab_file))
        # 'mean' is embedding_column's default combiner; it is spelled out for every column.
        category_feature_columns.append(fc.embedding_column(categorical, dimension, combiner='mean'))

    # Labels: one numeric column per task.
    label_feature_columns = [fc.numeric_column(task_name, default_value=0.0) for task_name in task_names]

    return dense_feature_columns, category_feature_columns, label_feature_columns

In [4]:
def example_parser(serialized_example):
    """Parse a batch of serialized Examples into a (features, labels) pair.

    Uses the module-level ``total_feature_columns``, ``label_feature_columns``
    and ``task_names``, which must be defined before this function is called.

    Args:
        serialized_example: serialized examples, passed to ``tf.io.parse_example``.

    Returns:
        features, labels: the parsed feature dict with the label entries removed,
        and a dict mapping each task name to its parsed label.
    """
    # One parse spec covering both the input features and the label columns.
    parse_spec = tf.feature_column.make_parse_example_spec(total_feature_columns + label_feature_columns)
    parsed = tf.io.parse_example(serialized_example, features=parse_spec)

    # Move every task's label out of the feature dict.
    labels = {}
    for task_name in task_names:
        labels[task_name] = parsed.pop(task_name)

    return parsed, labels

def train_input_fn(filepath, example_parser, batch_size, num_epochs, shuffle_buffer_size):
    """Training-phase input_fn for the MMoE model.

    Args:
        filepath (str): path of the TFRecord file to read.
        example_parser (function): function applied to each batch of serialized examples.
        batch_size (int): number of records per batch.
        num_epochs (int): number of times the dataset is repeated.
        shuffle_buffer_size (int): shuffle buffer size; shuffling is skipped when it is not positive.

    Returns:
        dataset
    """
    records = tf.data.TFRecordDataset(filepath)
    if shuffle_buffer_size > 0:
        records = records.shuffle(shuffle_buffer_size)

    # Records are batched first, so the parser receives whole batches.
    return (
        records
        .repeat(num_epochs)
        .batch(batch_size)
        .map(example_parser, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .prefetch(1)
    )

def eval_input_fn(filepath, example_parser, batch_size):
    """Evaluation-phase input_fn for the MMoE model (no shuffling, no repeat).

    Args:
        filepath (str): path of the TFRecord file to read.
        example_parser (function): function applied to each batch of serialized examples.
        batch_size (int): number of records per batch.

    Returns:
        dataset
    """
    # Records are batched first, so the parser receives whole batches.
    return (
        tf.data.TFRecordDataset(filepath)
        .batch(batch_size)
        .map(example_parser, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .prefetch(1)
    )

In [5]:
# Build the feature columns at module scope; example_parser reads
# total_feature_columns and label_feature_columns from here.
dense_feature_columns, category_feature_columns, label_feature_columns = create_feature_columns()

total_feature_columns = dense_feature_columns + category_feature_columns

# Smoke-test the training input pipeline. The path comes from the train_data
# setting so this cell cannot drift away from what training actually reads.
dataset = train_input_fn(train_data,
                         example_parser,
                         batch_size,
                         num_epochs,
                         shuffle_buffer_size)

# Pull a single (features, labels) batch for inspection.
one_element = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()

one_element

INFO:tensorflow:vocabulary_size = 19626 in userid is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/userid.txt.
INFO:tensorflow:vocabulary_size = 106444 in feedid is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/feedid.txt.
INFO:tensorflow:vocabulary_size = 2 in device is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/device.txt.
INFO:tensorflow:vocabulary_size = 18789 in authorid is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/authorid.txt.
INFO:tensorflow:vocabulary_size = 25159 in bgm_song_id is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/bgm_song_id.txt.
INFO:tensorflow:vocabulary_size = 17500 in bgm_singer_id is inferred from the number of elements in the vocab

({'authorid': <tensorflow.python.framework.sparse_tensor.SparseTensor at 0x1b458cb1130>,
  'bgm_singer_id': <tensorflow.python.framework.sparse_tensor.SparseTensor at 0x1b458cb13a0>,
  'bgm_song_id': <tensorflow.python.framework.sparse_tensor.SparseTensor at 0x1b458c5caf0>,
  'device': <tensorflow.python.framework.sparse_tensor.SparseTensor at 0x1b458c5c6a0>,
  'feedid': <tensorflow.python.framework.sparse_tensor.SparseTensor at 0x1b458c5c910>,
  'manual_tag_list': <tensorflow.python.framework.sparse_tensor.SparseTensor at 0x1b458c5c3a0>,
  'userid': <tensorflow.python.framework.sparse_tensor.SparseTensor at 0x1b458c9ccd0>,
  'c_user_author_read_comment_7d_sum': <tf.Tensor: shape=(1024, 1), dtype=float32, numpy=
  array([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]], dtype=float32)>,
  'i_click_avatar_7d_sum': <tf.Tensor: shape=(1024, 1), dtype=float32, numpy=
  array([[0.6931472],
         [0.       ],
         [0.       ],
         ...

In [6]:

def mmoe_model_fn(features, labels, mode, params):
    """model_fn of the MMoE (multi-gate mixture-of-experts) model.

    Args:
        features (dict): first return value of the input_fn, the input features.
        labels (dict): second return value of the input_fn, indexed by task name.
            Not read in PREDICT mode.
        mode: tf.estimator.ModeKeys
        params (dict): hyper-parameters. Keys read here: "dense_feature_columns",
            "category_feature_columns", "num_experts", "expert_hidden_units",
            "num_tasks", "task_names", "hidden_units", "batch_norm",
            "dropout_rate" and "learning_rate".

    Returns:
        tf.estimator.EstimatorSpec
    """

    # Continuous features
    with tf.compat.v1.variable_scope("dense_input"):
        dense_input = tf.compat.v1.feature_column.input_layer(features, params["dense_feature_columns"])

    # Categorical features
    with tf.compat.v1.variable_scope("category_input"):
        category_input = tf.compat.v1.feature_column.input_layer(features, params["category_feature_columns"])

    # Concatenate all inputs along the feature axis
    concat_all_input = tf.concat([dense_input, category_input], axis=-1)

    # Experts
    with tf.compat.v1.variable_scope("experts"):
        # One dense layer per expert: [B, expert_hidden_units] * num_experts
        experts = [tf.compat.v1.layers.dense(concat_all_input,
                                             params["expert_hidden_units"],
                                             activation=tf.nn.relu,
                                             name=f"expert_{i}") for i in range(params["num_experts"])]
        # Stack along a new axis 1: (B, num_experts, expert_hidden_units)
        experts = tf.stack(experts, axis=1)

    with tf.compat.v1.variable_scope("gates"):
        # One softmax gate per task: [B, num_experts] * num_tasks
        gates = [tf.compat.v1.layers.dense(concat_all_input,
                                           params["num_experts"],
                                           activation=tf.nn.softmax,
                                           use_bias=False,    # the paper omits the bias
                                           name=f"gate_{i}") for i in range(params["num_tasks"])]

    with tf.compat.v1.variable_scope("tower"):
        # Per-task tower inputs
        towers = []
        for i in range(params["num_tasks"]):
            # Add a trailing axis to the gate output so it can be used in a matmul
            gate = tf.expand_dims(gates[i], axis=-1)    # (B, num_experts, 1)
            # Gate-weighted sum of the expert outputs:
            # (B, expert_hidden_units, num_experts) * (B, num_experts, 1) = (B, expert_hidden_units, 1)
            tower = tf.matmul(experts, gate, transpose_a=True)
            tower = tf.squeeze(tower, axis=-1)  # (B, expert_hidden_units)
            towers.append(tower)    # (B, expert_hidden_units) * num_tasks

        # Task names.
        # Writing `task_names = list(labels.keys())` instead raises an error when
        # exporting the saved_model, so the names are taken from params.
        task_names = params["task_names"]
        logit_list = [tower_layer(x,
                                  params["hidden_units"],
                                  mode,
                                  params["batch_norm"],
                                  params["dropout_rate"],
                                  task_name) for x, task_name in zip(towers, task_names)]
        # presumably (B, 1) * num_tasks -- depends on tower_layer; TODO confirm

    # ----- PREDICT behaviour -----
    prediction_list = [tf.sigmoid(logit, name=f"prediction_{task_name}")
                       for logit, task_name in zip(logit_list, task_names)]

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            f"{task_name}_probabilities": prediction for task_name, prediction in zip(task_names, prediction_list)
        }
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs)
    # ----- end PREDICT -----

    # One mean sigmoid cross-entropy loss per task; the total loss is their plain sum.
    losses = [tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels[task_name], logits=logit),
                             name=f"loss_{task_name}")
              for logit, task_name in zip(logit_list, task_names)]
    total_loss = tf.add_n(losses)

    # Accuracy thresholds the probability at 0.5. tf.cast replaces the deprecated
    # tf.compat.v1.to_float and produces the same float32 tensor.
    accuracy_list = [tf.compat.v1.metrics.accuracy(labels=labels[task_name],
                                                   predictions=tf.cast(tf.greater_equal(prediction, 0.5), tf.float32))
                     for task_name, prediction in zip(task_names, prediction_list)]
    auc_list = [tf.compat.v1.metrics.auc(labels=labels[task_name], predictions=prediction)
                for task_name, prediction in zip(task_names, prediction_list)]

    # ----- EVAL behaviour -----
    auc_metrics = {f"eval_{task_name}_auc": auc for task_name, auc in zip(task_names, auc_list)}
    accuracy_metrics = {f"eval_{task_name}_accuracy": accuracy for task_name, accuracy in zip(task_names, accuracy_list)}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=total_loss, eval_metric_ops={**accuracy_metrics, **auc_metrics})
    # ----- end EVAL -----

    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=params["learning_rate"], beta1=0.9,
                                                 beta2=0.999, epsilon=1e-8)
    # Run the ops registered in UPDATE_OPS together with every training step.
    update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss=total_loss, global_step=tf.compat.v1.train.get_global_step())

    # ----- TRAIN behaviour -----
    assert mode == tf.estimator.ModeKeys.TRAIN

    # TensorBoard summaries. Each metric is an (value, update_op) pair; index 1 is the update op.
    # The v1 summary API is used on purpose: under TF2 the bare tf.summary.scalar is the
    # v2 op, which writes nothing without a default summary writer, so these scalars
    # would not reach the Estimator's event files.
    for task_name, auc in zip(task_names, auc_list):
        tf.compat.v1.summary.scalar(f"train_{task_name}_auc", auc[1])
    for task_name, accuracy in zip(task_names, accuracy_list):
        tf.compat.v1.summary.scalar(f"train_{task_name}_accuracy", accuracy[1])

    # Training log output
    # Per-task loss
    loss_log = {f"train_{task_name}_loss": loss for task_name, loss in zip(task_names, losses)}
    # Per-task training AUC
    auc_log = {f"train_{task_name}_auc": auc[1] for task_name, auc in zip(task_names, auc_list)}
    # Per-task gate output (the full (B, num_experts) tensor is logged)
    gate_log = {f"{task_name}_gate_expert_weight": gate for task_name, gate in zip(task_names, gates)}

    loss_log_hook = tf.compat.v1.train.LoggingTensorHook(
        loss_log,
        every_n_iter=100
    )
    auc_log_hook = tf.compat.v1.train.LoggingTensorHook(
        auc_log,
        every_n_iter=100
    )
    gate_log_hook = tf.compat.v1.train.LoggingTensorHook(
        gate_log,
        every_n_iter=100
    )
    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op,
                                      training_hooks=[loss_log_hook, auc_log_hook, gate_log_hook])
    # ----- end TRAIN -----


In [7]:

"""训练入口"""

# Rebuild the feature columns at module scope so this cell does not depend on
# the inspection cell having been run; example_parser reads
# total_feature_columns and label_feature_columns from here.
dense_feature_columns, category_feature_columns, label_feature_columns = create_feature_columns()
total_feature_columns = dense_feature_columns + category_feature_columns

# Hyper-parameters handed to mmoe_model_fn through the Estimator.
params = dict(
    dense_feature_columns=dense_feature_columns,
    category_feature_columns=category_feature_columns,
    hidden_units=hidden_units,
    dropout_rate=dropout_rate,
    batch_norm=batch_norm,
    learning_rate=learning_rate,
    num_experts=num_experts,
    num_tasks=num_tasks,
    expert_hidden_units=expert_hidden_units,
    task_names=task_names,
)

# The number of tasks must match the length of the task-name list.
assert len(params["task_names"]) == params["num_tasks"], "num_tasks must equals length of task_names"

INFO:tensorflow:vocabulary_size = 19626 in userid is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/userid.txt.
INFO:tensorflow:vocabulary_size = 106444 in feedid is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/feedid.txt.
INFO:tensorflow:vocabulary_size = 2 in device is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/device.txt.
INFO:tensorflow:vocabulary_size = 18789 in authorid is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/authorid.txt.
INFO:tensorflow:vocabulary_size = 25159 in bgm_song_id is inferred from the number of elements in the vocabulary_file E:/Deep Learning/dataset/wechat_bigdata/vocabulary/bgm_song_id.txt.
INFO:tensorflow:vocabulary_size = 17500 in bgm_singer_id is inferred from the number of elements in the vocab

In [8]:
# Estimator wrapping the MMoE model_fn; checkpoints go to model_dir.
run_config = tf.estimator.RunConfig(model_dir=model_dir, save_checkpoints_steps=save_checkpoints_steps)
estimator = tf.estimator.Estimator(model_fn=mmoe_model_fn, params=params, config=run_config)


def _train_dataset():
    """Zero-argument input_fn for training, built from the notebook settings."""
    return train_input_fn(filepath=train_data, example_parser=example_parser,
                          batch_size=batch_size, num_epochs=num_epochs,
                          shuffle_buffer_size=shuffle_buffer_size)


def _eval_dataset():
    """Zero-argument input_fn for evaluation, built from the notebook settings."""
    return eval_input_fn(filepath=eval_data, example_parser=example_parser,
                         batch_size=batch_size)


train_spec = tf.estimator.TrainSpec(input_fn=_train_dataset, max_steps=train_steps)

# Serving input: parse serialized Examples using the input feature columns only (no labels).
feature_spec = tf.feature_column.make_parse_example_spec(total_feature_columns)
serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
best_exporter = tf.estimator.BestExporter(
    name="best_exporter",
    serving_input_receiver_fn=serving_input_receiver_fn,
    exports_to_keep=5,
)
exporters = [best_exporter]

eval_spec = tf.estimator.EvalSpec(
    input_fn=_eval_dataset,
    steps=None,
    exporters=exporters,
    throttle_secs=600,
)

tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

INFO:tensorflow:Using config: {'_model_dir': '.model\\model_dir', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distri

  experts = [tf.compat.v1.layers.dense(concat_all_input,
  return layer.apply(inputs)
  gates = [tf.compat.v1.layers.dense(concat_all_input,
  return layer.apply(inputs, training=training)
  return layer.apply(inputs, training=training)


Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
The value of AUC returned by this may race with the update so this is deprecated. Please use tf.keras.metrics.AUC instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from .model\model_dir\model.ckpt-21000
Instructions for updating:
Use standard file utilities to get mtimes.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 21000...
INFO:tensorflow:Saving checkpoints for 21000 into .model\model_dir\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 21000...
INFO:tensorflow:loss = 0.31467772, step = 21000
INFO:tensorflow:train_click_avatar_loss = 0.023751548, train_like_loss = 0.14526816, train_read_comment_loss = 0.14565802
INFO:tensorflow:train_click_avatar_auc = 0.82

INFO:tensorflow:global_step/sec: 10.4437
INFO:tensorflow:loss = 0.117363, step = 21600 (9.575 sec)
INFO:tensorflow:train_click_avatar_loss = 0.03375936, train_like_loss = 0.037053436, train_read_comment_loss = 0.046550207 (9.576 sec)
INFO:tensorflow:train_click_avatar_auc = 0.9553991, train_like_auc = 0.9456153, train_read_comment_auc = 0.958327 (9.575 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.91784954 0.06971823 0.01243224]
 [0.5293422  0.03740587 0.43325195]
 [0.9999645  0.00000237 0.00003311]
 ...
 [0.33128628 0.026504   0.64220977]
 [0.47320062 0.5041415  0.02265792]
 [0.13218834 0.81715703 0.05065458]], like_gate_expert_weight = [[0.00000079 0.00797677 0.9920224 ]
 [0.0000035  0.00559985 0.9943967 ]
 [0.         0.0000148  0.9999852 ]
 ...
 [0.00000004 0.00490186 0.99509805]
 [0.00003256 0.4244925  0.5754749 ]
 [0.00184224 0.00144477 0.99671304]], read_comment_gate_expert_weight = [[0.0091114  0.9908153  0.0000733 ]
 [0.24965404 0.75030833 0.0000376 ]
 [0.9966961 

INFO:tensorflow:global_step/sec: 10.3547
INFO:tensorflow:loss = 0.14835696, step = 22100 (9.658 sec)
INFO:tensorflow:train_click_avatar_loss = 0.026261408, train_like_loss = 0.055217568, train_read_comment_loss = 0.06687798 (9.659 sec)
INFO:tensorflow:train_click_avatar_auc = 0.9587828, train_like_auc = 0.95237017, train_read_comment_auc = 0.96443117 (9.659 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.12615208 0.6489073  0.22494055]
 [0.999964   0.00000953 0.00002642]
 [0.99602145 0.00040762 0.00357098]
 ...
 [0.99958175 0.00000356 0.00041468]
 [0.9378795  0.062028   0.00009251]
 [0.08686412 0.9025654  0.01057043]], like_gate_expert_weight = [[0.00093027 0.7559905  0.24307929]
 [0.00000584 0.0002883  0.99970585]
 [0.00063259 0.01501488 0.9843525 ]
 ...
 [0.11328051 0.00008544 0.88663405]
 [0.00054437 0.00082834 0.9986273 ]
 [0.00342439 0.00256251 0.99401313]], read_comment_gate_expert_weight = [[0.96631384 0.0333841  0.00030208]
 [0.06572488 0.9342725  0.00000254]
 [0.716

INFO:tensorflow:train_click_avatar_auc = 0.9527222, train_like_auc = 0.9567162, train_read_comment_auc = 0.9671744 (9.670 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.99531317 0.00468268 0.00000417]
 [0.23487082 0.7644153  0.00071388]
 [0.02621672 0.95644575 0.01733742]
 ...
 [0.9999435  0.00000054 0.00005599]
 [0.05811061 0.93702596 0.00486345]
 [0.00306836 0.0965266  0.900405  ]], like_gate_expert_weight = [[0.00006178 0.6167288  0.38320944]
 [0.00001615 0.2380676  0.7619162 ]
 [0.0155123  0.971901   0.01258664]
 ...
 [0.00002294 0.00000001 0.999977  ]
 [0.00320913 0.03178144 0.96500945]
 [0.9504483  0.04082841 0.00872331]], read_comment_gate_expert_weight = [[0.9999994  0.00000058 0.        ]
 [1.         0.00000001 0.        ]
 [0.75046456 0.2494225  0.00011302]
 ...
 [0.9999801  0.00001996 0.00000002]
 [0.02694767 0.9730477  0.00000469]
 [0.95708555 0.03531468 0.00759975]] (9.671 sec)
INFO:tensorflow:global_step/sec: 10.2477
INFO:tensorflow:loss = 0.124784365, step =

INFO:tensorflow:loss = 0.15420243, step = 23500 (10.325 sec)
INFO:tensorflow:train_click_avatar_loss = 0.02511353, train_like_loss = 0.059175048, train_read_comment_loss = 0.06991385 (10.325 sec)
INFO:tensorflow:train_click_avatar_auc = 0.9602129, train_like_auc = 0.9525106, train_read_comment_auc = 0.9665232 (10.325 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.9978794  0.00201595 0.00010463]
 [0.02548742 0.9744922  0.00002049]
 [0.9999654  0.00001517 0.00001945]
 ...
 [0.9993568  0.0004008  0.00024248]
 [0.9986815  0.00005018 0.00126835]
 [0.7146182  0.07849748 0.2068843 ]], like_gate_expert_weight = [[0.03387833 0.42051712 0.54560447]
 [0.69059455 0.00129187 0.30811355]
 [0.00002186 0.00000029 0.9999778 ]
 ...
 [0.00118453 0.0069197  0.99189574]
 [0.00025547 0.00006038 0.9996842 ]
 [0.00664484 0.59233475 0.40102035]], read_comment_gate_expert_weight = [[0.00014461 0.9992378  0.00061761]
 [0.00011062 0.9998894  0.00000003]
 [0.00025882 0.99973756 0.00000357]
 ...
 [0.999

INFO:tensorflow:global_step/sec: 10.1356
INFO:tensorflow:loss = 0.15348214, step = 24200 (9.865 sec)
INFO:tensorflow:train_click_avatar_loss = 0.021237636, train_like_loss = 0.07181516, train_read_comment_loss = 0.060429342 (9.866 sec)
INFO:tensorflow:train_click_avatar_auc = 0.96005356, train_like_auc = 0.9457384, train_read_comment_auc = 0.96634156 (9.865 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.00563443 0.97157943 0.02278608]
 [0.35929653 0.6404167  0.00028677]
 [0.9952069  0.00266848 0.00212456]
 ...
 [0.00118069 0.99864286 0.00017648]
 [0.99902    0.00017985 0.00080017]
 [0.0317326  0.93857545 0.02969195]], like_gate_expert_weight = [[0.00702142 0.35149217 0.64148647]
 [0.         0.         1.        ]
 [0.00343406 0.0721758  0.9243901 ]
 ...
 [0.00338221 0.06124155 0.9353763 ]
 [0.1360981  0.11379544 0.7501064 ]
 [0.00073862 0.00953785 0.98972356]], read_comment_gate_expert_weight = [[0.0498622  0.8864655  0.06367231]
 [0.09920168 0.90075856 0.00003969]
 [0.000

INFO:tensorflow:train_click_avatar_auc = 0.9609165, train_like_auc = 0.9491041, train_read_comment_auc = 0.96914107 (10.044 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.01617577 0.98272    0.00110424]
 [0.21631707 0.7387862  0.04489672]
 [0.8866566  0.00016123 0.11318231]
 ...
 [0.10844493 0.14075573 0.7507994 ]
 [0.00012913 0.99928963 0.0005812 ]
 [0.9999552  0.00002547 0.00001927]], like_gate_expert_weight = [[0.00000321 0.9454747  0.05452209]
 [0.00000247 0.38101506 0.6189825 ]
 [0.00000021 0.00049692 0.9995029 ]
 ...
 [0.0000011  0.96810037 0.03189865]
 [0.00004679 0.01258839 0.9873648 ]
 [0.00000735 0.89951116 0.10048143]], read_comment_gate_expert_weight = [[0.00419205 0.99579656 0.00001137]
 [0.00047476 0.99951303 0.00001219]
 [0.00014936 0.9998473  0.00000339]
 ...
 [0.00085936 0.99910706 0.0000336 ]
 [0.00692341 0.99225736 0.00081926]
 [1.         0.00000004 0.        ]] (10.044 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 25000...
I

INFO:tensorflow:loss = 0.11392413, step = 25600 (9.790 sec)
INFO:tensorflow:train_click_avatar_loss = 0.013094682, train_like_loss = 0.04954065, train_read_comment_loss = 0.0512888 (9.790 sec)
INFO:tensorflow:train_click_avatar_auc = 0.96214783, train_like_auc = 0.9516025, train_read_comment_auc = 0.9712131 (9.790 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.9751178  0.00488239 0.01999972]
 [0.18725152 0.00286622 0.8098822 ]
 [0.18340528 0.00004941 0.81654537]
 ...
 [0.36789528 0.00000155 0.63210326]
 [0.14637241 0.00001774 0.8536098 ]
 [0.1518997  0.00253351 0.8455668 ]], like_gate_expert_weight = [[0.00032861 0.02853136 0.97113997]
 [0.00620008 0.02867007 0.9651299 ]
 [0.00009884 0.01018389 0.98971725]
 ...
 [0.00144157 0.02314385 0.9754146 ]
 [0.00057311 0.8662626  0.1331643 ]
 [0.00000055 0.99869186 0.00130754]], read_comment_gate_expert_weight = [[0.10925985 0.89073765 0.00000259]
 [0.00029733 0.9995072  0.00019554]
 [0.99999976 0.00000022 0.00000001]
 ...
 [0.010359

INFO:tensorflow:global_step/sec: 9.93976
INFO:tensorflow:loss = 0.1545454, step = 26300 (10.061 sec)
INFO:tensorflow:train_click_avatar_loss = 0.017235849, train_like_loss = 0.0694719, train_read_comment_loss = 0.06783764 (10.061 sec)
INFO:tensorflow:train_click_avatar_auc = 0.9620606, train_like_auc = 0.9532783, train_read_comment_auc = 0.9713708 (10.061 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.9755163  0.00020967 0.02427407]
 [0.9999888  0.00000172 0.00000956]
 [0.10218688 0.01894866 0.8788644 ]
 ...
 [0.5795437  0.4191255  0.00133074]
 [0.64899904 0.33425146 0.0167495 ]
 [0.01831746 0.9814849  0.00019758]], like_gate_expert_weight = [[0.034529   0.00000005 0.9654709 ]
 [0.00425901 0.00000002 0.995741  ]
 [0.11640962 0.1209767  0.7626136 ]
 ...
 [0.00015432 0.9984426  0.00140307]
 [0.99483806 0.0000097  0.00515221]
 [0.0000212  0.0000006  0.9999782 ]], read_comment_gate_expert_weight = [[0.00001198 0.9999864  0.00000161]
 [0.6669909  0.3330064  0.0000027 ]
 [0.52213

INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 27000...
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:global_step/sec: 9.95588
INFO:tensorflow:loss = 0.12816432, step = 27000 (10.044 sec)
INFO:tensorflow:train_click_avatar_loss = 0.02482652, train_like_loss = 0.049736112, train_read_comment_loss = 0.053601697 (10.044 sec)
INFO:tensorflow:train_click_avatar_auc = 0.9636255, train_like_auc = 0.95269406, train_read_comment_auc = 0.9718305 (10.044 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.9988944  0.00042917 0.00067644]
 [0.999962   0.00002974 0.00000834]
 [0.21230897 0.7457032  0.04198786]
 ...
 [0.989077   0.00477078 0.00615223]
 [0.00002133 0.9779377  0.02204089]
 [0.03601127 0.8923389  0.07164992]], like_gate_expert_weight = [[0.00000296 0.00105465 0.99894243]
 [0.00006    0.0000022  0.9999378 ]
 [0.00104294 0.         0.99895704]
 ...
 [0.00326933 0.10142356 0.8953071 ]
 [0.00030122 0.00022713 0

INFO:tensorflow:global_step/sec: 10.0926
INFO:tensorflow:loss = 0.09976241, step = 27700 (9.908 sec)
INFO:tensorflow:train_click_avatar_loss = 0.018142598, train_like_loss = 0.040796172, train_read_comment_loss = 0.04082364 (9.908 sec)
INFO:tensorflow:train_click_avatar_auc = 0.965096, train_like_auc = 0.953888, train_read_comment_auc = 0.9727085 (9.908 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.00167057 0.00000884 0.9983205 ]
 [0.00733179 0.21943568 0.7732325 ]
 [0.0000001  0.99971694 0.00028294]
 ...
 [0.00139238 0.00074347 0.9978642 ]
 [0.99371934 0.00000436 0.0062763 ]
 [0.12893564 0.00000032 0.87106407]], like_gate_expert_weight = [[0.00005173 0.277774   0.7221743 ]
 [0.00001414 0.01543881 0.9845471 ]
 [0.00002516 0.00225078 0.9977241 ]
 ...
 [0.0311302  0.03102238 0.9378474 ]
 [0.00000067 0.00004385 0.9999554 ]
 [0.00000251 0.00009373 0.9999038 ]], read_comment_gate_expert_weight = [[0.5570077  0.42880356 0.01418869]
 [0.99999976 0.         0.00000023]
 [0.0024713

INFO:tensorflow:train_click_avatar_auc = 0.9665438, train_like_auc = 0.9527916, train_read_comment_auc = 0.97385633 (10.048 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.99999917 0.00000088 0.00000004]
 [0.38339865 0.5994572  0.01714409]
 [0.8113698  0.04219008 0.14644004]
 ...
 [0.7072081  0.26097244 0.0318195 ]
 [0.91767013 0.00021586 0.082114  ]
 [0.377408   0.01977607 0.6028159 ]], like_gate_expert_weight = [[0.00000065 0.00000174 0.9999976 ]
 [0.02026841 0.96010464 0.01962693]
 [0.00000091 0.00002566 0.9999734 ]
 ...
 [0.00454938 0.00016038 0.9952903 ]
 [0.00000614 0.99694103 0.00305272]
 [0.00000055 0.00206322 0.99793625]], read_comment_gate_expert_weight = [[0.86910444 0.12963836 0.00125723]
 [0.00050554 0.9994899  0.00000447]
 [0.00066638 0.9993332  0.00000033]
 ...
 [0.00210831 0.99782026 0.00007152]
 [0.00033803 0.9970595  0.00260247]
 [0.40496948 0.5610776  0.03395293]] (10.048 sec)
INFO:tensorflow:global_step/sec: 9.41665
INFO:tensorflow:loss = 0.117460065, ste

INFO:tensorflow:loss = 0.12642175, step = 29000 (10.509 sec)
INFO:tensorflow:train_click_avatar_loss = 0.01453539, train_like_loss = 0.05055435, train_read_comment_loss = 0.06133201 (10.508 sec)
INFO:tensorflow:train_click_avatar_auc = 0.968262, train_like_auc = 0.9541608, train_read_comment_auc = 0.9742767 (10.509 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.33626467 0.0366523  0.62708306]
 [0.99999964 0.         0.00000032]
 [0.8902419  0.05307868 0.05667941]
 ...
 [0.8394412  0.00501587 0.15554291]
 [0.8536227  0.11709104 0.02928627]
 [0.9999912  0.00000752 0.00000128]], like_gate_expert_weight = [[0.03682183 0.84131527 0.12186287]
 [0.         0.         1.        ]
 [0.00599075 0.0273546  0.96665466]
 ...
 [0.00005197 0.45068464 0.54926336]
 [0.00000002 0.99972254 0.0002774 ]
 [0.00004964 0.12439983 0.87555057]], read_comment_gate_expert_weight = [[0.15257317 0.8472961  0.00013075]
 [0.00000077 0.9999993  0.        ]
 [0.29956108 0.7004386  0.00000027]
 ...
 [0.01931

INFO:tensorflow:train_click_avatar_auc = 0.9697159, train_like_auc = 0.9545347, train_read_comment_auc = 0.9744173 (10.610 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.9996916  0.00030738 0.00000105]
 [0.00186311 0.00317388 0.99496305]
 [0.99990654 0.00007696 0.0000165 ]
 ...
 [0.9994344  0.000019   0.00054652]
 [0.08280237 0.75839305 0.15880461]
 [0.9411684  0.00019962 0.0586319 ]], like_gate_expert_weight = [[0.24350528 0.00000006 0.75649464]
 [0.02466797 0.13997583 0.8353562 ]
 [0.74983215 0.         0.25016785]
 ...
 [0.00000728 0.00029154 0.9997012 ]
 [0.00625015 0.00043731 0.99331254]
 [0.00012216 0.00000012 0.9998777 ]], read_comment_gate_expert_weight = [[0.40062353 0.5991366  0.00023992]
 [0.0001757  0.9998035  0.00002088]
 [0.99768114 0.00231819 0.00000062]
 ...
 [0.00204592 0.9979519  0.00000215]
 [0.9999999  0.00000008 0.00000001]
 [0.00000045 0.9999995  0.        ]] (10.611 sec)
INFO:tensorflow:global_step/sec: 9.31352
INFO:tensorflow:loss = 0.14887929, step 

INFO:tensorflow:loss = 0.1815192, step = 30400 (10.800 sec)
INFO:tensorflow:train_click_avatar_loss = 0.01674076, train_like_loss = 0.06723473, train_read_comment_loss = 0.0975437 (10.800 sec)
INFO:tensorflow:train_click_avatar_auc = 0.97142607, train_like_auc = 0.9537442, train_read_comment_auc = 0.97418 (10.800 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.9396074  0.02236058 0.0380321 ]
 [0.9996774  0.00032264 0.00000005]
 [0.10635754 0.86658174 0.02706078]
 ...
 [1.         0.00000004 0.00000004]
 [0.04351834 0.8555539  0.10092774]
 [0.99999917 0.00000081 0.        ]], like_gate_expert_weight = [[0.00418372 0.00070015 0.9951161 ]
 [0.00000048 0.00000001 0.9999995 ]
 [0.8183294  0.0000003  0.18167026]
 ...
 [0.00000003 0.         1.        ]
 [0.00000029 0.9968786  0.00312118]
 [0.00030153 0.         0.9996985 ]], read_comment_gate_expert_weight = [[0.00016233 0.99981624 0.0000215 ]
 [0.0002509  0.9997491  0.        ]
 [0.01600927 0.98398983 0.00000082]
 ...
 [0.0020386

INFO:tensorflow:global_step/sec: 9.86557
INFO:tensorflow:loss = 0.13432752, step = 31100 (10.135 sec)
INFO:tensorflow:train_click_avatar_loss = 0.019050691, train_like_loss = 0.046776008, train_read_comment_loss = 0.06850082 (10.135 sec)
INFO:tensorflow:train_click_avatar_auc = 0.97024685, train_like_auc = 0.95422465, train_read_comment_auc = 0.9744305 (10.136 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.48571074 0.03074215 0.48354718]
 [0.00000204 0.84964794 0.15035   ]
 [0.99148285 0.00285719 0.00565999]
 ...
 [0.9999248  0.00000002 0.00007519]
 [0.00640591 0.9934442  0.00014999]
 [0.08068641 0.09250699 0.8268066 ]], like_gate_expert_weight = [[0.00000235 0.00000138 0.9999963 ]
 [0.00000005 0.9714038  0.02859624]
 [0.00000052 0.00160717 0.9983923 ]
 ...
 [0.00000027 0.00000049 0.9999993 ]
 [0.00263465 0.00757496 0.9897904 ]
 [0.00093589 0.00114849 0.99791557]], read_comment_gate_expert_weight = [[0.0002305  0.9997408  0.00002874]
 [0.99998903 0.00001097 0.00000001]
 [1.

INFO:tensorflow:train_click_avatar_loss = 0.034763396, train_like_loss = 0.049115304, train_read_comment_loss = 0.06270486 (12.061 sec)
INFO:tensorflow:train_click_avatar_auc = 0.96915454, train_like_auc = 0.9552703, train_read_comment_auc = 0.97479343 (12.060 sec)
INFO:tensorflow:click_avatar_gate_expert_weight = [[0.9999944  0.00000012 0.00000551]
 [0.99999523 0.00000232 0.00000256]
 [0.98505986 0.00474706 0.01019308]
 ...
 [0.99127483 0.00017387 0.00855126]
 [0.02626534 0.9735118  0.00022283]
 [0.9998419  0.00011    0.000048  ]], like_gate_expert_weight = [[0.00000144 0.00008056 0.999918  ]
 [0.         0.         1.        ]
 [0.00000138 0.00002196 0.99997663]
 ...
 [0.00000255 0.0977408  0.90225667]
 [0.00145879 0.9977679  0.00077322]
 [0.00818951 0.3127874  0.679023  ]], read_comment_gate_expert_weight = [[0.7767306  0.2229365  0.00033293]
 [0.2532395  0.74663395 0.00012645]
 [0.8755483  0.12445011 0.00000158]
 ...
 [0.98943615 0.01044356 0.00012025]
 [0.00349576 0.99650383 0.000

KeyboardInterrupt: 