In [1]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import tensorflow as tf
import socket
from sequential_base_model import SequentialBaseModel
from rnn_cell_implement import VecAttGRUCell
from rnn_dien import dynamic_rnn as dynamic_rnn_dien
from tensorflow.keras import backend as K


In [2]:

# Public names exported by this cell/module: only the SURGE model class.
__all__ = ["SURGEModel"]


class SURGEModel(SequentialBaseModel):
    """SURGE sequential recommendation model.

    The sequence graph is built in three stages (see `_build_seq_graph`):
    an interest graph learned over the history items, interest fusion and
    extraction through graph attentive convolution plus graph pooling, and a
    prediction stage that runs an attention-gated GRU over the pooled sequence.
    """

    def __init__(self, hparams, iterator_creator, seed=None):
        """Initialization of variables or temp hyperparameters

        Args:
            hparams (obj): A tf.contrib.training.HParams object, hold the entire set of hyperparameters.
            iterator_creator (obj): An iterator to load the data.
            seed (int): Seed forwarded to `SequentialBaseModel.__init__`; defaults to None.
        """
        # These attributes are assigned before the base constructor runs, as in
        # the original code -- presumably because the base class builds the graph
        # (and therefore reads them) inside its __init__; TODO confirm.
        self.hparams = hparams
        self.relative_threshold = 0.5  # fraction of non-zero edges kept when sparsifying the graph
        self.metric_heads = 1
        self.attention_heads = 1
        self.pool_layers = 1
        self.layer_shared = True
        # Target length of the pooled sequence depends on the deployment host.
        self.pool_length = 150 if self._on_kwai_host() else 30  # kuaishou / taobao
        # Forward the caller's seed (it used to be replaced by a hard-coded None).
        super().__init__(hparams, iterator_creator, seed=seed)

    @staticmethod
    def _on_kwai_host():
        """Return True when the local hostname contains 'kwai' (kuaishou settings, tf.contrib sort ops)."""
        return 'kwai' in socket.gethostname()

    def _sort_descending(self, values):
        """Sort `values` along the last axis in descending order with the sort op used on this host."""
        if self._on_kwai_host():
            return tf.contrib.framework.sort(values, direction='DESCENDING', axis=-1)
        return tf.sort(values, direction='DESCENDING', axis=-1)

    def _gather_per_row(self, values, index):
        """Return values[b, index[b]] for every row b of the 2-D tensor `values`."""
        index = tf.cast(index, tf.int32)
        if self._on_kwai_host():
            # Explicit (row, column) pairs; this branch avoids the `batch_dims` argument.
            row_index = tf.stack([tf.range(tf.shape(values)[0]), index], 1)  # B*2
            return tf.gather_nd(values, row_index)
        return tf.gather_nd(values, tf.expand_dims(index, -1), batch_dims=1)

    def _build_seq_graph(self):
        """ SURGE Model: 

            1) Interest graph: Graph construction based on metric learning
            2) Interest fusion and extraction : Graph convolution and graph pooling 
            3) Prediction: Flatten pooled graph to reduced sequence

        Returns:
            obj: Concatenation of the GRU final state, the graph readout, the
            target item embedding and the readout/target element-wise product.
        """
        X = tf.concat(
            [self.item_history_embedding, self.cate_history_embedding], 2
        )
        self.mask = self.iterator.mask
        self.float_mask = tf.cast(self.mask, tf.float32)
        self.real_sequence_length = tf.reduce_sum(self.mask, 1)

        with tf.name_scope('interest_graph'):
            ## Node similarity metric learning 
            S = []
            for _ in range(self.metric_heads):
                # weighted cosine similarity: a learned per-feature weight vector,
                # obtained by feeding a constant 1 through a bias-free dense layer
                self.weighted_tensor = tf.layers.dense(tf.ones([1, 1]), X.shape.as_list()[-1], use_bias=False)
                X_fts = X * tf.expand_dims(self.weighted_tensor, 0)
                X_fts = tf.nn.l2_normalize(X_fts,dim=2)
                S_one = tf.matmul(X_fts, tf.transpose(X_fts, (0,2,1))) # B*L*L
                # min-max normalization for mask
                S_min = tf.reduce_min(S_one, -1, keepdims=True)
                S_max = tf.reduce_max(S_one, -1, keepdims=True)
                S_one = (S_one - S_min) / (S_max - S_min)
                S += [S_one]
            S = tf.reduce_mean(tf.stack(S, 0), 0)
            # mask invalid nodes
            S = S * tf.expand_dims(self.float_mask, -1) * tf.expand_dims(self.float_mask, -2)

            ## Graph sparsification via seted sparseness 
            S_flatten = tf.reshape(S, [tf.shape(S)[0],-1]) # B*(L*L)
            sorted_S_flatten = self._sort_descending(S_flatten) # B*(L*L)
            # relative ranking strategy of the entire graph
            num_edges = tf.cast(tf.count_nonzero(S, [1,2]), tf.float32) # B
            to_keep_edge = tf.cast(tf.math.ceil(num_edges * self.relative_threshold), tf.int32)
            # per-sample similarity found at rank `to_keep_edge`; edges must beat it to survive
            threshold_score = self._gather_per_row(sorted_S_flatten, to_keep_edge) # B
            A = tf.cast(tf.greater(S, tf.expand_dims(tf.expand_dims(threshold_score, -1), -1)), tf.float32)


        with tf.name_scope('interest_fusion_extraction'):
            for l in range(self.pool_layers):
                reuse = False if l==0 else True
                # the returned cluster score is not needed here; attention weights
                # for the GRU are recomputed in the prediction stage
                X, A, graph_readout, _ = self._interest_fusion_extraction(X, A, layer=l, reuse=reuse)


        with tf.name_scope('prediction'):
            # flatten pooled graph to reduced sequence: a stable descending sort of
            # the 0/1 mask moves kept nodes to the front while preserving their order
            output_shape = self.mask.get_shape()
            if self._on_kwai_host():
                sorted_mask_index = tf.contrib.framework.argsort(self.mask, direction='DESCENDING', stable=True, axis=-1) # B*L -> B*L
            else:
                sorted_mask_index = tf.argsort(self.mask, direction='DESCENDING', stable=True, axis=-1) # B*L -> B*L
            sorted_mask = self._sort_descending(self.mask) # B*L -> B*L
            sorted_mask.set_shape(output_shape)
            sorted_mask_index.set_shape(output_shape)
            X = tf.batch_gather(X, sorted_mask_index) # B*L*F  < B*L = B*L*F
            self.mask = sorted_mask
            self.reduced_sequence_length = tf.reduce_sum(self.mask, 1) # B

            # cut useless sequence tail per batch 
            self.to_max_length = tf.range(tf.reduce_max(self.reduced_sequence_length)) # l
            X = tf.gather(X, self.to_max_length, axis=1) # B*L*F -> B*l*F
            self.mask = tf.gather(self.mask, self.to_max_length, axis=1) # B*L -> B*l
            self.reduced_sequence_length = tf.reduce_sum(self.mask, 1) # B

            # use cluster score as attention weights in AUGRU 
            _, alphas = self._attention_fcn(self.target_item_embedding, X, 'AGRU', False, return_alpha=True)
            _, final_state = dynamic_rnn_dien(
                VecAttGRUCell(self.hparams.hidden_size),
                inputs=X,
                att_scores = tf.expand_dims(alphas, -1),
                sequence_length=self.reduced_sequence_length,
                dtype=tf.float32,
                scope="gru"
            )
            model_output = tf.concat([final_state, graph_readout, self.target_item_embedding, graph_readout*self.target_item_embedding], 1)

        return model_output

  
    def _attention_fcn(self, query, key_value, name, reuse, return_alpha=False):
        """Apply attention by fully connected layers.

        Positions where `self.mask` is not 1 are excluded from the softmax by
        replacing their score with a large negative constant.

        Args:
            query (obj): The embedding of target item or cluster which is regarded as a query in attention operations.
            key_value (obj): The embedding of history items which is regarded as keys or values in attention operations.
            name (str): Suffix appended to the variable scope and to the name of variable W.
            reuse (bool): Whether to reuse the variables of an existing scope of that name.
            return_alpha (bool): Whether to also return the attention weights.

        Returns:
            output (obj): `key_value` scaled element-wise by the attention weights (not summed over the sequence).
            att_weights (obj):  Attention weights, returned only when `return_alpha` is True.
        """
        with tf.variable_scope("attention_fcn"+str(name), reuse=reuse):
            query_size = query.shape[-1].value
            boolean_mask = tf.equal(self.mask, tf.ones_like(self.mask))

            attention_mat = tf.get_variable(
                name="attention_mat"+str(name),
                shape=[key_value.shape.as_list()[-1], query_size],
                initializer=self.initializer,
            )
            # project keys into the query's feature size
            att_inputs = tf.tensordot(key_value, attention_mat, [[2], [0]])

            if query.shape.ndims != att_inputs.shape.ndims:
                # one query per sample: repeat it for every sequence position
                queries = tf.reshape(
                    tf.tile(query, [1, tf.shape(att_inputs)[1]]), tf.shape(att_inputs)
                )
            else:
                # the query already has one vector per position
                queries = query

            last_hidden_nn_layer = tf.concat(
                [att_inputs, queries, att_inputs - queries, att_inputs * queries], -1
            )
            att_fnc_output = self._fcn_net(
                last_hidden_nn_layer, self.hparams.att_fcn_layer_sizes, scope="att_fcn"
            )
            att_fnc_output = tf.squeeze(att_fnc_output, -1)
            mask_paddings = tf.ones_like(att_fnc_output) * (-(2 ** 32) + 1)
            att_weights = tf.nn.softmax(
                tf.where(boolean_mask, att_fnc_output, mask_paddings),
                name="att_weights",
            )
            output = key_value * tf.expand_dims(att_weights, -1)
            if return_alpha:
                return output, att_weights
            return output


    def _interest_fusion_extraction(self, X, A, layer, reuse):
        """Interest fusion and extraction via graph convolution and graph pooling 

        Besides returning its results, this method overwrites `self.mask`,
        `self.float_mask` and `self.reduced_sequence_length` with the pooled mask.

        Args:
            X (obj): Node embedding of graph
            A (obj): Adjacency matrix of graph
            layer (int): Index of the interest fusion and extraction layer
            reuse (bool): Whether the shared attention variables are reused in the fusion step

        Returns:
            X (obj): Aggerated cluster embedding 
            A (obj): Pooled adjacency matrix 
            graph_readout (obj): Readout embedding after graph pooling
            cluster_score (obj): Cluster score for AUGRU in prediction layer

        """
        # The projection below must keep the feature width of X because of the
        # residual connection; derive it from X instead of hard-coding 40.
        feature_dim = X.shape.as_list()[-1]

        with tf.name_scope('interest_fusion'):
            ## cluster embedding
            A_bool = tf.cast(tf.greater(A, 0), A.dtype)
            # force self-loops: clear the diagonal, then set it to 1
            A_bool = A_bool * (tf.ones([A.shape.as_list()[1],A.shape.as_list()[1]]) - tf.eye(A.shape.as_list()[1])) + tf.eye(A.shape.as_list()[1])
            D = tf.reduce_sum(A_bool, axis=-1) # B*L
            D = tf.sqrt(D)[:, None] + K.epsilon() # B*1*L
            # symmetric degree normalisation
            A = (A_bool / D) / tf.transpose(D, perm=(0,2,1)) # B*L*L / B*1*L / B*L*1
            X_q = tf.matmul(A, tf.matmul(A, X)) # B*L*F

            Xc = []
            for i in range(self.attention_heads):
                ## cluster- and query-aware attention
                if self.layer_shared:
                    f1_name = 'f1_shared'+'_'+str(i)
                    f2_name = 'f2_shared'+'_'+str(i)
                    scope_reuse = reuse
                else:
                    f1_name = 'f1_layer_'+str(layer)+'_'+str(i)
                    f2_name = 'f2_layer_'+str(layer)+'_'+str(i)
                    scope_reuse = False
                _, f_1 = self._attention_fcn(X_q, X, f1_name, scope_reuse, return_alpha=True)
                _, f_2 = self._attention_fcn(self.target_item_embedding, X, f2_name, scope_reuse, return_alpha=True)

                ## graph attentive convolution
                E = A_bool * tf.expand_dims(f_1,1) + A_bool * tf.transpose(tf.expand_dims(f_2,1), (0,2,1)) # B*1*L and B*L*1 broadcast -> B*L*L
                E = tf.nn.leaky_relu(E)
                boolean_mask = tf.equal(A_bool, tf.ones_like(A_bool))
                mask_paddings = tf.ones_like(E) * (-(2 ** 32) + 1)
                E = tf.nn.softmax(
                    tf.where(boolean_mask, E, mask_paddings),
                    axis = -1
                )
                Xc_one = tf.matmul(E, X) # B*L*L x B*L*F -> B*L*F
                Xc_one = tf.layers.dense(Xc_one, feature_dim, use_bias=False)
                Xc_one += X  # residual connection
                Xc += [tf.nn.leaky_relu(Xc_one)]
            Xc = tf.reduce_mean(tf.stack(Xc, 0), 0)

        with tf.name_scope('interest_extraction'):
            ## cluster fitness score 
            X_q = tf.matmul(A, tf.matmul(A, Xc)) # B*L*F
            cluster_score = []
            for i in range(self.attention_heads):
                # same attention variables as in the fusion step, always reused here
                if self.layer_shared:
                    f1_name = 'f1_shared'+'_'+str(i)
                    f2_name = 'f2_shared'+'_'+str(i)
                else:
                    f1_name = 'f1_layer_'+str(layer)+'_'+str(i)
                    f2_name = 'f2_layer_'+str(layer)+'_'+str(i)
                _, f_1 = self._attention_fcn(X_q, Xc, f1_name, True, return_alpha=True)
                _, f_2 = self._attention_fcn(self.target_item_embedding, Xc, f2_name, True, return_alpha=True)
                cluster_score += [f_1 + f_2]
            cluster_score = tf.reduce_mean(tf.stack(cluster_score, 0), 0)
            boolean_mask = tf.equal(self.mask, tf.ones_like(self.mask))
            mask_paddings = tf.ones_like(cluster_score) * (-(2 ** 32) + 1)
            cluster_score = tf.nn.softmax(
                tf.where(boolean_mask, cluster_score, mask_paddings),
                axis = -1
            )

            ## graph pooling
            num_nodes = tf.reduce_sum(self.mask, 1) # B
            boolean_pool = tf.greater(num_nodes, self.pool_length)
            # graphs longer than pool_length shrink towards it layer by layer;
            # shorter graphs keep all of their nodes
            to_keep = tf.where(boolean_pool, 
                               tf.cast(self.pool_length + (self.real_sequence_length - self.pool_length)/self.pool_layers*(self.pool_layers-layer-1), tf.int32), 
                               num_nodes)  # B
            cluster_score = cluster_score * self.float_mask # B*L
            sorted_score = self._sort_descending(cluster_score) # B*L
            # score found at rank `to_keep`; nodes must beat it (plus epsilon) to be kept
            target_score = self._gather_per_row(sorted_score, to_keep) + K.epsilon() # B
            topk_mask = tf.greater(cluster_score, tf.expand_dims(target_score, -1)) # B*L + B*1 -> B*L
            self.mask = tf.cast(topk_mask, tf.int32)
            self.float_mask = tf.cast(self.mask, tf.float32)
            self.reduced_sequence_length = tf.reduce_sum(self.mask, 1)

            ## ensure graph connectivity 
            # E is the attention matrix of the last head computed above
            E = E * tf.expand_dims(self.float_mask, -1) * tf.expand_dims(self.float_mask, -2)
            A = tf.matmul(tf.matmul(E, A_bool),
                          tf.transpose(E, (0,2,1))) # B*C*L x B*L*L x B*L*C = B*C*C
            ## graph readout 
            graph_readout = tf.reduce_sum(Xc*tf.expand_dims(cluster_score,-1)*tf.expand_dims(self.float_mask, -1), 1)

        return Xc, A, graph_readout, cluster_score


In [5]:
#coding=utf-8
#pylint: disable=no-member
#pylint: disable=no-name-in-module
#pylint: disable=import-error

from absl import app
from absl import flags
from absl import logging

import sys
sys.path.append("../../")
import os
import socket
import getpass
import smtplib
from email.mime.text import MIMEText

import setproctitle

import tensorflow as tf
import time

from reco_utils.common.constants import SEED
from deeprec_utils import (
    prepare_hparams
)
from reco_utils.dataset.sequential_reviews import data_preprocessing, strong_data_preprocessing
from reco_utils.dataset.sequential_reviews import group_sequence


from reco_utils.recommender.deeprec.io.sequential_iterator import (
    SequentialIterator,
    SASequentialIterator,
    RecentSASequentialIterator,
    ShuffleSASequentialIterator
)

from reco_utils.common.visdom_utils import VizManager
from tensorboardX import SummaryWriter
from tensorflow.python.tools import inspect_checkpoint as chkp

In [7]:
# Handle to absl's global flag registry; main() reads the flags defined below through it.
FLAGS = flags.FLAGS

<absl.flags._flagvalues.FlagValues at 0x108937a10>

In [8]:
# ---------------------------------------------------------------------------
# Command-line flags.
#
# Defaults depend on the machine: hosts whose name contains 'kwai' default to
# the kuaishou experiment, every other host to the taobao one.  The hostname is
# looked up once and reused for both host-dependent decisions below.
# ---------------------------------------------------------------------------
_HOSTNAME = socket.gethostname()

if 'kwai' in _HOSTNAME:
    # Earlier revisions switched experiments by (un)commenting lines here, using
    # the names kuaishou-GRU4REC / -CASER / -DIN / -DIEN / -SLIREC; pass --name instead.
    flags.DEFINE_string('name', 'kuaishou-SURGE', 'Experiment name.')
    flags.DEFINE_string('dataset', 'kuaishou', 'Dataset name.')
    flags.DEFINE_integer('val_num_ngs', 1, 'Number of negative instances with a positive instance for validation.')
    flags.DEFINE_integer('test_num_ngs', 1, 'Number of negative instances with a positive instance for testing.')
    flags.DEFINE_integer('gpu_id', 0, 'GPU ID.')
    flags.DEFINE_integer('contrastive_length_threshold', 10, 'Minimum sequence length value to apply contrastive loss.')
    flags.DEFINE_integer('contrastive_recent_k', 5, 'Use the most recent k embeddings to compute short-term proxy.')
else:
    # Earlier revisions switched experiments by (un)commenting lines here, using
    # the names taobao-GRU4REC / -CASER / -DIN / -DIEN / -SLIREC; pass --name instead.
    flags.DEFINE_string('name', 'taobao-SURGE', 'Experiment name.')
    # Previously toggled alternatives for the dataset default: 'taobao', 'amazon'.
    flags.DEFINE_string('dataset', 'taobao_global', 'Dataset name.')
    flags.DEFINE_integer('gpu_id', 1, 'GPU ID.')
    flags.DEFINE_integer('val_num_ngs', 4, 'Number of negative instances with a positive instance for validation.')
    flags.DEFINE_integer('test_num_ngs', 99, 'Number of negative instances with a positive instance for testing.')
    # 8023 is the port for epoch visual; 8123 was the one used for step visual.
    flags.DEFINE_integer('port', 8023, 'Port for visdom.')
    flags.DEFINE_integer('contrastive_length_threshold', 5, 'Minimum sequence length value to apply contrastive loss.')
    flags.DEFINE_integer('contrastive_recent_k', 3, 'Use the most recent k embeddings to compute short-term proxy.')

# Flags whose definition was duplicated verbatim in both host branches.
flags.DEFINE_integer('batch_size', 500, 'Batch size.')
# Previously toggled alternatives for the model default: GRU4REC, CASER, DIN, DIEN,
# DCASER, SLIREC.
flags.DEFINE_string('model', 'SURGE', 'Model name.')
flags.DEFINE_float('embed_l2', 1e-6, 'L2 regularization for embeddings.')
flags.DEFINE_float('layer_l2', 1e-6, 'L2 regularization for layers.')

flags.DEFINE_boolean('amp_time_unit', True, 'Whether to amplify unit for time stamp.')
flags.DEFINE_boolean('only_test', False, 'Only test and do not train.')
flags.DEFINE_boolean('test_dropout', False, 'Whether to dropout during evaluation.')
flags.DEFINE_boolean('write_prediction_to_file', False, 'Whether to write prediction to file.')
flags.DEFINE_boolean('test_counterfactual', False, 'Whether to test with counterfactual data.')
flags.DEFINE_string('test_counterfactual_mode', 'shuffle', 'Mode for counterfactual evaluation, could be original, shuffle or recent.')
flags.DEFINE_integer('counterfactual_recent_k', 10, 'Use recent k interactions to predict the target item.')
flags.DEFINE_boolean('pretrain', False, 'Whether to use pretrain and finetune.')
# A boolean 'finetune' flag (default True) and a machine-specific default for
# 'finetune_path' existed in earlier revisions and are currently disabled.
flags.DEFINE_string('finetune_path', '', 'Save path.')
flags.DEFINE_boolean('vector_alpha', False, 'Whether to use vector alpha for long short term fusion.')
flags.DEFINE_boolean('manual_alpha', False, 'Whether to use predefined alpha for long short term fusion.')
flags.DEFINE_float('manual_alpha_value', 0.5, 'Predefined alpha value for long short term fusion.')
flags.DEFINE_boolean('interest_evolve', True, 'Whether to use a GRU to model interest evolution.')
flags.DEFINE_boolean('predict_long_short', True, 'Predict whether the next interaction is driven by long-term interest or short-term interest.')
flags.DEFINE_enum('single_part', 'no', ['no', 'long', 'short'], 'Whether to use only long, only short or both.')
flags.DEFINE_integer('is_clip_norm', 1, 'Whether to clip gradient norm.')
flags.DEFINE_boolean('use_complex_attention', True, 'Whether to use complex attention like DIN.')
flags.DEFINE_boolean('use_time4lstm', True, 'Whether to use Time4LSTMCell proposed by SLIREC.')
flags.DEFINE_integer('epochs', 100, 'Number of epochs.')
flags.DEFINE_integer('early_stop', 2, 'Patience for early stop.')
flags.DEFINE_integer('pretrain_epochs', 10, 'Number of pretrain epochs.')
flags.DEFINE_integer('finetune_epochs', 100, 'Number of finetune epochs.')
flags.DEFINE_string('data_path', os.path.join("..", "..", "tests", "resources", "deeprec", "sequential"), 'Data file path.')
if _HOSTNAME == 'rl3':
    # Host-specific absolute path used for epoch visual; the step-visual variant
    # was '/data/changjianxin/ls-recommenders/saves/'.
    flags.DEFINE_string('save_path', '/data/changjianxin/ls-recommenders/saves_epoch/', 'Save path.')
else:
    # The step-visual variant was '../../saves_step/'.
    flags.DEFINE_string('save_path', '../../saves/', 'Save path.')
flags.DEFINE_integer('train_num_ngs', 4, 'Number of negative instances with a positive instance for training.')
flags.DEFINE_float('sample_rate', 1.0, 'Fraction of samples for training and testing.')
flags.DEFINE_float('attn_loss_weight', 0.001, 'Loss weight for supervised attention.')
flags.DEFINE_float('discrepancy_loss_weight', 0.01, 'Loss weight for discrepancy between long and short term user embedding.')
flags.DEFINE_float('contrastive_loss_weight', 0.1, 'Loss weight for contrastive of long and short intention.')
flags.DEFINE_float('learning_rate', 0.001, 'Learning rate.')
flags.DEFINE_integer('show_step', 500, 'Step for showing metrics.')
flags.DEFINE_string('visual_type', 'epoch', "Visualization mode: 'epoch' or 'step'.")
flags.DEFINE_integer('visual_step', 50, 'Step for drawing metrics.')
flags.DEFINE_string('no_visual_host', 'kwai', '')
flags.DEFINE_boolean('enable_mail_service', False, 'Whether to e-mail yourself after each run.')


def get_model(flags_obj, model_path, summary_path, pretrain_path, finetune_path, user_vocab, item_vocab, cate_vocab, train_num_ngs, data_path):
    """Build the model selected by the command-line flags.

    Side effects on `flags_obj`: `amp_time_unit` is forced to False unless the
    model is 'DANCE', and `manual_alpha` / `manual_alpha_value` / `single_part`
    are made consistent with each other.

    Args:
        flags_obj: Parsed flags (or any object exposing the same attributes).
        model_path (str): Forwarded to the hyper-parameters as MODEL_DIR.
        summary_path (str): Forwarded as SUMMARIES_DIR.
        pretrain_path (str): Forwarded as PRETRAIN_DIR.
        finetune_path (str): Forwarded as FINETUNE_DIR.
        user_vocab (str): User vocabulary, forwarded unchanged.
        item_vocab (str): Item vocabulary, forwarded unchanged.
        cate_vocab (str): Category vocabulary, forwarded unchanged.
        train_num_ngs (int): Number of negative instances per positive instance for training.
        data_path (str): Dataset directory; 'train_data' and 'graphs' are resolved beneath it.

    Returns:
        SURGEModel: The constructed model.

    Raises:
        ValueError: If `test_counterfactual` is set with an unknown
            `test_counterfactual_mode`, or if `flags_obj.model` is not 'SURGE'
            (the only model this function can build). Both cases previously
            surfaced as an UnboundLocalError.
    """
    RANDOM_SEED = None  # Set None for non-deterministic result

    # Only the 'DANCE' model keeps the amplified time unit; switch it off otherwise.
    flags_obj.amp_time_unit = flags_obj.amp_time_unit if flags_obj.model == 'DANCE' else False

    # Dataset-specific evaluation metrics and maximum history length.
    weighted_metrics = ['wauc']
    if flags_obj.dataset == 'kuaishou':
        pairwise_metrics = ['mean_mrr', 'ndcg@1;2']
        max_seq_length = 250
    elif flags_obj.dataset in ['taobao_global', 'yelp_global']:
        pairwise_metrics = ['mean_mrr', 'ndcg@2;4;6', 'hit@2;4;6']
        max_seq_length = 50
    elif flags_obj.dataset == 'kuaishou_open':
        pairwise_metrics = ['mean_mrr', 'ndcg@2;4;6', 'hit@2;4;6']
        max_seq_length = 200
    else:
        pairwise_metrics = ['mean_mrr', 'ndcg@2;4;6', 'hit@2;4;6', 'group_auc']
        max_seq_length = 50

    # Choose the data iterator class.
    if not flags_obj.test_counterfactual:
        if flags_obj.model in ['SLIREC', 'SASLIREC', 'DANCE']:
            input_creator = SASequentialIterator
        else:
            input_creator = SequentialIterator
    else:
        counterfactual_mode = flags_obj.test_counterfactual_mode
        if counterfactual_mode == 'original':
            input_creator = SASequentialIterator
        elif counterfactual_mode == 'recent':
            input_creator = RecentSASequentialIterator
        elif counterfactual_mode == 'shuffle':
            input_creator = ShuffleSASequentialIterator
        else:
            raise ValueError(
                "Unknown test_counterfactual_mode {!r}; expected 'original', 'recent' or 'shuffle'.".format(counterfactual_mode)
            )

    # Keep single_part and the manual alpha settings in agreement.
    if flags_obj.single_part != 'no':
        flags_obj.manual_alpha = True
        if flags_obj.single_part == 'long':
            flags_obj.manual_alpha_value = 1.0
        else:
            flags_obj.manual_alpha_value = 0.0
    elif flags_obj.manual_alpha:
        if flags_obj.manual_alpha_value == 1.0:
            flags_obj.single_part = 'long'
        elif flags_obj.manual_alpha_value == 0.0:
            flags_obj.single_part = 'short'

    # SURGE is the only model wired up here.
    if flags_obj.model != 'SURGE':
        raise ValueError(
            "Unsupported model {!r}; only 'SURGE' can be built by get_model.".format(flags_obj.model)
        )

    yaml_file = '../../reco_utils/recommender/deeprec/config/gcn.yaml'
    hparams = prepare_hparams(yaml_file, 
                            embed_l2=flags_obj.embed_l2, 
                            layer_l2=flags_obj.layer_l2, 
                            learning_rate=flags_obj.learning_rate, 
                            epochs=flags_obj.epochs,
                            EARLY_STOP=flags_obj.early_stop,
                            batch_size=flags_obj.batch_size,
                            show_step=flags_obj.show_step,
                            visual_step=flags_obj.visual_step,
                            visual_type=flags_obj.visual_type,
                            MODEL_DIR=model_path,
                            SUMMARIES_DIR=summary_path,
                            PRETRAIN_DIR=pretrain_path,
                            FINETUNE_DIR=finetune_path,
                            user_vocab=user_vocab,
                            item_vocab=item_vocab,
                            cate_vocab=cate_vocab,
                            need_sample=True,
                            train_num_ngs=train_num_ngs, # provides the number of negative instances for each positive instance for loss computation.
                            max_seq_length=max_seq_length, 
                            hidden_size=40,
                            train_dir=os.path.join(data_path, r'train_data'),
                            graph_dir=os.path.join(data_path, 'graphs'),
                            pairwise_metrics=pairwise_metrics,
                            weighted_metrics=weighted_metrics,
                )
    return SURGEModel(hparams, input_creator, seed=RANDOM_SEED)




def main(argv=None):
    """Run one experiment end to end: prepare data, build the model, train and/or evaluate.

    All configuration is read from the module-level ``FLAGS`` object. Depending on
    the flags, the function takes one of three paths after the data files and the
    model have been prepared:

    * ``test_counterfactual``: load the latest checkpoint and evaluate on the
      weak / strong interaction test splits, then return.
    * ``only_test``: load the latest checkpoint and evaluate on the test file,
      then return.
    * otherwise: train with ``model.fit``, reload the latest checkpoint, evaluate
      on the test file and on the ``<test_file>_group<g>`` files, and optionally
      write predictions to ``output.txt``.

    Args:
        argv: Ignored. Accepted (with a default) because the launcher at the
            bottom of the file passes this function to ``app.run``, which calls
            its callback with the remaining command-line arguments.

    Raises:
        ValueError: If ``FLAGS.dataset`` is not one of the known datasets, or if
            ``test_counterfactual`` is requested for a dataset that has no
            weak / strong interaction test splits.
    """
    del argv  # unused

    flags_obj = FLAGS

    setproctitle.setproctitle('{}@changjianxin'.format(flags_obj.name))
    # os.environ["CUDA_VISIBLE_DEVICES"] = str(flags_obj.gpu_id)

    print("System version: {}".format(sys.version))
    print("Tensorflow version: {}".format(tf.__version__))

    # None means "mail notifications disabled"; checked before every send below.
    mail_master = MailMaster(flags_obj) if flags_obj.enable_mail_service else None

    print('start experiment')

    # dataset name -> (raw interactions file, raw meta file)
    raw_file_names = {
        'amazon': ('reviews_Movies_and_TV_5.json', 'meta_Movies_and_TV.json'),
        'yelp': ('yelp_academic_dataset_review.json', 'yelp_academic_dataset_business.json'),
        'taobao': ('UserBehavior.csv', ''),
        'yelp_global': ('yelp_academic_dataset_review.json', 'yelp_academic_dataset_business.json'),
        'taobao_global': ('UserBehavior.csv', ''),
        'kuaishou': ('kuaishou.csv', ''),
        'kuaishou_open': ('dataset.pkl', 'visual64_select.npy'),
    }
    if flags_obj.dataset not in raw_file_names:
        # Previously this fell through and failed later on an unbound local.
        raise ValueError(
            "Unknown dataset '{}'; expected one of {}".format(
                flags_obj.dataset, sorted(raw_file_names)
            )
        )
    reviews_name, meta_name = raw_file_names[flags_obj.dataset]

    data_path = os.path.join(flags_obj.data_path, flags_obj.dataset)

    # for test
    train_file = os.path.join(data_path, r'train_data')
    valid_file = os.path.join(data_path, r'valid_data')
    test_file = os.path.join(data_path, r'test_data')
    user_vocab = os.path.join(data_path, r'user_vocab.pkl')
    item_vocab = os.path.join(data_path, r'item_vocab.pkl')
    cate_vocab = os.path.join(data_path, r'category_vocab.pkl')
    output_file = os.path.join(data_path, r'output.txt')

    reviews_file = os.path.join(data_path, reviews_name)
    meta_file = os.path.join(data_path, meta_name)
    train_num_ngs = flags_obj.train_num_ngs
    valid_num_ngs = flags_obj.val_num_ngs
    test_num_ngs = flags_obj.test_num_ngs
    sample_rate = flags_obj.sample_rate

    input_files = [reviews_file, meta_file, train_file, valid_file, test_file, user_vocab, item_vocab, cate_vocab]

    # Preprocessing is skipped when its output already exists on disk.
    if not os.path.exists(train_file):
        data_preprocessing(*input_files, sample_rate=sample_rate, valid_num_ngs=valid_num_ngs, test_num_ngs=test_num_ngs, dataset=flags_obj.dataset)

    # Sequence-length boundaries used to split the test file into groups.
    # Only these datasets have boundaries configured.
    split_length_by_dataset = {
        'kuaishou': [50, 100, 150, 200],
        'taobao_global': [10, 20, 30, 40],
    }
    if not os.path.exists(test_file+'_group1'):
        split_length = split_length_by_dataset.get(flags_obj.dataset)
        if split_length is None:
            # Previously this hit an unbound `split_length`; skip grouping instead.
            print("No sequence-length groups configured for dataset '{}'; skipping grouping".format(flags_obj.dataset))
        else:
            group_sequence(test_file=test_file, split_length=split_length)

    # Counterfactual test splits. `weak_test_file` stays None for datasets that
    # have none; `strong_test_sets` holds (console label, file) in evaluation order.
    weak_test_file = None
    strong_test_sets = []
    if flags_obj.dataset in ['taobao', 'taobao_global']:
        strong_last_test_file = os.path.join(data_path, r'strong_last_test_data')
        strong_first_test_file = os.path.join(data_path, r'strong_first_test_data')
        weak_test_file = os.path.join(data_path, r'weak_test_data')
        strong_last_vocab = os.path.join(data_path, r'strong_last_vocab.pkl')
        strong_first_vocab = os.path.join(data_path, r'strong_first_vocab.pkl')
        strong_file = os.path.join(data_path, 'UserBuy.csv')
        if not os.path.exists(strong_last_test_file) or not os.path.exists(strong_first_test_file):
            raw_data = (strong_last_test_file, strong_first_test_file, weak_test_file, strong_last_vocab, strong_first_vocab)
            strong_data_preprocessing(raw_data, test_file, strong_file, user_vocab, item_vocab,
                                        test_num_ngs=test_num_ngs, dataset=flags_obj.dataset)
        strong_test_sets = [
            ('strong last interaction:', strong_last_test_file),
            ('strong first interaction:', strong_first_test_file),
        ]
    elif flags_obj.dataset == 'kuaishou_open':
        strong_like_last_test_file = os.path.join(data_path, r'strong_like_last_test_data')
        strong_like_first_test_file = os.path.join(data_path, r'strong_like_first_test_data')
        strong_follow_last_test_file = os.path.join(data_path, r'strong_follow_last_test_data')
        strong_follow_first_test_file = os.path.join(data_path, r'strong_follow_first_test_data')
        weak_test_file = os.path.join(data_path, r'weak_test_data')
        strong_like_last_vocab = os.path.join(data_path, r'strong_like_last_vocab.pkl')
        strong_like_first_vocab = os.path.join(data_path, r'strong_like_first_vocab.pkl')
        strong_follow_last_vocab = os.path.join(data_path, r'strong_follow_last_vocab.pkl')
        strong_follow_first_vocab = os.path.join(data_path, r'strong_follow_first_vocab.pkl')
        strong_file = os.path.join(data_path, reviews_name)
        category_file = os.path.join(data_path, 'kuaishou_open_business_recommenders.csv')
        if not os.path.exists(strong_like_last_test_file) or not os.path.exists(strong_like_first_test_file) \
            or not os.path.exists(strong_follow_last_test_file) or not os.path.exists(strong_follow_first_test_file):
            raw_data = (strong_like_last_test_file, strong_like_first_test_file, strong_follow_last_test_file, strong_follow_first_test_file, weak_test_file,
                         strong_like_last_vocab, strong_like_first_vocab, strong_follow_last_vocab, strong_follow_first_vocab)
            strong_data_preprocessing(raw_data, test_file, strong_file, user_vocab, item_vocab,
                                        test_num_ngs=test_num_ngs, dataset=flags_obj.dataset, category_file=category_file)
        strong_test_sets = [
            ('strong like last interaction:', strong_like_last_test_file),
            ('strong like first interaction:', strong_like_first_test_file),
            ('strong follow last interaction:', strong_follow_last_test_file),
            ('strong follow first interaction:', strong_follow_first_test_file),
        ]

    if flags_obj.test_counterfactual and weak_test_file is None:
        # Fail before building the model rather than on an unbound local afterwards.
        raise ValueError(
            "test_counterfactual is not supported for dataset '{}': "
            "no weak/strong interaction test splits are defined for it".format(flags_obj.dataset)
        )

    save_path = os.path.join(flags_obj.save_path, flags_obj.model, flags_obj.name)
    model_path = os.path.join(save_path, "model/")
    summary_path = os.path.join(save_path, "summary/")
    pretrain_path = os.path.join(save_path, "pretrain/")
    finetune_path = flags_obj.finetune_path

    model = get_model(flags_obj, model_path, summary_path, pretrain_path, finetune_path, user_vocab, item_vocab, cate_vocab, train_num_ngs, data_path)

    if flags_obj.test_counterfactual:
        ckpt_path = tf.train.latest_checkpoint(model_path)
        model.load_model(ckpt_path)
        calc_mean_alpha = flags_obj.model in ['SLIREC', 'DANCE']

        # test_num_ngs is the number of negative lines after each positive line in your test_file
        print('weak interaction:')
        res_weak = model.run_weighted_eval(weak_test_file, num_ngs=test_num_ngs, calc_mean_alpha=calc_mean_alpha)
        print(res_weak)

        strong_results = []
        for label, strong_test_file in strong_test_sets:
            print(label)
            res_strong = model.run_weighted_eval(strong_test_file, num_ngs=test_num_ngs, calc_mean_alpha=calc_mean_alpha)
            print(res_strong)
            strong_results.append(res_strong)

        if mail_master is not None:
            # Strong results first, weak result last.
            mail_master.send_mail(flags_obj, 'counterfactual', strong_results + [res_weak])

        return

    if flags_obj.only_test:
        ckpt_path = tf.train.latest_checkpoint(model_path)
        model.load_model(ckpt_path)
        res_syn = model.run_weighted_eval(test_file, num_ngs=test_num_ngs) # test_num_ngs is the number of negative lines after each positive line in your test_file
        print(flags_obj.name)
        print(res_syn)

        if mail_master is not None:
            # Was `res`, which is not defined anywhere in this function.
            mail_master.send_mail(flags_obj, 'test', res_syn)

        return

    # Visualisation is disabled on hosts whose name contains `no_visual_host`.
    if flags_obj.no_visual_host not in socket.gethostname():
        vm = VizManager(flags_obj)
        vm.show_basic_info(flags_obj)
    else:
        vm = None
    visual_path = os.path.join(save_path, "metrics/")
    tb = SummaryWriter(log_dir=visual_path, comment='tb')

    if flags_obj.dataset in ['kuaishou', 'kuaishou_open', 'taobao_global', 'yelp_global']:
        eval_metric = 'wauc'
    else:
        eval_metric = 'group_auc'

    try:
        start_time = time.time()
        # valid_num_ngs is the number of negative lines after each positive line in your valid_file
        # we will evaluate the performance of model on valid_file every epoch
        model = model.fit(train_file, valid_file, valid_num_ngs=valid_num_ngs, eval_metric=eval_metric, vm=vm, tb=tb, pretrain=flags_obj.pretrain)
        end_time = time.time()
        cost_time = end_time - start_time
        print('Time cost for training is {0:.2f} mins'.format((cost_time)/60.0))

        ckpt_path = tf.train.latest_checkpoint(model_path)
        model.load_model(ckpt_path)
        res_syn = model.run_weighted_eval(test_file, num_ngs=test_num_ngs)
        print(flags_obj.name)
        print(res_syn)

        for g in [1,2,3,4,5]:
            group_file = test_file+'_group'+str(g)
            if not os.path.exists(group_file):
                # Grouping may have been skipped or produced fewer groups.
                print('skip missing group file: {}'.format(group_file))
                continue
            res_syn_group = model.run_weighted_eval(group_file, num_ngs=test_num_ngs)
            print(flags_obj.name+'_group'+str(g))
            print(res_syn_group)

        if vm is not None:
            vm.show_test_info()
            vm.show_result(res_syn)
    finally:
        # Close the summary writer even when training or evaluation raises.
        tb.close()

    if mail_master is not None:
        # Was `res`, which is not defined anywhere in this function.
        mail_master.send_mail(flags_obj, 'train', res_syn)

    if flags_obj.write_prediction_to_file:
        model = model.predict(test_file, output_file)


# Script entry point: when this file is executed directly, hand `main` to `app.run`.
# NOTE(review): the output recorded right after this cell shows flag parsing
# aborting on an unknown 'f' flag when run inside the notebook kernel —
# presumably the kernel's own "-f <connection file>" argument; confirm before
# relying on this launcher from a notebook.
if __name__ == "__main__":
    
    app.run(main)


FATAL Flags parsing error: Unknown command line flag 'f'
Pass --helpshort or --helpfull to see help on flags.


AttributeError: 'tuple' object has no attribute 'tb_frame'

In [9]:


# --- Dataset locations: every file is resolved relative to `data_path` ---
data_path = ""
reviews_file, meta_file = (
    os.path.join(data_path, raw_name)
    for raw_name in ("reviews_Movies_and_TV_5.json", "meta_Movies_and_TV.json")
)
train_file, valid_file, test_file = (
    os.path.join(data_path, split_name)
    for split_name in ("train_data", "valid_data", "test_data")
)
user_vocab, item_vocab, cate_vocab = (
    os.path.join(data_path, vocab_name)
    for vocab_name in ("user_vocab.pkl", "item_vocab.pkl", "category_vocab.pkl")
)

# --- Run configuration: output directories and training settings ---
model_name = "amazon-SURGE"
save_path = "surge-files"
model_path, summary_path, pretrain_path = (
    os.path.join(save_path, sub_dir) for sub_dir in ("model", "summary", "pretrain")
)
finetune_path = ""
train_num_ngs, epochs, batch_size = 4, 100, 500
learning_rate = 0.001
visual_type = "epoch"

# --- Hyperparameters: values below override those loaded from the YAML file ---
yaml_file = "../../reco_utils/recommender/deeprec/config/gcn.yaml"
hparam_overrides = dict(
    embed_l2=1e-6,
    layer_l2=1e-6,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=batch_size,
    show_step=500,
    visual_step=50,
    visual_type=visual_type,
    MODEL_DIR=model_path,
    SUMMARIES_DIR=summary_path,
    PRETRAIN_DIR=pretrain_path,
    FINETUNE_DIR=finetune_path,
    user_vocab=user_vocab,
    item_vocab=item_vocab,
    cate_vocab=cate_vocab,
    need_sample=True,
    train_num_ngs=train_num_ngs,
    max_seq_length=250,
    hidden_size=40,
    train_dir=os.path.join(data_path, "train_data"),
    graph_dir=os.path.join(data_path, "graphs"),
    pairwise_metrics=["mean_mrr", "ndcg@1;2"],
    weighted_metrics=["wauc"],
)
hparams = prepare_hparams(yaml_file, **hparam_overrides)

# --- Build the SURGE model ---
model = SURGEModel(hparams, SASequentialIterator, seed=None)

# --- Train, timing the whole fit call (elapsed value is in seconds) ---
start_time = time.time()
model = model.fit(train_file, valid_file, valid_num_ngs=1, eval_metric="wauc")
end_time = time.time()
print("Training time:", end_time - start_time)

# --- Evaluate on the held-out test file ---
res = model.run_weighted_eval(test_file, num_ngs=1)
print("Evaluation results:", res)


ModuleNotFoundError: No module named 'reco_utils.recommender.deeprec.models.surge'