In [1]:
import tensorflow as tf
import numpy as np
import random
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
import sys
import pickle as pkl
sys.path.append('../tftools/')

from tf_object import *

In [2]:
class UserRankModel(TFModel):
    """Two-candidate event ranking model built on the project's ``TFModel`` base.

    Every example carries a dense feature vector and a pair of event ids. The
    feature vector goes through a fully connected layer and a stack of highway
    layers; the result is multiplied against the embeddings of the two event
    ids, giving two logits per example. The integer label picks one of the two
    columns and the loss is sparse softmax cross-entropy.

    ``gpu_num``, ``gpus``, ``opt``, ``clip_gradients``,
    ``tower_prediction_results`` and the ``__build_global_setting__`` /
    ``__add_to_tower_list__`` / ``build_model_aggregation`` helpers are not
    defined in this class; presumably they come from ``TFModel`` -- confirm
    against ``tf_object``.
    """

    def __init__(self, config, sess, current_task_name='user_rank'):
        """Store the model dimensions read from ``config``.

        Args:
            config: settings object. ``feature_num``, ``event_num`` and
                ``init_scale`` are read here; the object is also forwarded to
                ``TFModel.__init__``.
            sess: session object forwarded to ``TFModel.__init__``.
            current_task_name: stored on the instance as ``current_task_name``.
        """
        super(UserRankModel, self).__init__(config, sess)
        self.feature_num = config.feature_num
        self.event_num = config.event_num
        self.current_task_name = current_task_name
        self.init_scale = config.init_scale

    def build_input(self):
        """Create the input placeholders and split them into per-tower shards.

        Three placeholders are created: float features of width
        ``feature_num``, an int32 pair of event ids, and an int32 label
        vector. Each one is registered in the ``tf.GraphKeys.INPUTS``
        collection and split along the batch axis into ``gpu_num`` pieces.
        """
        with tf.name_scope('input'):
            inputX = tf.placeholder(tf.float32, [None, self.feature_num], name="input_feature")
            eventID = tf.placeholder(tf.int32, [None, 2], name='event')
            inputLabel = tf.placeholder(tf.int32, [None], name='label')
            tf.add_to_collection(tf.GraphKeys.INPUTS, inputX)
            tf.add_to_collection(tf.GraphKeys.INPUTS, eventID)
            tf.add_to_collection(tf.GraphKeys.INPUTS, inputLabel)
            # tf.split with an integer count requires the fed batch size to be
            # divisible by gpu_num.
            self.split_inputX = tf.split(inputX, self.gpu_num, 0)
            self.split_eventID = tf.split(eventID, self.gpu_num, 0)
            self.split_inputLabel = tf.split(inputLabel, self.gpu_num, 0)
        self.__build_global_setting__()
        with tf.name_scope('states_array'):
            # One slot per tower; build_mlp_model fills in the tower's logits.
            self.last_flat = [[] for i in range(0, self.gpu_num)]

    def build_mlp_model(self, gpu_id=0, layer_num=2, net_size=512):
        """Build the scoring network for one tower.

        Args:
            gpu_id: index into the per-tower input shards and ``last_flat``.
            layer_num: number of highway layers stacked after the first
                fully connected layer.
            net_size: width handed to the embedding layer and to the fully
                connected layer; also the inner dimension of the final matmul.
        """
        with get_new_variable_scope('embedding') as embedding_scope:
            event_embedding = my_embedding_layer(self.split_eventID[gpu_id], self.event_num, net_size,
                                  layer_name='embedding_layer', init_scale=self.init_scale)
        dense = self.split_inputX[gpu_id]
        with get_new_variable_scope('mlp') as mlp_scope:
            dense = my_full_connected(dense, net_size, act=tf.nn.relu)
            for _ in range(layer_num):
                dense = highway(dense, f=tf.nn.tanh)
        # The scope is called "dropout" but no dropout op is created here; the
        # name is kept so that op names in the graph do not change.
        with tf.name_scope("dropout"):
            # event_embedding is presumably [batch, 2, net_size] -- TODO confirm
            # against my_embedding_layer. dense is reshaped to
            # [batch, 1, net_size]; with adjoint_b=True the batched product is
            # [batch, 2, 1], which is flattened to one logit per event id.
            self.last_flat[gpu_id] = tf.reshape(
                tf.matmul(event_embedding, tf.reshape(dense, [-1, 1, net_size]), adjoint_b=True),
                [-1, 2])

    def build_prediction(self, gpu_id=0, accK=1):
        """Attach softmax output, loss, gradients and accuracy for one tower.

        Args:
            gpu_id: index of the tower whose logits and labels are used.
            accK: ``k`` passed to ``tf.nn.in_top_k`` for the accuracy tensor.
        """
        prediction = self.last_flat[gpu_id]
        self.tower_prediction_results.append(tf.nn.softmax(prediction))
        # Every trainable variable except the first one registered.
        # NOTE(review): index 0 is presumably the variable created by
        # __build_global_setting__ -- confirm before reordering graph creation.
        self.params = tf.trainable_variables()[1:]
        with tf.name_scope('loss'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.split_inputLabel[gpu_id], logits=prediction)
            grads, capped_gvs = my_compute_grad(self.opt, loss, self.params,
                                                clip_type='clip_norm',
                                                max_clip_grad=self.clip_gradients)
        with tf.name_scope('accuracy'):
            accuracy = tf.to_float(tf.nn.in_top_k(prediction, self.split_inputLabel[gpu_id], k=accK))
        self.__add_to_tower_list__(grads, capped_gvs, loss, accuracy)

    def build_model(self, *args, **kwargs):
        """Build inputs, one tower per entry of ``self.gpus``, then aggregate.

        Positional and keyword arguments are forwarded to ``build_mlp_model``
        after the tower index, so positional values map to ``layer_num`` and
        ``net_size`` in that order. ``gpu_id`` must not be supplied by the
        caller.
        """
        self.build_input()
        for idx, gpu_id in enumerate(self.gpus):
            with tf.device('/gpu:%d' % gpu_id):
                with tf.name_scope('Tower_%d' % (gpu_id)):
                    # Variables are created by the first tower and reused by the rest.
                    with tf.variable_scope('gpu', reuse=(idx != 0)):
                        # The tower index is passed positionally: with
                        # gpu_id=idx as a keyword, any positional argument
                        # from the caller would also bind to gpu_id and raise
                        # TypeError.
                        self.build_mlp_model(idx, *args, **kwargs)
                        self.build_prediction(gpu_id=idx)
        self.build_model_aggregation()

In [3]:
# Hyper-parameters and run settings, registered as TensorFlow flags. The FLAGS
# object is handed to UserRankModel as its config.
# NOTE(review): running this cell twice in the same kernel re-registers the
# same flag names, which the flags library may reject -- restart the kernel
# before re-running.
flags = tf.app.flags

# Model dimensions.
flags.DEFINE_integer("feature_num", 51, "number of input features per example [51]")
flags.DEFINE_integer("event_num", 26, "number of rows in the event embedding table [26]")
flags.DEFINE_float("init_scale", 1.0, "init scale for embedding layer")

# Optimisation.
flags.DEFINE_float("learning_rate", 0.01, "learning rate [0.01]")
flags.DEFINE_string("opt", 'sgd', "optimizer")
flags.DEFINE_integer("batch_size", 512, "batch size to use during training [512]")
flags.DEFINE_float("clip_gradients", 5.0, "clip gradients to this norm [5.0]")
flags.DEFINE_integer("n_epochs", 100, "number of epoch to use during training [100]")

# Logging, checkpoints and schedule.
flags.DEFINE_boolean("epoch_save", True, "save checkpoint or not in each epoch [True]")
flags.DEFINE_integer("print_step", 500, "print step during training [500]")
flags.DEFINE_string("logs_dir", "logs/", "logs directory [logs/]")
flags.DEFINE_string("model_dir", "model/", "model directory [model/]")
flags.DEFINE_boolean("dir_clear", False, "clear the log and model directory")
flags.DEFINE_boolean("lr_annealing", True, "use lr annealing or not after each epoch [True]")

# Devices.
flags.DEFINE_integer("gpu_id", 0, "default gpu id [0]")
flags.DEFINE_integer("gpu_num", 4, "gpu_num")

FLAGS = flags.FLAGS

In [4]:
# Load the raw training matrix. Column meanings are not documented in this
# notebook, so every index below is positional.
# NOTE(review): no random seed is set in this notebook, so the sampled event
# ids and the flipped rows differ from run to run.
data = np.load('train_data.npy')

# Filter bad cases: drop column 8, then keep the rows whose new column 8 is > 50.
data = np.delete(data, 8, 1)
data = data[data[:, 8] > 50]

# Build the event pair of every row: column 0 is a random id drawn from
# [0, 25), column 1 is the last column of the row.
event = np.concatenate([np.random.randint(25, size=[len(data), 1]), data[:, -1:]], 1)
# The label is the second-to-last column; all remaining columns are features.
label = data[:, -2]
data = data[:, :-2]
# Standardise every feature column to zero mean and unit variance.
data = scale(data, axis=0, with_mean=True, with_std=True, copy=True)

# Select data: drop the rows whose random id collides with the recorded one.
# The mask is computed once so all three arrays are filtered consistently.
keep_mask = event[:, 0] != event[:, 1]
data = data[keep_mask]
label = label[keep_mask]
event = event[keep_mask]

# Balance data: moving (n0 - n1) / 2 rows from label 0 to label 1 makes the two
# classes equally frequent. A moved row gets its two event columns swapped.
zero_rows = np.where(label == 0)[0]
n_flip = int((len(zero_rows) - np.sum(label == 1)) / 2)
if n_flip > 0:
    r_idx = np.random.choice(zero_rows, n_flip, replace=False)
    # Fancy indexing copies, so the right-hand side is evaluated completely
    # before the rows are written back with their columns reversed.
    event[r_idx] = event[r_idx][:, ::-1]
    label[r_idx] = 1
else:
    # Label 1 is already at least as frequent as label 0; a negative sample
    # size would make np.random.choice raise, so nothing is flipped.
    r_idx = np.array([], dtype=int)

In [5]:
# Split the row indices 80/20 into train and test. All rows are used; the
# earlier variant that sampled 300000 rows per class is no longer applied.
idxs = np.arange(len(data))
index_parts = train_test_split(idxs, test_size=0.2)
train_idxs = index_parts[0]
# The held-out indices are kept in ascending order.
test_idxs = np.sort(index_parts[1])

In [6]:
# Train the ranking model in its own graph and session, then pickle the
# trained weights keyed by variable name.
graph_to_use = tf.Graph()
# Soft placement lets ops fall back to another device when the requested one
# is unavailable; allow_growth makes GPU memory be claimed on demand.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
res = {}
with tf.Session(graph=graph_to_use, config=config) as session:
    user_rank_model = UserRankModel(FLAGS, session, current_task_name='user_rank_model_1')
    user_rank_model.build_model(net_size=512)
    user_rank_model.build_model_summary()
    display(user_rank_model.model_summary())
    # Inputs are passed in placeholder order: features, event pairs, labels.
    user_rank_model.run([data, event, label], train_idxs, test_idxs)
    # Fetch every trainable variable's current value in one call.
    trainable = tf.trainable_variables()
    for var, value in zip(trainable, session.run(trainable)):
        res[var.name] = value
    with open('param1.pickle', 'wb') as output_file:
        pkl.dump(res, output_file)
    

Initializing


Unnamed: 0,variable_name,variable_shape,parameters
0,global/Variable:0,[],1
1,gpu/embedding/embedding_layer/embedding_table:0,"[26, 512]",13312
2,gpu/mlp/fully_connected/W:0,"[51, 512]",26112
3,gpu/mlp/fully_connected/B:0,[512],512
4,gpu/mlp/highway/output_lin_0/Matrix:0,"[512, 512]",262144
5,gpu/mlp/highway/transform_lin_0/Matrix:0,"[512, 512]",262144
6,gpu/mlp/highway_1/output_lin_0/Matrix:0,"[512, 512]",262144
7,gpu/mlp/highway_1/transform_lin_0/Matrix:0,"[512, 512]",262144


Epoch 1 ... training ...
Minibatch 500 / loss: 0.589347
Minibatch 500 / accuracy: 0.744141
Minibatch 1000 / loss: 0.67645
Minibatch 1000 / accuracy: 0.732422
Minibatch 1500 / loss: 0.509177
Minibatch 1500 / accuracy: 0.771484
Minibatch 2000 / loss: 0.459651
Minibatch 2000 / accuracy: 0.789062
Minibatch 2500 / loss: 0.570081
Minibatch 2500 / accuracy: 0.767578
Minibatch 3000 / loss: 0.527987
Minibatch 3000 / accuracy: 0.763672
Minibatch 3500 / loss: 0.493769
Minibatch 3500 / accuracy: 0.78125
Minibatch 4000 / loss: 0.522761
Minibatch 4000 / accuracy: 0.748047
Minibatch 4500 / loss: 0.549168
Minibatch 4500 / accuracy: 0.746094
Minibatch 5000 / loss: 0.527923
Minibatch 5000 / accuracy: 0.738281
Minibatch 5500 / loss: 0.500128
Minibatch 5500 / accuracy: 0.757812
epoch time: 0.8756827473640442
Epoch 1 training accuracy: 0.763022616077
Epoch 1 ... test ...
Minibatch 500 / loss: 0.430982
Minibatch 500 / accuracy: 0.84375
Minibatch 1000 / loss: 0.392171
Minibatch 1000 / accuracy: 0.871094
Epoc

Minibatch 3500 / loss: 0.50132
Minibatch 3500 / accuracy: 0.777344
Minibatch 4000 / loss: 0.499854
Minibatch 4000 / accuracy: 0.775391
Minibatch 4500 / loss: 0.527694
Minibatch 4500 / accuracy: 0.757812
Minibatch 5000 / loss: 0.489904
Minibatch 5000 / accuracy: 0.761719
Minibatch 5500 / loss: 0.454524
Minibatch 5500 / accuracy: 0.794922
epoch time: 0.8354515353838603
Epoch 7 training accuracy: 0.772868251711
Epoch 7 ... test ...
Minibatch 500 / loss: 0.448714
Minibatch 500 / accuracy: 0.835938
Minibatch 1000 / loss: 0.406151
Minibatch 1000 / accuracy: 0.839844
Epoch 7 test accuracy: 0.773053430422
{'loss': 0.50333869349822713, 'valid_los': 0.50402581059331741, 'best_accuracy': 0.78171182524092797, 'best_test_accuracy': 0.78226865538188139, 'epoch': 6, 'learning_rate': 0.006666666666666667, 'valid_perplexity': 1.6553720887414378}
Epoch 8 ... training ...
Minibatch 500 / loss: 0.510713
Minibatch 500 / accuracy: 0.796875
Minibatch 1000 / loss: 0.506873
Minibatch 1000 / accuracy: 0.773438


KeyboardInterrupt: 