In [1]:
from text_utils import *

#current version
#export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.0.1-cp36-cp36m-linux_x86_64.whl
import tensorflow as tf
from tensorflow.contrib.rnn.python.ops import core_rnn_cell as rnn_cell
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn,dynamic_rnn
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
import pickle as pkl
import pandas as pd
from os import walk
import h5py
import sys
sys.path.append('../tfmodels')
#from tf_object import *
from sequential_model import *
from collections import Counter

# Directory prefix that the later cells format into every input file path
# (pickles, Excel sheet and HDF5 file); relative to the working directory.
data_path = 'data'

In [2]:
# Two objects are unpickled back-to-back from the same file, in this order:
# the sentences first, then their label strings.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The `with` block guarantees the handle is closed even if unpickling fails.
with open('{}/emotion_sents.dat'.format(data_path), 'rb') as f:
    new_sentences = pkl.load(f)
    label_str = pkl.load(f)

df = pd.read_excel('{}/emotion.xlsx'.format(data_path))

# Sequence length later registered as the `maxlen` flag.
maxlen = 140

# Lookup from the sheet's `phrase` column to its `po1` column.
phrase_dict = dict(zip(df.phrase, df.po1))

# Previously this file handle was opened inline and never closed.
with open('{}/term_map.dat'.format(data_path), 'rb') as term_map_file:
    id_term_map = pkl.load(term_map_file)

# Inverse of id_term_map (values become keys).
# presumably id -> term becomes term -> id; verify against the file contents.
term_dict = dict((v,k) for k,v in id_term_map.items())

In [3]:
file_path = '{}/emotion_documents'.format(data_path)

# Every dataset is copied into an in-memory numpy array before the file is
# closed; the context manager closes the file even if one of the reads fails.
with h5py.File(file_path + '.h5', 'r') as h5f:
    documents = np.array(h5f['documents'])
    seq_len = np.array(h5f['seq_len'])
    label = np.array(h5f['label'])
    new_idx = np.array(h5f['new_idx'])
    # Kept as a numpy array exactly as stored; it is later used as the default
    # of the `nb_words` flag.
    nb_words = np.array(h5f['id_term_map_length'])

# Reorder the label strings by the stored index array.
# NOTE(review): this rebinds `label_str` from itself, so re-running this cell
# without re-running the cell that unpickles `label_str` applies the
# permutation a second time.
label_str = [label_str[idx] for idx in new_idx]

In [4]:
# Hyper-parameters and run options, registered as TensorFlow flags and handed
# to the model as its config object through FLAGS.
# Values in square brackets inside the help strings are the authors' notes and
# do not necessarily match the defaults set here.
flags = tf.app.flags

flags.DEFINE_integer("nb_words", nb_words, "term number in input sequence(zero mask) [20001]")
flags.DEFINE_integer("maxlen", maxlen, "the max length of input sequence [80]")
flags.DEFINE_integer("num_layers", 2, "the number of rnn layers [1]")
# init_std is fractional (0.05), so it must be a float flag: an integer flag
# would reject or truncate the value when it is validated or parsed.
flags.DEFINE_float("init_std", 0.05, "init_std")
flags.DEFINE_integer("init_scale", 1, "init_scale")
flags.DEFINE_integer("embedding_size", 300, "word embedding size [50]")
flags.DEFINE_integer("hidden_size", 512, "rnn hidden size [128]")
flags.DEFINE_float("keep_prob", 0.9, "keep probability of drop out [0.9]")
flags.DEFINE_float("learning_rate", 0.001, "learning rate [0.001]")
flags.DEFINE_integer("batch_size", 512, "batch size to use during training [128]")
flags.DEFINE_float("clip_gradients", 5.0, "clip gradients to this norm [5.0]")
flags.DEFINE_integer("n_epochs", 1, "number of epoch to use during training [10]")
flags.DEFINE_boolean("epoch_save", True, "save checkpoint or not in each epoch [True]")
flags.DEFINE_integer("print_step", 100, "print step duraing training [100]")
flags.DEFINE_string("logs_dir", "logs/", "logs directory [logs/]")
flags.DEFINE_string("model_dir", "model/", "model directory [model/]")
flags.DEFINE_boolean("dir_clear", False, "clear the log and model directory")
flags.DEFINE_boolean("lr_annealing", False, "use lr annealing or not after each epoch [False]")
flags.DEFINE_string("current_task_name", 'url_self_prediction', "current task name [self_prediction]")
flags.DEFINE_integer("gpu_id", 0, "default gpu id [0]")
flags.DEFINE_integer("gpu_num", 4, "gpu_num")

FLAGS = flags.FLAGS


In [5]:
class AttentionRNNModel(SequentialModel):
    """Multi-tower bidirectional LSTM classifier with max-pooling over time.

    Each tower embeds the input, runs a stacked bidirectional LSTM over it,
    max-pools the per-timestep outputs along the time axis and feeds the
    pooled vector to a fully connected softmax layer. Input placeholders,
    the optimiser, per-tower bookkeeping lists and the aggregation step come
    from ``SequentialModel``, which is defined outside this notebook.
    """

    def __init__(self, config, sess, current_task_name='attention_rnn_model'):
        """Forward the config object, session and task name to the base class."""
        super(AttentionRNNModel, self).__init__(config, sess, current_task_name)

    def _make_stacked_lstm_cell(self, reuse=None):
        """Build one direction's stack of dropout-wrapped peephole LSTM cells.

        Args:
            reuse: passed through to every ``LSTMCell``.

        Returns:
            A ``MultiRNNCell`` with ``self.num_layers`` layers of
            ``self.hidden_size`` units; dropout with ``self.keep_prob`` is
            applied to both the input and the output of each layer.
        """
        layers = [
            rnn_cell.DropoutWrapper(
                rnn_cell.LSTMCell(self.hidden_size, use_peepholes=True,
                                  state_is_tuple=True, reuse=reuse),
                input_keep_prob=self.keep_prob,
                output_keep_prob=self.keep_prob)
            for _ in range(self.num_layers)
        ]
        return rnn_cell.MultiRNNCell(layers, state_is_tuple=True)

    def build_input_sequence(self, gpu_id=0, reuse=None):
        """Build the embedding layer and the bidirectional LSTM for one tower.

        Args:
            gpu_id: index of the tower; used to index the per-tower lists.
            reuse: passed to the LSTM cells (True for every tower but the
                first when called from ``build_model``).
        """
        # embedding layer
        self.__build_embedding_layer__()
        with get_new_variable_scope('rnn_lstm'):
            # Forward and backward stacks are configured identically.
            cell_fw = self._make_stacked_lstm_cell(reuse=reuse)
            cell_bw = self._make_stacked_lstm_cell(reuse=reuse)
            # bidirectional_dynamic_rnn returns (outputs, output_states), so
            # despite the attribute names `state_list` receives the
            # per-timestep (fw, bw) outputs and `output_list` the final states.
            self.state_list[gpu_id], self.output_list[gpu_id] = bidirectional_dynamic_rnn(
                cell_fw, cell_bw, self.input_embedding,
                self.split_seqLengths[gpu_id], dtype=tf.float32)
            # Join the forward and backward outputs along the last axis.
            self.state_list[gpu_id] = tf.concat(self.state_list[gpu_id], axis=2)

    def build_single_prediction(self, type='self', gpu_id=0, accK=1, nb_class=2):
        """Add pooling, classification, loss, gradients and accuracy for one tower.

        Args:
            type: forwarded unchanged to ``__add_to_tower_list__``.
            gpu_id: index of the tower; used to index the per-tower lists.
            accK: requested ``k`` of the top-k accuracy.
            nb_class: number of output classes.
        """
        with tf.name_scope('pooling_over_time'):
            # Add a trailing channel axis, then take the maximum over a window
            # of `maxlen` steps along axis 1.
            # assumes the time axis has exactly `maxlen` steps -- TODO confirm
            pool = tf.nn.max_pool(tf.expand_dims(self.state_list[gpu_id], -1),
                                  ksize=[1, self.maxlen, 1, 1],
                                  strides=[1, 1, 1, 1], padding='VALID', name="pool")
            pool_flat = tf.reshape(pool, [-1, self.hidden_size*2])
        with get_new_variable_scope('prediction'):
            # Logits; softmax is applied only to the stored prediction results.
            prediction = my_full_connected(pool_flat, nb_class, add_bias=True, act=tf.identity, init_std=self.init_std)
            self.tower_prediction_results.append(tf.nn.softmax(prediction))
        with tf.name_scope('loss'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.split_label[gpu_id],
                                                                  logits=prediction)
            if self.params is None:
                # Only set once (by the first tower). The first trainable
                # variable is deliberately skipped.
                # NOTE(review): presumably a non-model variable -- confirm.
                self.params = tf.trainable_variables()[1:]
            grads, capped_gvs = my_compute_grad(self.opt, loss, self.params,
                                                clip_type = 'clip_norm',
                                                max_clip_grad=self.clip_gradients)
        with tf.name_scope('accuracy'):
            # With k >= nb_class every example counts as a hit and the metric
            # is always 1.0 (e.g. build_model's default accK=5 with two
            # classes), so cap k at nb_class - 1 while keeping it at least 1.
            top_k = max(1, min(accK, nb_class - 1))
            accuracy = tf.to_float(tf.nn.in_top_k(prediction, self.split_label[gpu_id], k=top_k))
        self.__add_to_tower_list__(grads, capped_gvs, loss, accuracy, type)

    def build_model(self, type='self', accK=5, nb_c1ass=2):
        """Build the shared inputs, one tower per entry of ``self.gpus``, and aggregate.

        Args:
            type: forwarded to ``build_single_prediction``.
            accK: requested ``k`` of the top-k accuracy.
            nb_c1ass: number of output classes. The spelling (digit ``1``
                instead of the letter ``l``) is kept so that existing keyword
                callers keep working.
        """
        self.build_input()
        self.build_single_output()
        for idx, gpu_id in enumerate(self.gpus):
            with tf.device('/gpu:%d' % gpu_id):
                with tf.name_scope('Tower_%d' % (gpu_id)):
                    # Variables are created by the first tower and reused by
                    # the others.
                    reuse = (idx!=0)
                    with tf.variable_scope('gpu', reuse=reuse):
                        # Per-tower lists are indexed by position in
                        # self.gpus, not by the physical device id.
                        self.build_input_sequence(gpu_id=idx, reuse=reuse)
                        self.build_single_prediction(type=type,gpu_id=idx,accK=accK,nb_class=nb_c1ass)
        self.build_model_aggregation()

In [None]:
# Collapse the raw labels into two classes in place: values <= 1 become 0 and
# values >= 3 become 1. The value 2 is left as is and drops out below because
# only classes 0 and 1 are sampled.
# NOTE(review): `label` is mutated in place, so re-running this cell without
# reloading `label` re-applies the thresholds to already-collapsed values.
label[label<=1] = 0
label[label>=3] = 1

class0_candidates = np.where(label == 0)[0]
class1_candidates = np.where(label == 1)[0]

# Draw the same number of examples from each class. 2387043 is the size used
# originally; it is capped by the smaller class so that sampling without
# replacement cannot fail on a smaller data set.
n_per_class = min(2387043, len(class0_candidates), len(class1_candidates))

# A local, seeded generator makes the sample reproducible (matching the fixed
# random_state of the split below) without touching numpy's global RNG state.
sampler = np.random.RandomState(42)
idxs = np.append(sampler.choice(class0_candidates, n_per_class, replace=False),
                 sampler.choice(class1_candidates, n_per_class, replace=False))

train_idxs, test_idxs = train_test_split(idxs, test_size=0.2, random_state=42)
test_idxs = np.sort(test_idxs)

In [None]:
# The model is built inside a dedicated graph instead of the default one.
graph_to_use = tf.Graph()

# Session options: soft device placement, and GPU memory allocated on demand
# instead of being reserved up front.
config = tf.ConfigProto(allow_soft_placement=True,
                        gpu_options=tf.GPUOptions(allow_growth=True))

with tf.Session(config=config, graph=graph_to_use) as session:
    attention_rnnlm_model = AttentionRNNModel(
        FLAGS, session, current_task_name='attention_rnnlm_model')
    # Build the graph with the default build_model arguments, then show the
    # variable summary table before training starts.
    attention_rnnlm_model.build_model()
    attention_rnnlm_model.build_model_summary()
    display(attention_rnnlm_model.model_summary())
    # Train and evaluate on the index splits prepared earlier.
    attention_rnnlm_model.run([documents,seq_len,label], train_idxs, test_idxs)

Initializing


Unnamed: 0,variable_name,variable_shape,parameters
0,global/Variable:0,[],1
1,gpu/embedding/embedding_layer/embedding_table:0,"[15987, 300]",4796100
2,gpu/rnn_lstm/bidirectional_rnn/fw/multi_rnn_ce...,"[812, 2048]",1662976
3,gpu/rnn_lstm/bidirectional_rnn/fw/multi_rnn_ce...,[2048],2048
4,gpu/rnn_lstm/bidirectional_rnn/fw/multi_rnn_ce...,[512],512
5,gpu/rnn_lstm/bidirectional_rnn/fw/multi_rnn_ce...,[512],512
6,gpu/rnn_lstm/bidirectional_rnn/fw/multi_rnn_ce...,[512],512
7,gpu/rnn_lstm/bidirectional_rnn/fw/multi_rnn_ce...,"[1024, 2048]",2097152
8,gpu/rnn_lstm/bidirectional_rnn/fw/multi_rnn_ce...,[2048],2048
9,gpu/rnn_lstm/bidirectional_rnn/fw/multi_rnn_ce...,[512],512


Epoch 1 ... training ...
