In [1]:
import os
import pprint
import tensorflow as tf
import math
import time

import random
import numpy as np
from past.builtins import xrange

from data import read_data
import sys
sys.path.append('../tfmodels')
from sequential_model import *



In [2]:
# Corpus directory, checkpoint directory and dataset file prefix.
data_dir = 'data'
checkpoint_dir = 'checkpoints'
data_name = 'ptb'

# Create the checkpoint directory if it is missing.
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# The same two containers are handed to every read_data call below, so the
# three splits share them. word2idx is inverted and measured afterwards, so
# read_data is expected to fill it in (its implementation is not shown here).
count = []
word2idx = {}


def _read_split(path_template):
    """Load one corpus file; `path_template` is formatted with (data_dir, data_name)."""
    return read_data(path_template % (data_dir, data_name), count, word2idx)


train_data = _read_split('%s/%s.train.txt')
valid_data = _read_split('%s/%s.valid.txt')
test_data = _read_split('%s/%s.test.txt')

# Reverse lookup (index -> word) and vocabulary size.
idx2word = {index: word for word, index in word2idx.items()}
nwords = len(word2idx)


Read 929589 words from data/ptb.train.txt
Read 73760 words from data/ptb.valid.txt
Read 82430 words from data/ptb.test.txt


In [4]:
# Number of tokens in each context window.
maxlen = 100
# Distance between the start positions of consecutive windows. One constant is
# used both for counting the samples and for filling the arrays, so the two
# can never drift apart.
window_stride = 3


def _fill_windows(data, contexts, targets, first_row, window, stride):
    """Copy every (context window, following token) pair of `data` into the arrays.

    Windows start at 0, stride, 2*stride, ... for as long as a token exists
    right after the window. Rows are written consecutively from `first_row`.

    Args:
        data: indexable token sequence supporting len() and slicing.
        contexts: 2-D array receiving one `window`-long slice per row.
        targets: 1-D array receiving the token that follows each window.
        first_row: index of the first row to write.
        window: number of tokens per context window.
        stride: step between consecutive window starts.

    Returns:
        The index of the first row that was NOT written.
    """
    row = first_row
    for start in range(0, len(data) - window, stride):
        contexts[row] = data[start: start + window]
        targets[row] = data[start + window]
        row += 1
    return row


train_samples = len(range(0, len(train_data) - maxlen, window_stride))
valid_samples = len(range(0, len(valid_data) - maxlen, window_stride))
samples = train_samples + valid_samples

# Row indices: training windows occupy the first rows, validation windows follow.
idxs = np.arange(0, samples)
train_idxs = idxs[0:train_samples]
# Despite the name, these rows are built from valid_data; test_data is not used here.
test_idxs = idxs[train_samples:]

# np.empty leaves the memory uninitialised, so every row must be written below.
previous_words = np.empty(shape=(samples, maxlen), dtype=np.int32)
next_words = np.empty(shape=(samples), dtype=np.int32)

# global_step is simply the running row counter (name kept from the original cell).
global_step = _fill_windows(train_data, previous_words, next_words,
                            0, maxlen, window_stride)
global_step = _fill_windows(valid_data, previous_words, next_words,
                            global_step, maxlen, window_stride)
assert global_step == samples, \
    'filled %d rows but allocated %d' % (global_step, samples)

# Every window is full length, so each sample's sequence length equals maxlen.
seq_len = np.zeros(shape=(samples), dtype=np.int32) + maxlen

In [5]:
# Hyper-parameters and run options registered as TensorFlow flags; FLAGS is
# later passed to the model constructor as its config object.
# NOTE(review): several bracketed "defaults" in the help strings no longer match
# the values registered here (e.g. maxlen, embedding_size, learning_rate,
# batch_size, n_epochs). The second argument of each DEFINE_* call is the
# actual default.
flags = tf.app.flags

flags.DEFINE_integer("nb_words", nwords, "term number in input sequence(zero mask) [20001]")
flags.DEFINE_integer("maxlen", maxlen, "the max length of input sequence [80]")
flags.DEFINE_integer("num_layers", 1, "the number of rnn layers [1]")
# init_std is fractional (0.05), so it must be a float flag; registering it as
# an integer flag mis-declares its type.
flags.DEFINE_float("init_std", 0.05, "init_std")
flags.DEFINE_integer("init_scale", 1, "init_scale")
flags.DEFINE_integer("embedding_size", 100, "word embedding size [50]")
flags.DEFINE_integer("hidden_size", 128, "rnn hidden size [128]")
flags.DEFINE_float("keep_prob", 0.9, "keep probability of drop out [0.9]")
flags.DEFINE_float("learning_rate", 0.002, "learning rate [0.001]")
flags.DEFINE_integer("batch_size", 512, "batch size to use during training [128]")
flags.DEFINE_float("clip_gradients", 5.0, "clip gradients to this norm [5.0]")
flags.DEFINE_integer("n_epochs", 1, "number of epoch to use during training [10]")
flags.DEFINE_boolean("epoch_save", True, "save checkpoint or not in each epoch [True]")
flags.DEFINE_integer("print_step", 100, "print step duraing training [100]")
flags.DEFINE_string("logs_dir", "logs/", "logs directory [logs/]")
flags.DEFINE_string("model_dir", "model/", "model directory [model/]")
flags.DEFINE_boolean("dir_clear", False, "clear the log and model directory")
flags.DEFINE_boolean("lr_annealing", False, "use lr annealing or not after each epoch [False]")
flags.DEFINE_string("current_task_name", 'url_self_prediction', "current task name [self_prediction]")
flags.DEFINE_integer("gpu_id", 0, "default gpu id [0]")
flags.DEFINE_integer("gpu_num", 4, "gpu_num")

FLAGS = flags.FLAGS


In [6]:
class SequentialModel(RNNLMModel):
    """RNNLMModel variant that can add a 'single' label head next to the parent's heads.

    The 'single' head classifies from the last element of the tower's first
    output (``self.output_list[gpu_id][0][-1]``) against a separate ``label``
    placeholder. Every other head type is delegated to the parent class.
    """

    def __init__(self, config, sess, current_task_name='sequence_model'):
        """Initialise the parent model with `config` and `sess`.

        NOTE(review): `current_task_name` is accepted but neither stored nor
        forwarded to the parent here -- confirm whether that is intended.
        """
        super(SequentialModel, self).__init__(config, sess)

    def build_single_prediction(self, gpu_id=0, accK=5, nb_class=None):
        """Build the 'single' head (logits, loss, gradients, top-k accuracy) for one tower.

        Args:
            gpu_id: index into ``self.output_list`` / ``self.split_label``.
            accK: k used for the top-k accuracy.
            nb_class: number of output classes; falls back to ``self.nb_words`` when None.

        Side effects: resets and fills ``self.params_1``, appends the softmax
        output to ``self.tower_prediction_results`` and registers the gradients,
        loss and accuracy under the 'single' key via ``__add_to_tower_list__``.
        """
        self.params_1 = None
        n_outputs = self.nb_words if nb_class is None else nb_class

        with get_new_variable_scope('prediction'):
            last_output = self.output_list[gpu_id][0][-1]
            logits = my_full_connected(last_output, n_outputs,
                                       add_bias=True, act=tf.identity,
                                       init_std=self.init_std)
            self.tower_prediction_results.append(tf.nn.softmax(logits))

        with tf.name_scope('loss'):
            tower_labels = self.split_label[gpu_id]
            xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tower_labels, logits=logits)

            # Parameters trained through this head: the shared input parameters
            # plus the two most recently listed trainable variables.
            # NOTE(review): this presumably targets the head's own W and bias,
            # but the model summary recorded in this notebook shows only a W
            # under prediction_1 -- confirm which variables [-2:] selects.
            self.params_1 = list(self.input_params)
            self.params_1 += tf.trainable_variables()[-2:]
            grads, capped_gvs = my_compute_grad(self.opt, xent, self.params_1,
                                                clip_type='clip_norm',
                                                max_clip_grad=self.clip_gradients)

        with tf.name_scope('accuracy'):
            top_k_hits = tf.nn.in_top_k(logits, tower_labels, k=accK)
            accuracy = tf.to_float(top_k_hits)

        self.__add_to_tower_list__(grads, capped_gvs, xent, accuracy, 'single')

    def build_single_output(self):
        """Create the int64 ``label`` placeholder, register it as a graph input
        and split it along axis 0 into ``self.gpu_num`` pieces (``self.split_label``).
        """
        with tf.name_scope('output'):
            label_ph = tf.placeholder(tf.int64, [None], name="label")
            self.__add_to_graph_input__([label_ph])
            self.split_label = tf.split(label_ph, self.gpu_num, 0)

    def build_output(self, type='self'):
        """Build the output placeholders for the requested head type(s).

        A list means "parent output for ``type[0]`` plus the single-label
        output"; the string 'single' builds only the single-label output; any
        other value is passed straight to the parent implementation.
        """
        if isinstance(type, list):
            super(SequentialModel, self).build_output(type[0])
            self.build_single_output()
            return
        if type == 'single':
            self.build_single_output()
            return
        super(SequentialModel, self).build_output(type)

    def split_parameter(self, param):
        """Return a pair of settings: (first head, second head).

        A non-list value is used for both heads. A list supplies its first two
        items, or its only item twice when it holds a single element.
        """
        if not isinstance(param, list):
            return param, param
        first = param[0]
        second = param[1] if len(param) > 1 else first
        return first, second

    # The list default is kept for interface compatibility; it is only read, never mutated, here.
    def build_model(self, type=['self','single'], accK=5, nb_class=None):
        """Build inputs, outputs and one tower per entry of ``self.gpus``, then aggregate.

        Args:
            type: a head type, or a list whose first item is the parent head
                type to build together with the 'single' head.
            accK: top-k value, or a list ``[sequence_k, single_k]``.
            nb_class: class count, or a list ``[sequence_classes, single_classes]``.
        """
        self.build_input()
        self.build_output(type)
        seq_accK, single_accK = self.split_parameter(accK)
        seq_classes, single_classes = self.split_parameter(nb_class)

        # Decide once which heads every tower gets.
        multi_head = isinstance(type, list)
        seq_type = type[0] if multi_head else type
        single_only = (not multi_head) and type == 'single'
        with_sequence = not single_only
        with_single = multi_head or single_only

        for idx, gpu_id in enumerate(self.gpus):
            device_name = '/gpu:%d' % gpu_id
            tower_name = 'Tower_%d' % (gpu_id)
            with tf.device(device_name), tf.name_scope(tower_name):
                # Variables are created by the first tower and reused by the rest.
                with tf.variable_scope('gpu', reuse=(idx != 0)):
                    self.build_input_sequence(gpu_id=idx)
                    if with_sequence:
                        self.build_sequence_prediction(type=seq_type, gpu_id=idx,
                                                       accK=seq_accK, nb_class=seq_classes)
                    if with_single:
                        self.build_single_prediction(gpu_id=idx, accK=single_accK,
                                                     nb_class=single_classes)
        self.build_model_aggregation()
        

In [7]:
# Dedicated graph for this model, with soft device placement and on-demand GPU memory.
graph_to_use = tf.Graph()
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True

with tf.Session(graph=graph_to_use, config=config) as session:
    rnnlm_model = SequentialModel(FLAGS, session, current_task_name='seq_model')
    # Build both heads: the parent 'self' head and the 'single' label head.
    rnnlm_model.build_model(type=['self', 'single'])
    rnnlm_model.build_model_summary()
    # model_restore() is left disabled, as in the original cell:
    #rnnlm_model.model_restore()
    display(rnnlm_model.model_summary())
    # Run the 'self' head first, then the 'single' head, on the same arrays and index split.
    for head in ('self', 'single'):
        rnnlm_model.run([previous_words, seq_len, next_words],
                        train_idxs, test_idxs, run_type=head)


Initializing


Unnamed: 0,variable_name,variable_shape,parameters
0,global/Variable:0,[],1.0
1,gpu/embedding/embedding_layer/embedding_table:0,"[10000, 100]",1000000.0
2,gpu/rnn_lstm/rnn/multi_rnn_cell/cell_0/lstm_ce...,"[228, 512]",116736.0
3,gpu/rnn_lstm/rnn/multi_rnn_cell/cell_0/lstm_ce...,[512],512.0
4,gpu/rnn_lstm/rnn/multi_rnn_cell/cell_0/lstm_ce...,[128],128.0
5,gpu/rnn_lstm/rnn/multi_rnn_cell/cell_0/lstm_ce...,[128],128.0
6,gpu/rnn_lstm/rnn/multi_rnn_cell/cell_0/lstm_ce...,[128],128.0
7,gpu/prediction/conv_1d/he_uniform/W:0,"[1, 128, 10000]",1280000.0
8,gpu/prediction/conv_1d/B:0,[10000],10000.0
9,gpu/prediction_1/fully_connected/W:0,"[128, 10000]",1280000.0


('Epoch', 1, '... training ...')
('Minibatch', 100, '/', 'loss:', 6.6084213)
('Minibatch', 100, '/', 'accuracy:', 0.19540612)
('Minibatch', 200, '/', 'loss:', 6.2081985)
('Minibatch', 200, '/', 'accuracy:', 0.24480052)
('Minibatch', 300, '/', 'loss:', 5.8365669)
('Minibatch', 300, '/', 'accuracy:', 0.29508257)
('Minibatch', 400, '/', 'loss:', 5.5646472)
('Minibatch', 400, '/', 'accuracy:', 0.32933199)
('Minibatch', 500, '/', 'loss:', 5.4794588)
('Minibatch', 500, '/', 'accuracy:', 0.33192468)
('Minibatch', 600, '/', 'loss:', 5.3142548)
('Minibatch', 600, '/', 'accuracy:', 0.35432622)
('epoch time:', 10.439584799607594)
('Epoch', 1, 'training accuracy:', 0.27743490408295951)
('Epoch', 1, '... test ...')
('Epoch', 1, 'test accuracy:', 0.34220713674148157)
Model saved in file: model/rnnlm.ckpt
{'valid_los': 5.5011547202976958, 'loss': 5.9858551513327329, 'valid_perplexity': 244.97464620143543, 'learning_rate': 0.002, 'best_accuracy': 0.27743490408295951, 'epoch': 0, 'best_test_accuracy': 