# This notebook builds an LSTM-based recurrent neural network to classify IMDB movie reviews

In [4]:
from __future__ import division
from __future__ import print_function

In [30]:
import tensorflow as tf
import numpy as np

In [6]:
# Vocabulary cap: passed to imdb.load_data as nb_words and later reused as the
# embedding table's vocab_size.
max_features = 5000

In [11]:
from utils import imdb
from utils import sequence

In [12]:
# Load the IMDB train/test splits through the project-local utils.imdb helper,
# capping the vocabulary at max_features.
# NOTE(review): presumably only the most frequent words are kept -- confirm
# against utils.imdb.
print('loading data')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

loading data
25000 train sequences
25000 test sequences


In [13]:
# Sanity check: the labels form a 1-D array with one entry per training review.
print(y_train.shape)

(25000,)


In [14]:
# Bring every review to a fixed length of maxlen tokens so that batches are
# rectangular arrays.
# NOTE(review): pad_sequences is a project-local helper; truncation of reviews
# longer than maxlen is assumed -- confirm against utils.sequence.
maxlen = 400
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

In [15]:
# Inspect the padded result: zeros are added on the left, so the real tokens
# end up right-aligned at the end of each row.
print(X_train.shape)
print(X_train[0])

(25000, 400)
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    1   14   22   16   43  530  973 1622 1385   65  458 4468   66

In [16]:
# Graph-building helpers.
def add_placeholders():
    """Create the two graph inputs.

    Reads the module-level ``batch_size`` and ``num_steps`` at call time.

    Returns:
        A pair ``(input_placeholder, label_placeholder)``: an int32
        placeholder of shape ``[batch_size, num_steps]`` for the token ids and
        a float32 placeholder of shape ``[batch_size]`` for the labels.
    """
    token_ids = tf.placeholder(dtype=tf.int32, shape=[batch_size, num_steps])
    labels = tf.placeholder(dtype=tf.float32, shape=[batch_size])
    return token_ids, labels

In [17]:
# Feed-dict construction.
def create_feed_dict(input_placeholder, input_batch, label_placeholder, label_batch):
    """Map each placeholder to the batch that should be fed into it.

    Args:
        input_placeholder: key under which ``input_batch`` is stored.
        input_batch: value fed for the inputs.
        label_placeholder: key under which ``label_batch`` is stored.
        label_batch: value fed for the labels.

    Returns:
        A new dict with exactly those two entries.
    """
    feed = {}
    feed[input_placeholder] = input_batch
    feed[label_placeholder] = label_batch
    return feed

In [18]:
def add_embed_layer(vocab_size, input_placeholder):
    """Embed the token ids and split the result into one tensor per time step.

    Reads the module-level ``embed_size`` and ``num_steps`` at call time. All
    ops are pinned to the CPU.

    Args:
        vocab_size: number of rows in the embedding table.
        input_placeholder: token-id tensor to look up in the table.

    Returns:
        A Python list of ``num_steps`` tensors, obtained by splitting the
        looked-up embeddings along axis 1 and squeezing that axis away.
    """
    with tf.device('/cpu:0'):
        embedding_table = tf.get_variable(name="Embedding", shape=[vocab_size, embed_size])
        embedded = tf.nn.embedding_lookup(embedding_table, input_placeholder)
        # Old-style tf.split signature: (split_dim, num_split, value).
        per_step_slices = tf.split(1, num_steps, embedded)
        step_inputs = []
        for step_slice in per_step_slices:
            step_inputs.append(tf.squeeze(step_slice, squeeze_dims=[1]))
    return step_inputs

In [19]:
## Training op.
def add_train_op(loss):
    """Return an op that minimizes ``loss`` with Adam at learning rate 0.001."""
    optimizer = tf.train.AdamOptimizer(0.001)
    return optimizer.minimize(loss)

In [20]:
## Recurrent cell.
def add_rnn_model(hidden_size, num_steps):
    """Build the recurrent cell: a single BasicLSTMCell.

    Args:
        hidden_size: number of units in the LSTM cell.
        num_steps: accepted but not used by this function.

    Returns:
        The ``BasicLSTMCell`` (created with ``forget_bias=0.0``).
    """
    return tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=0.0)

In [21]:
# Evaluate the predictions.
def evaluation(y_pred_sigmoid, label_placeholder):
    """Build an op that counts the correct predictions in a batch.

    A prediction counts as positive when its sigmoid output is above 0.5; the
    labels are cast to bool and compared element-wise with the predictions.

    Args:
        y_pred_sigmoid: tensor of sigmoid outputs, one per example.
        label_placeholder: tensor of labels, one per example.

    Returns:
        A scalar int32 tensor holding the number of matching entries.
    """
    predicted_positive = y_pred_sigmoid > 0.5
    actual_positive = tf.cast(label_placeholder, tf.bool)
    matches = tf.cast(tf.equal(predicted_positive, actual_positive), tf.int32)
    # Reduce inside the graph. Routing the symbolic tensor through np.sum only
    # works by accident of NumPy's object-array fallback.
    return tf.reduce_sum(matches)

In [22]:
def do_evaluation(sess, X, y):
    """Run the model over ``X``/``y`` in batches and print the accuracy.

    Uses the module-level ``batch_size``, ``input_placeholder``,
    ``label_placeholder`` and ``correct_num``. Examples that do not fill a
    complete batch at the end of ``X`` are skipped, because the placeholders
    have a fixed batch dimension.

    Args:
        sess: session in which the graph is evaluated.
        X: 2-D array of inputs, one row per example.
        y: labels, one per row of ``X``.
    """
    # Named num_batches so it does not shadow the global num_steps
    # (the sequence length).
    num_batches = len(X) // batch_size
    if num_batches == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        print('Testing Accuracy: not available (fewer than %d examples)' % batch_size)
        return

    total_correct_num = 0
    for step in range(num_batches):
        start = step * batch_size
        end = start + batch_size
        # No recurrent state is fed: every batch holds independent reviews and
        # the graph always starts from the zero state.
        feed = create_feed_dict(input_placeholder, X[start:end, :],
                                label_placeholder, y[start:end])
        total_correct_num += sess.run(correct_num, feed)
    print('Testing Accuracy: %f' %(float(total_correct_num)/(num_batches*batch_size)))

In [23]:
# Hyperparameters, read as module-level globals by the graph-building cells.
# When debugging, set max_epochs = 1.
max_epochs = 15  # number of passes over the training set
batch_size = 20  # examples per batch; also fixes the placeholder shapes
hidden_size = 100  # number of LSTM units
num_steps = maxlen  # one time step per token of the padded review
embed_size = 50  # width of the embedding table

In [24]:
# Majority-class baseline: the accuracy obtained by always predicting the more
# frequent label, i.e. the larger of the label mean and its complement.
majority = np.sum(y_train) / len(X_train)
print('the majority classifier accuracy is %f' %(majority if majority > 0.5 else 1-majority))

the majority classifier accuracy is 0.500000


In [25]:
# Class-balance check: number of training reviews, then the sum of the labels
# (the count of label-1 reviews, assuming labels are 0/1 -- confirm).
print(len(X_train))

print(np.sum(y_train))

25000
12500


In [28]:
with tf.Graph().as_default(), tf.Session() as sess:
    # ---- Build the graph: placeholders -> embedding -> LSTM -> projection ----
    input_placeholder, label_placeholder = add_placeholders()
    # Not consumed by any op in this graph; retained so that code outside this
    # cell that refers to the name keeps working.
    initial_state_placeholder = tf.placeholder(tf.float32)
    vocab_size = max_features
    inputs = add_embed_layer(vocab_size, input_placeholder)

    # Every batch starts from the all-zero recurrent state.
    cell = add_rnn_model(hidden_size, num_steps)
    initial_state = cell.zero_state(batch_size, tf.float32)

    # outputs holds one tensor per time step; state is the final state.
    outputs, state = tf.nn.rnn(cell, inputs, initial_state=initial_state)

    # Projection from the last LSTM output to a single logit per review.
    # The bias is one scalar shared by all examples. A [batch_size, 1] bias
    # would make the prediction depend on a review's position in the batch.
    W = tf.get_variable('Weights', shape=[hidden_size, 1])
    b = tf.get_variable('Bias', shape=[1], initializer=tf.constant_initializer(0.0))

    # Squeeze only the trailing unit axis so the batch axis always survives.
    y_pred = tf.squeeze(tf.matmul(outputs[-1], W) + b, squeeze_dims=[1])

    y_pred_sigmoid = tf.sigmoid(y_pred)

    correct_num = evaluation(y_pred_sigmoid, label_placeholder)

    # Element-wise loss, one value per example in the batch.
    loss = tf.nn.sigmoid_cross_entropy_with_logits(y_pred, label_placeholder)

    train_op = add_train_op(loss)

    sess.run(tf.initialize_all_variables())

    # ---- Train, then evaluate on the test set after every epoch ----
    # Examples that do not fill a complete batch are skipped because the
    # placeholders have a fixed batch dimension.
    num_batches = len(X_train) // batch_size
    for epoch in range(max_epochs):
        print('%d Epoch starts, Training....' %(epoch))
        mean_loss = []
        total_correct_num = 0
        for step in range(num_batches):
            start = step * batch_size
            end = start + batch_size
            feed = create_feed_dict(input_placeholder, X_train[start:end, :],
                                    label_placeholder, y_train[start:end])
            _, loss_step, correct_num_step = sess.run([train_op, loss, correct_num], feed)

            # Sum of the per-example losses of this batch.
            mean_loss.append(np.sum(loss_step))
            total_correct_num += correct_num_step

            if step % 100 == 0:
                # Average of the batch losses since the previous report.
                print('step %d / %d : loss : %f' %(step, num_batches, np.mean(mean_loss)))
                mean_loss = []
        # The value printed as "precision" is the training accuracy over the
        # examples actually seen. max() guards against an empty training set.
        print('precision: %f' %(float(total_correct_num)/max(num_batches*batch_size, 1)))
        print('Testing....')
        do_evaluation(sess, X_test, y_test)

0 Epoch starts, Training....
step 0 / 1250 : loss : 13.732358
step 100 / 1250 : loss : 13.916043


KeyboardInterrupt: 

In [27]:
# Interactive API lookup: prints the docstring of the loss function used above.
help(tf.nn.sigmoid_cross_entropy_with_logits)

Help on function sigmoid_cross_entropy_with_logits in module tensorflow.python.ops.nn:

sigmoid_cross_entropy_with_logits(logits, targets, name=None)
    Computes sigmoid cross entropy given `logits`.
    
    Measures the probability error in discrete classification tasks in which each
    class is independent and not mutually exclusive.  For instance, one could
    perform multilabel classification where a picture can contain both an elephant
    and a dog at the same time.
    
    For brevity, let `x = logits`, `z = targets`.  The logistic loss is
    
          z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
        = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x))
        = (1 - z) * x + log(1 + exp(-x))
        = x - x * z + log(1 + exp(-x))
    
    For x < 0, to avoid overflow in exp(-x), we reformulate the a

In [None]:
# Interactive API lookup: prints the documentation of BasicLSTMCell.
help(tf.nn.rnn_cell.BasicLSTMCell)

In [None]:
# Interactive API lookup: prints the documentation of tf.shape.
help(tf.shape)

In [None]:
# Stand-alone experiment: run an LSTM over a batch of variable-length sequences
# and check that outputs past a sequence's length are zero.
tf.reset_default_graph()

# Input data: 2 sequences, 10 time steps, 8 features per step. NumPy's randn
# yields float64, matching the dtype requested from the RNN below. A fixed seed
# keeps the printed output reproducible.
X = np.random.RandomState(0).randn(2, 10, 8)
# The second example is of length 6, so blank out its padding steps.
X[1, 6:] = 0
X_lengths = [10, 6]

cell = tf.nn.rnn_cell.LSTMCell(num_units=64, state_is_tuple=True)

# dynamic_rnn takes the [batch, time, features] array directly and returns the
# outputs as one [batch, time, units] tensor, which is what the shape
# assertion below expects.
outputs, last_states = tf.nn.dynamic_rnn(
    cell=cell,
    dtype=tf.float64,
    sequence_length=X_lengths,
    inputs=X)

result = tf.contrib.learn.run_n(
    {"outputs": outputs, "last_states": last_states},
    n=1,
    feed_dict=None)

assert result[0]["outputs"].shape == (2, 10, 64)
print(result[0]["outputs"])

# Outputs for the second example past length 6 should be 0
assert (result[0]["outputs"][1,7,:] == np.zeros(cell.output_size)).all()