In [1]:
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
batch_size = 128
embedding_dimension = 64
num_classes = 2
hidden_layer_size = 32
times_steps = 6
element_size = 1

In [3]:
# 生成数据
digit_to_word_map = {1:'one', 2:'two', 3:'three', 4:'four', 5:'five', 6:'six',
                    7:'seven', 8:'eight', 9:'nine'}
digit_to_word_map[0] = 'pad'

even_sentences = []
odd_sentences = []
seqlens = []

for i in range(10000):
    rand_seq_len = np.random.choice(range(3,7))
    seqlens.append(rand_seq_len)
    rand_odd_ints = np.random.choice(range(1,10,2), rand_seq_len)
    rand_even_ints = np.random.choice(range(2,10,2), rand_seq_len)
    
    # Padding
    if rand_seq_len<6:
        rand_odd_ints = np.append(rand_odd_ints, [0]*(6-rand_seq_len))
        rand_even_ints = np.append(rand_even_ints, [0]*(6-rand_seq_len))
    
    even_sentences.append(" ".join([digit_to_word_map[r] for r in rand_even_ints]))
    odd_sentences.append(" ".join([digit_to_word_map[r] for r in rand_odd_ints]))

data = even_sentences+odd_sentences
seqlens *= 2
print(data[0:6], data[-1])

['six eight eight four six pad', 'eight two four four two pad', 'eight two eight six eight two', 'eight six four two four four', 'two four four six pad pad', 'two six two six pad pad'] nine five five one pad pad


In [4]:
# map words to indices, like a real NLP application dealing with real data
word2index_map = {}
index = 0

for sen in data:
    for word in sen.split():
        if word not in word2index_map:
            word2index_map[word] = index
            index += 1

index2word_map = {index:word for word,index in word2index_map.items()}
vocabulary_size = len(word2index_map)
print(word2index_map, index2word_map)

{'six': 0, 'eight': 1, 'four': 2, 'pad': 3, 'two': 4, 'five': 5, 'three': 6, 'seven': 7, 'one': 8, 'nine': 9} {0: 'six', 1: 'eight', 2: 'four', 3: 'pad', 4: 'two', 5: 'five', 6: 'three', 7: 'seven', 8: 'one', 9: 'nine'}


In [5]:
# 随机数据集
labels = [1]*10000 + [0]*10000
for i in range(len(labels)):
    label = labels[i]
    tmp = [0]*2
    tmp[label] = 1
    labels[i] = tmp

data_slices = list(range(len(data)))
np.random.shuffle(data_slices)
data = np.array(data)[data_slices]
labels = np.array(labels)[data_slices]
seqlens = np.array(seqlens)[data_slices]

train_x = data[:10000] 
train_y = labels[:10000] 
train_seqlens = seqlens[:10000]

test_x = data[10000:] 
test_y = labels[10000:] 
test_seqlens = seqlens[10000:]

In [6]:
def get_sentence_batch(batch_size, data_x, data_y, data_seqlens):
    instance_indices = list(range(len(data_x)))
    np.random.shuffle(instance_indices)
    batch = instance_indices[:batch_size]
    
    x = [[word2index_map[word] for word in data_x[i].split()] for i in batch]
    y = [data_y[i] for i in batch]
    seqlens = [data_seqlens[i] for i in batch]
    
    return x, y, seqlens

In [7]:
_input = tf.placeholder(tf.int32, shape=[batch_size, times_steps])
_y = tf.placeholder(tf.float32, shape=[batch_size, num_classes])

# seqlen for dynamic calculation
_seqlen = tf.placeholder(tf.int32, shape=[batch_size])

In [20]:
# 词嵌入向量(WordEmbedding处理
with tf.name_scope('embeddings'):
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size,
                          embedding_dimension],
                         -1.0, 1.0), name='embedding')
    embed = tf.nn.embedding_lookup(embeddings, _input)

In [9]:
with tf.variable_scope('lstm'):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size,
                                            forget_bias=1.0)
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, embed, sequence_length=_seqlen,
                                       dtype=tf.float32)
    # states: it conveniently stores for us the last relevant output vector—
    # its values match the last relevant output vector before zero-padding
    
weights = {
    'linear_layer':tf.Variable(tf.truncated_normal([hidden_layer_size,
                                                   num_classes],
                                                  mean=0, stddev=0.01))
}
biases = {
    'linear_layer':tf.Variable(tf.truncated_normal([num_classes],
                                                  mean=0, stddev=0.01))
}

final_output = tf.matmul(states[1],
                        weights['linear_layer']) + biases['linear_layer']
softmax = tf.nn.softmax_cross_entropy_with_logits(logits=final_output, labels=_y)
cross_entropy = tf.reduce_mean(softmax)    

Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [10]:
train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(final_output, 1), tf.argmax(_y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))*100

In [15]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for i in range(1000):
        x_batch, y_batch, seqlen_batch = get_sentence_batch(batch_size, train_x, train_y, seqlens)
        sess.run(train_step, feed_dict={_input:x_batch, _y:y_batch, _seqlen:seqlen_batch})
        
        if i%100 == 0:
            acc = sess.run(accuracy, feed_dict={_input:x_batch, _y:y_batch, _seqlen:seqlen_batch})
            print("Accuracy at %d: %.5f" % (i, acc))
        
    for test_batch in range(5):
        x_test, y_test, seqlen_test = get_sentence_batch(batch_size, test_x, test_y, test_seqlens)
        batch_pred, batch_acc = sess.run([tf.argmax(final_output, 1), accuracy], feed_dict={_input:x_test, _y:y_test, _seqlen:seqlen_test})
        print("Test batch accuracy %d: %.5f" % (test_batch, batch_acc))
    
    output_example = sess.run([outputs], feed_dict={_input:x_test, _y:y_test, _seqlen:seqlen_test}) # time_steps * hidden_layer_size
    states_example = sess.run([states[1]], feed_dict={_input:x_test, _y:y_test, _seqlen:seqlen_test})

Accuracy at 0: 50.00000
Accuracy at 100: 100.00000
Accuracy at 200: 100.00000
Accuracy at 300: 100.00000
Accuracy at 400: 100.00000
Accuracy at 500: 100.00000
Accuracy at 600: 100.00000
Accuracy at 700: 100.00000
Accuracy at 800: 100.00000
Accuracy at 900: 100.00000
Test batch accuracy 0: 100.00000
Test batch accuracy 1: 100.00000
Test batch accuracy 2: 100.00000
Test batch accuracy 3: 100.00000
Test batch accuracy 4: 100.00000


In [18]:
print(output_example[0].shape) # 32: hidden_layer_size
print(len(output_example))

(128, 6, 32)
1


In [19]:
print(states_example[0].shape)
print(len(states_example))

(128, 32)
1


In [None]:
# Stacking multiple LSTMs 
num_LSTM_layers = 2
with tf.variable_scope('lstm'):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size,
                                            forget_bias=1.0)
    cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_cell]*num_LSTM_layers,
                                      state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(cell, embed,
                                       sequence_length=_seqlen,
                                       dtype=tf.float32)

# 需要修改 final_output:
# Extract the final state and use in a linear layer 
final_output = tf.matmul(states[num_LSTM_layers][1],
                        weights['linear_layer']) + biases['linear_layer']