In [1]:
from tensorflow.models.rnn.ptb import reader
import tensorflow as tf
import numpy as np

In [2]:
# Settings: every tunable value of the notebook lives in this cell.

# Directory passed to reader.ptb_raw_data when the corpus is loaded.
data_path = 'simple-examples/data/'

# Model dimensions: vocab_size is one side of both the embedding table and
# the softmax weights, hidden_size is the other side and the GRU cell size.
vocab_size = 10000
hidden_size = 128

# Shape of one mini-batch fed to the graph: [batch_size, num_steps].
batch_size, num_steps = 16, 8

In [10]:
# Preprocess data: load the raw corpora, then materialise every
# (input, target) batch up front so the loops below can simply iterate.
def _batch_pairs(corpus):
    """Return all batches reader.ptb_iterator yields for `corpus` as a list."""
    return list(reader.ptb_iterator(corpus, batch_size, num_steps))


# ptb_raw_data returns four values; the last one is not used in this notebook.
train_corpus, valid_corpus, test_corpus, _ = reader.ptb_raw_data(data_path)

train_pairs = _batch_pairs(train_corpus)
test_pairs = _batch_pairs(test_corpus)

In [4]:
# Build the language-model graph. Statements are kept in their original
# order so that ops and variables are created in the same sequence.

# Placeholders for one batch of input token ids and the matching targets
batch_shape = [batch_size, num_steps]
input_data = tf.placeholder(tf.int32, batch_shape)
targets = tf.placeholder(tf.int32, batch_shape)

# Embedding table with one hidden_size-wide row per vocabulary id
embedding_init = tf.truncated_normal([vocab_size, hidden_size], stddev=0.01)
embedding = tf.Variable(embedding_init)
inputs = tf.nn.embedding_lookup(embedding, input_data)

# Cut the embedded batch into num_steps slices along dimension 1, drop that
# dimension from each slice, and run a GRU cell over the resulting list
cell = tf.nn.rnn_cell.GRUCell(hidden_size)
step_slices = tf.split(1, num_steps, inputs)
rnn_inputs = [tf.squeeze(input_, [1]) for input_ in step_slices]
initial_state = cell.zero_state(batch_size, tf.float32)
outputs, state = tf.nn.rnn(cell, rnn_inputs, initial_state=initial_state)

# Project the RNN outputs onto the vocabulary: joining the per-step outputs
# along dimension 1 and reshaping to hidden_size columns gives one row per
# (batch entry, step), which lines up with the flattened targets below
joined_outputs = tf.concat(1, outputs)
output = tf.reshape(joined_outputs, [-1, hidden_size])
softmax_w_init = tf.truncated_normal([hidden_size, vocab_size], stddev=0.01)
softmax_w = tf.Variable(softmax_w_init)
softmax_b_init = tf.constant(0.01, shape=[vocab_size])
softmax_b = tf.Variable(softmax_b_init)
logits = tf.matmul(output, softmax_w) + softmax_b

# Per-position loss with every position weighted 1.0
flat_targets = tf.reshape(targets, [-1])
unit_weights = tf.ones([batch_size * num_steps], dtype=tf.float32)
loss = tf.nn.seq2seq.sequence_loss_by_example(
    [logits], [flat_targets], [unit_weights])

# Total loss divided by batch_size, minimised with Adam at default settings
cost = tf.reduce_sum(loss) / batch_size
optimizer = tf.train.AdamOptimizer()
train_step = optimizer.minimize(cost)

In [5]:
# Train with a single pass over train_pairs. The RNN state returned by each
# step is fed back in as the initial state of the next batch.
session = tf.InteractiveSession()
tf.initialize_all_variables().run()
state_value = initial_state.eval()

# Report every 100 batches. The output recorded under this cell has one line
# per 100 steps, so this interval lets a fresh run reproduce that log.
log_every = 100

for i, (x, y) in enumerate(train_pairs):
    cost_value, state_value, _ = session.run(
        [cost, state, train_step],
        {input_data: x, targets: y, initial_state: state_value})
    if i % log_every == 0:
        # cost is the summed loss divided by batch_size only, so dividing by
        # num_steps gives the per-position average that is exponentiated.
        perplexity = np.exp(cost_value / num_steps)
        print(i, perplexity)

0 10000.3350706
100 920.36028304
200 678.291748702
300 612.169591022
400 821.56982151
500 436.038341557
600 331.660217599
700 462.461936803
800 593.215165007
900 463.982041625
1000 573.939940394
1100 384.355951536
1200 493.856857118
1300 330.300567905
1400 239.248514871
1500 549.419468696
1600 290.205821542
1700 358.346958414
1800 226.016831956
1900 267.237587483
2000 264.407071453
2100 273.740536272
2200 394.204222525
2300 152.777649479
2400 170.789594572
2500 227.37268211
2600 353.00619124
2700 345.992778655
2800 274.454156776
2900 224.863056912
3000 192.821429791
3100 395.852972972
3200 301.195303599
3300 191.907102458
3400 267.609683565
3500 391.289654315
3600 166.920947543
3700 218.464467805
3800 175.264956536
3900 264.608622662
4000 272.834466395
4100 232.023988051
4200 162.484073311
4300 176.815870175
4400 279.284039915
4500 208.662333381
4600 149.246814117
4700 213.073690178
4800 374.368109362
4900 152.429168582
5000 226.029765113
5100 218.273603996
5200 173.942582946
5300 307.

In [12]:
# Evaluate on the test batches. Only cost and state are fetched (train_step
# is not run), and the RNN state is carried from one batch to the next.
state_value = initial_state.eval()
total_cost = 0

for x, y in test_pairs:
    feed = {input_data: x, targets: y, initial_state: state_value}
    cost_value, state_value = session.run([cost, state], feed)
    total_cost += cost_value

# Each batch cost covers num_steps positions, so normalise by batches * steps.
positions_per_row = len(test_pairs) * num_steps
perplexity = np.exp(total_cost / positions_per_row)
print('test perplexity', perplexity)

test perplexity 173.793656848
