## sequence prediction

In [1]:
from IPython.display import clear_output

import tensorflow as tf
from tensorflow import nn
import numpy as np
import math

lstm_size  = 512
n_layers   = 3
n_epochs   = 10 
seq_len    = 50
batch_size = 50
lr = 0.1
grad_norm = 5

### utils

In [2]:
def xavier(n_in, n_out):
    init_range = 4*math.sqrt(6.0/(n_in + n_out))
    return tf.random_uniform([n_in, n_out], -init_range, init_range)

### data loading

In [8]:
with open('shakespeare.txt') as f:
    words_full = [c for c in list(f.read())]
    vocab = []
    for c in words_full:
        if c not in vocab:
            vocab.append(c)
    vocab_size = len(vocab)    
    
    i_split = int(len(words_full)*0.8)
    words_tr = np.array([vocab.index(el) for el in words_full[:i_split]], dtype='int32')
    words_te = np.array([vocab.index(el) for el in words_full[i_split:]], dtype='int32')

    #make batches
    tr_x = words_tr.copy()
    #target is just data shifted one element to the right
    tr_y = np.roll(words_tr, 1)
    sp = [i*batch_size*seq_len for i in range(1,len(words_tr)//(batch_size*seq_len) + 1)]
    n_batches = len(tr_x)//(batch_size*seq_len)
    if(len(tr_x) % batch_size*seq_len != 0):
        # drop last small batch
        tr_x = np.array(np.split(tr_x, sp)[:-1]).reshape(n_batches, seq_len, batch_size)
        tr_y = np.array(np.split(tr_y, sp)[:-1]).reshape(n_batches, seq_len, batch_size)

### model

In [4]:
#create model
tf.reset_default_graph()

lstm = nn.rnn_cell.BasicLSTMCell(lstm_size)
stacked_lstm = nn.rnn_cell.MultiRNNCell([lstm] * n_layers)
input_data = tf.placeholder(tf.int32, [batch_size, seq_len])
targets = tf.placeholder(tf.int32, [batch_size, seq_len])
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)


embedding = tf.get_variable("embedding", [vocab_size, lstm_size])
inputs = tf.nn.embedding_lookup(embedding, input_data)

outputs = []
for i in range(seq_len):
    if i > 0:
        tf.get_variable_scope().reuse_variables()
    cell_output, state = stacked_lstm(inputs[:,i,:], state)
    outputs.append(cell_output)
final_state = state

output = tf.reshape(tf.concat(1, outputs), [-1, lstm_size])
softmax_w = tf.Variable(tf.random_normal([lstm_size, vocab_size], stddev=0.35), dtype=tf.float32)
softmax_b = tf.Variable(tf.zeros([vocab_size]), dtype=tf.float32)
z = tf.matmul(output, softmax_w) + softmax_b
probs = tf.nn.softmax(z)
loss = tf.nn.seq2seq.sequence_loss_by_example([z], [tf.reshape(targets, [-1])], [tf.ones([batch_size * seq_len])] )
cost = tf.reduce_sum(loss) / batch_size
lr = tf.Variable(lr, trainable=False)
optimizer = tf.train.GradientDescentOptimizer(lr)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_norm)
train_op = optimizer.apply_gradients(zip(grads, tvars))




### training

In [5]:
init = tf.initialize_all_variables()
#train
with tf.Session() as s:
    s.run(init)
    for i in range(n_epochs):
        ep_cost = 0
        for j in range(n_batches):
            print(j+1, end=' ')
            _, _, co, _ = s.run([output, final_state, cost, train_op], feed_dict={input_data:tr_x[j], targets:tr_y[j]})
            ep_cost+=co
        print()
        print('Epoch %d, cost per epoch %.3f' % (i+1, ep_cost/n_epochs))

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
Epoch 1, cost per epoch 56.023
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
Epoch 2, cost per epoch 50.375
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
Epoch 3, cost per epoch 44.647
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
Epoch 4, cost per epoch 36.147
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
Epoch 5, cost per epoch 27.669
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
Epoch 6, cost per epoch 21.283
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
Epoch 7, cost per epoch 16.447
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
Epoch 8, cost per epoch 12.788
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 

KeyboardInterrupt: 

### testing

In [14]:
with tf.Session() as s:
    inpt = 'The'
    sample_size = 100
    for c in inpt[:-1]:
        x = np.zeros((1,1))
        x [0,0] = vocab.index(c)
        state = s.run([final_state], feed_dict={input_data:x, state:final_state})
    
    res = inpt
    char = inpt[-1]
    
    for i in range(sample_size):
        x = np.zeros((1,1))
        probs, state = s.run([final_state], feed_dict={input_data:x, state:final_state})
        p = probs[0]
        sample = np.argmax(p)
        pred = vocab[c]
        res += pred

ValueError: Cannot feed value of shape (1, 1) for Tensor 'Placeholder:0', which has shape '(50, 50)'

### TODO

* Add validation
* Add sampling