# Read Book and Predict Text Using RNN

In [1]:
import os
import time

import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


#### Load Text and Convert to Integer:

In [2]:
# Read the whole book into memory as one string.
with open('anna.txt', 'r') as book:
    text = book.read()

# Vocabulary: every distinct character, sorted so the integer ids are the
# same on every run.
vocab = sorted(set(text))
vocab_len = len(vocab)

# Lookup tables in both directions: id -> character and character -> id.
int_to_vocab = dict(enumerate(vocab))
vocab_to_int = {ch: idx for idx, ch in int_to_vocab.items()}

# `encoded` is the full text with every character replaced by its integer id.
encoded = np.array(list(map(vocab_to_int.__getitem__, text)), dtype=np.int32)

#### Show a sample of the text and its integer encoding:

In [3]:
# Peek at the first 100 characters of the raw text.
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [4]:
# The same first 100 characters, shown as their integer ids.
encoded[:100]

array([31, 64, 57, 72, 76, 61, 74,  1, 16,  0,  0,  0, 36, 57, 72, 72, 81,
        1, 62, 57, 69, 65, 68, 65, 61, 75,  1, 57, 74, 61,  1, 57, 68, 68,
        1, 57, 68, 65, 67, 61, 26,  1, 61, 78, 61, 74, 81,  1, 77, 70, 64,
       57, 72, 72, 81,  1, 62, 57, 69, 65, 68, 81,  1, 65, 75,  1, 77, 70,
       64, 57, 72, 72, 81,  1, 65, 70,  1, 65, 76, 75,  1, 71, 79, 70,  0,
       79, 57, 81, 13,  0,  0, 33, 78, 61, 74, 81, 76, 64, 65, 70])

#### Make Training mini-batches:

In [5]:
def get_batches(arr, batch_size, n_steps):
    """Yield (features, targets) mini-batches cut from a 1-D array.

    arr: array to cut into batches (must support slicing and .reshape)
    batch_size: number of sequences (rows) per batch
    n_steps: number of sequence steps (columns) per batch

    Every batch is a contiguous run of batch_size * n_steps elements reshaped
    to (batch_size, n_steps). The targets are the same run shifted one element
    to the right. Trailing elements that cannot fill a whole batch are ignored,
    and a batch whose targets would run past the end of `arr` is not yielded.
    """
    window = batch_size * n_steps
    full_windows = len(arr) // window

    for index in range(full_windows):
        start = index * window
        stop = start + window

        features = arr[start:stop]
        # Targets are the next character for every feature position.
        labels = arr[start + 1:stop + 1]

        # The very last window has no "next" element when len(arr) is an exact
        # multiple of the window size, so stop instead of yielding a short batch.
        if len(labels) < len(features):
            return

        yield features.reshape(batch_size, -1), labels.reshape(batch_size, -1)

#### Show get_batches:

In [6]:
# Pull two consecutive mini-batches of 3 sequences x 10 steps and decode them
# back to characters, to check that the targets are the inputs shifted by one.
batches = get_batches(encoded, 3, 10)

x, y = next(batches)
print('X:')
print(''.join(int_to_vocab[ch] for ch in x.ravel()), end='')

x, y = next(batches)
print('\nNextX:')
print(''.join(int_to_vocab[ch] for ch in x.ravel()), end='')
print('\nNextY:')
print(''.join(int_to_vocab[ch] for ch in y.ravel()), end='')
print('\n\nX\'s size is:' + str(x.shape))

X:
Chapter 1


Happy families are
NextX:
 all alike; every unhappy fami
NextY:
all alike; every unhappy famil

X's size is:(3, 10)


#### Build RNN Cell:
<img src="assets/charRNN.png" width=500px>

#### Build Placeholders:

In [17]:
# define placeholders for TF graph
def build_inputs(batch_size, num_steps):
    """Create the placeholders that feed data into the TF graph.

    batch_size: number of sequences per batch
    num_steps: number of steps per sequence

    Returns (inputs, targets, keep_prob): two int32 placeholders of shape
    [batch_size, num_steps] named 'inputs' and 'targets', and a float32
    placeholder named 'keep_prob' for the dropout keep probability.
    """
    batch_shape = [batch_size, num_steps]
    placeholders = (
        tf.placeholder(tf.int32, batch_shape, name='inputs'),
        tf.placeholder(tf.int32, batch_shape, name='targets'),
        tf.placeholder(tf.float32, name='keep_prob'),
    )
    return placeholders

#### One-hot Encode Inputs and Targets:

In [18]:
def one_hot(inputs, targets, num_classes):
    """One-hot encode the integer inputs and targets.

    inputs: integer tensor of input ids
    targets: integer tensor of target ids
    num_classes: depth of the one-hot vectors

    Returns (x_one_hot, y_reshaped): the one-hot inputs, and the one-hot
    targets flattened to rows of length num_classes.
    """
    encoded_inputs = tf.one_hot(inputs, num_classes)
    # Flatten the targets to 2-D: one row of num_classes values per position.
    flat_targets = tf.reshape(tf.one_hot(targets, num_classes), [-1, num_classes])
    return encoded_inputs, flat_targets

#### Build LSTM Cell:

In [19]:
def build_lstm(lstm_out_size, num_layers, batch_size, keep_prob):
    """Build a stack of LSTM layers with dropout on each layer's output.

    lstm_out_size: number of units passed to each BasicLSTMCell
    num_layers: number of LSTM layers to stack
    batch_size: batch size used to create the zero initial state
    keep_prob: scalar tensor, keep probability applied to each layer's output

    Returns (cell, initial_state): the stacked cell and its float32 zero state.
    """
    layers = []
    for _ in range(num_layers):
        # A fresh cell object per layer, each wrapped with output dropout.
        base_cell = tf.contrib.rnn.BasicLSTMCell(lstm_out_size)
        layers.append(
            tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob)
        )

    stacked_cell = tf.contrib.rnn.MultiRNNCell(layers)
    # The initial state is the starting memory of the stacked cell: all zeros.
    zero_state = stacked_cell.zero_state(batch_size, tf.float32)
    return stacked_cell, zero_state

#### Generate RNN Outputs and States

In [33]:
def build_output(lstm_cell, initial_state, inputs, lstm_out_size, out_size):
    """Run the RNN over the inputs and project its outputs to class scores.

    lstm_cell: the (stacked) LSTM cell
    initial_state: initial state of lstm_cell
    inputs: input tensor passed to the RNN
    lstm_out_size: size of the RNN output per step, used to flatten the outputs
    out_size: number of units in the final dense layer

    Returns (out, logits, state): softmax probabilities, the raw logits they
    were computed from, and the final RNN state.
    """
    rnn_outputs, final_state = tf.nn.dynamic_rnn(
        lstm_cell, inputs, initial_state=initial_state
    )

    # Flatten to 2-D (one row per position) so a dense layer can consume it.
    flat_outputs = tf.reshape(rnn_outputs, [-1, lstm_out_size])
    logits = tf.layers.dense(flat_outputs, out_size)

    # Softmax turns the logits into a probability distribution per row.
    return tf.nn.softmax(logits), logits, final_state

#### Loss:

In [21]:
def build_loss(logits, targets):
    """Return the mean softmax cross-entropy between logits and targets.

    logits: unnormalised class scores
    targets: label distributions with the same shape as logits
    """
    per_row_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=targets, logits=logits
    )
    return tf.reduce_mean(per_row_loss)

#### Optimizer:

In [22]:
def build_optimizer(loss, learning_rate, grad_clip):
    """Build an Adam training op with gradients clipped by global norm.

    loss: scalar tensor to minimise
    learning_rate: learning rate passed to AdamOptimizer
    grad_clip: clipping threshold passed to tf.clip_by_global_norm

    Returns the op that applies the clipped gradients to all trainable
    variables.
    """
    trainable = tf.trainable_variables()
    raw_grads = tf.gradients(loss, trainable)
    # Clip the gradients jointly so their global norm does not exceed grad_clip.
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)
    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped_grads, trainable))

#### Define Hyperparameters:

In [41]:
# Number of sequences per training batch.
batch_size = 64
# Number of characters (steps) per sequence.
num_steps = 128
# Dropout keep probability fed to the `keep_prob` placeholder during training.
keep_prob_value = 0.6
# Number of units in every LSTM layer.
lstm_out_size = 512
# Number of stacked LSTM layers.
num_layers = 2
# Number of passes over the encoded text during training.
epoches = 20
# Learning rate passed to the Adam optimizer.
learn_rate = 0.001
# Threshold for clipping gradients by global norm.
grad_clip = 5

#### Combine Everything Together to Form a TF Graph:

In [42]:
# Start from an empty default graph so that re-running the graph-building cell
# below does not add a second copy of every node.
tf.reset_default_graph()

In [43]:
## Placeholders for the raw integer inputs/targets and the dropout keep probability
raw_inputs, raw_targets, keep_prob = build_inputs(batch_size, num_steps)
## One-hot encode the raw integer ids (the targets are also flattened to 2-D)
inputs, targets = one_hot(raw_inputs, raw_targets, vocab_len)
## Stacked LSTM cell and its all-zero initial state
cell, initial_state = build_lstm(lstm_out_size, num_layers, batch_size, keep_prob)
## RNN output: per-character probabilities, raw logits and the final RNN state
pred_prob, logits, state = build_output(cell, initial_state, inputs, lstm_out_size, vocab_len)
## Mean softmax cross-entropy between the logits and the one-hot targets
loss = build_loss(logits, targets)
## Training op: Adam with gradients clipped by global norm
opt = build_optimizer(loss, learn_rate, grad_clip)

#### Important Graph Nodes:
* raw_inputs, raw_targets, keep_prob, initial_state (fed through `feed_dict`)
* inputs, targets (one-hot encoded)
* cell, state
* pred_prob, logits
* loss, opt

#### Train:

In [45]:
# Training loop. `keep_prob_value`, `epoches`, `batch_size` and `num_steps`
# come from the hyperparameter cell; they are deliberately not re-assigned
# here so that there is a single place to tune them.

# Report the loss every `print_every_n` training steps.
print_every_n = 50
# Path the sampling cells later load the weights from.
checkpoint_path = "checkpoints/checkpoint.ckpt"
counter = 0
saver = tf.train.Saver()

# Saver.save refuses to write when the parent directory is missing, so create
# it up front instead of failing after the first epoch of training.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(epoches):
        # Every epoch starts again from the all-zero RNN state.
        new_state = sess.run(initial_state)
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            # Feed the final state of the previous batch back in as the
            # initial state of this one.
            feed = {raw_inputs: x,
                    raw_targets: y,
                    keep_prob: keep_prob_value,
                    initial_state: new_state}
            batch_loss, new_state, _ = sess.run([loss, state, opt], feed_dict=feed)

            if counter % print_every_n == 0:
                print("Training Loss: {} at count: {}".format(batch_loss, counter))

        print("Epoch: {}".format(e+1))

        # Save after every epoch (overwriting the same path) so that an
        # interrupted run still leaves a usable checkpoint behind.
        saver.save(sess, checkpoint_path)

Training Loss: 3.394368886947632 at count: 5
Training Loss: 3.2791168689727783 at count: 10
Training Loss: 3.224954605102539 at count: 15
Training Loss: 3.2368245124816895 at count: 20


KeyboardInterrupt: 

#### Load Checkpoint:

In [46]:
# Path of the most recent checkpoint in the 'checkpoints' directory
# (None when no checkpoint has been saved there yet).
checkpoint = tf.train.latest_checkpoint('checkpoints')

#### Define a function that randomly picks one of the n most probable characters:

In [47]:
def pick_top_n(preds, vocab_size, top_n=2):
    """Randomly pick one of the `top_n` most probable classes.

    preds: array of class probabilities; after removing size-1 axes it must be
        1-D with `vocab_size` entries. It is not modified.
    vocab_size: number of classes to choose from
    top_n: how many of the most probable classes stay eligible

    Returns the index of the chosen class. The choice is drawn with
    probability proportional to the original probabilities of the eligible
    classes.
    """
    # Work on a float64 copy: np.squeeze returns a view, so zeroing entries in
    # place would otherwise overwrite the caller's prediction array.
    p = np.squeeze(np.array(preds, dtype=np.float64))
    # argsort is ascending, so everything except the last top_n indices is
    # outside the top n and gets probability zero.
    p[np.argsort(p)[:-top_n]] = 0
    # Renormalise the survivors so they sum to one again.
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

#### Change the hyperparameters to pass 1 character at a time:

In [48]:
# Sampling feeds the network one character at a time: one sequence, one step.
# NOTE: these overwrite the training values of batch_size and num_steps, so
# re-run the hyperparameter cell before rebuilding the training graph.
batch_size = 1
num_steps = 1

#### Redefine the Model Graph:

In [50]:
# Discard the training graph; the cell below rebuilds the model with the
# batch_size / num_steps values that were just set for sampling.
tf.reset_default_graph()

In [51]:
## Same graph construction as for training, now using the current
## batch_size / num_steps values.
## Placeholders for the raw integer inputs/targets and the dropout keep probability
raw_inputs, raw_targets, keep_prob = build_inputs(batch_size, num_steps)
## One-hot encode the raw integer ids (the targets are also flattened to 2-D)
inputs, targets = one_hot(raw_inputs, raw_targets, vocab_len)
## Stacked LSTM cell and its all-zero initial state
cell, initial_state = build_lstm(lstm_out_size, num_layers, batch_size, keep_prob)
## RNN output: per-character probabilities, raw logits and the final RNN state
pred_prob, logits, state = build_output(cell, initial_state, inputs, lstm_out_size, vocab_len)
## Mean softmax cross-entropy between the logits and the one-hot targets
## NOTE(review): the sampling cell only runs pred_prob and state; loss and opt
## are built here but never run while sampling.
loss = build_loss(logits, targets)
## Training op: Adam with gradients clipped by global norm
opt = build_optimizer(loss, learn_rate, grad_clip)

#### Get output from the model:

In [None]:
# Text used to warm up the RNN state before any characters are generated.
prime = 'Princess'
samples = [c for c in prime]
saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, checkpoint)
    new_state = sess.run(initial_state)

    # One sequence of one step, holding the integer id of the current
    # character; int32 to match the `raw_inputs` placeholder.
    x = np.zeros((1, 1), dtype=np.int32)

    # Run the priming text through the network one character at a time,
    # carrying the RNN state forward. The integer ids go into `raw_inputs`
    # (the int32 placeholder); `inputs` is the one-hot tensor derived from it
    # and has a different shape.
    for c in prime:
        x[0, 0] = vocab_to_int[c]
        feed = {raw_inputs: x,
                keep_prob: 1.,
                initial_state: new_state}
        preds, new_state = sess.run([pred_prob, state],
                                    feed_dict=feed)

    # First generated character: predicted from the last priming character.
    c = pick_top_n(preds, len(vocab))
    samples.append(int_to_vocab[c])

    for i in range(1000):
        x[0, 0] = c
        feed = {raw_inputs: x,
                keep_prob: 1.,
                initial_state: new_state}
        preds, new_state = sess.run([pred_prob, state],
                                    feed_dict=feed)

        # `c` is an integer id, so decode it before comparing with a space.
        # Right after a space the next character is drawn from the two most
        # probable candidates; otherwise the single most probable one is taken.
        if int_to_vocab[c] == ' ':
            c = pick_top_n(preds, len(vocab), 2)
        else:
            c = pick_top_n(preds, len(vocab), 1)
        samples.append(int_to_vocab[c])

mystring = ''.join(samples)

In [None]:
# Show the priming text followed by the generated characters.
print(mystring)