# Character Level Text Generation
You will implement a multi-layer Recurrent Neural Network (RNN, LSTM, and GRU) for training/sampling from character-level language models, which takes one text file as input and trains an RNN that learns to predict the next character in a sequence. The RNN can then be used to generate text character by character that will look like the original training data.


In [1]:
# Widen the notebook container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import tensorflow as tf
import timeit
import os
from six.moves import cPickle
from text_utils import TextLoader
from tensorflow.contrib import rnn
from char_rnn_model import Model
import numpy as np

In [3]:
# # read data from file and save preprocessed data
# T = TextLoader()
# T.read_data('shakespeare.txt')

# Load the preprocessed data (produced by TextLoader)

In [4]:
# Load the training and validation splits from the preprocessed .npy files.
# Presumably these hold the text encoded as integer character indices
# (see `vocab`) — confirm against text_utils.TextLoader.
train = np.load('s_data/train.npy')
val = np.load('s_data/val.npy')

In [5]:
# Read the vocab dict (character -> integer index) back from disk.
# The `with` block guarantees the file is closed even if unpickling fails.
# NOTE: pickle can execute arbitrary code; only load files you created yourself.
with open('s_data/vocab.pkl', 'rb') as pkl_file:
    vocab = cPickle.load(pkl_file)

In [6]:
# Read the inverse vocab dict (integer index -> character) back from disk.
# The `with` block guarantees the file is closed even if unpickling fails.
# NOTE: pickle can execute arbitrary code; only load files you created yourself.
with open('s_data/inverse_vocab.pkl', 'rb') as pkl_file:
    inverse_vocab = cPickle.load(pkl_file)

In [7]:
# Show both lookup tables: character -> index, then index -> character.
print(vocab)
print(inverse_vocab)

{',': 23, "'": 36, 'C': 6, '?': 32, 'G': 57, 'P': 53, 'a': 19, '&': 60, '-': 47, 'u': 21, 'e': 8, '\n': 11, 'c': 17, '$': 64, 'w': 15, ';': 41, 'I': 38, 'D': 52, 'b': 42, 'F': 0, 'x': 55, 'y': 20, 'd': 18, 'S': 29, 'm': 24, '3': 63, 'T': 48, 'O': 44, 'A': 27, 't': 4, 'h': 22, 'z': 7, 'i': 1, ' ': 5, 'H': 49, 'v': 31, 'o': 14, 'L': 37, 'R': 33, 'J': 56, 'q': 54, 'B': 12, 'W': 35, 'U': 51, ':': 10, 'Y': 30, 'r': 2, 'X': 62, 'M': 34, 'Z': 61, ']': 66, 'j': 45, 'l': 28, 'N': 39, 'Q': 59, 'g': 40, 'E': 50, 'k': 25, 'n': 9, 'p': 16, '!': 43, '[': 65, 'f': 13, 'K': 58, 's': 3, 'V': 46, '.': 26}
{0: 'F', 1: 'i', 2: 'r', 3: 's', 4: 't', 5: ' ', 6: 'C', 7: 'z', 8: 'e', 9: 'n', 10: ':', 11: '\n', 12: 'B', 13: 'f', 14: 'o', 15: 'w', 16: 'p', 17: 'c', 18: 'd', 19: 'a', 20: 'y', 21: 'u', 22: 'h', 23: ',', 24: 'm', 25: 'k', 26: '.', 27: 'A', 28: 'l', 29: 'S', 30: 'Y', 31: 'v', 32: '?', 33: 'R', 34: 'M', 35: 'W', 36: "'", 37: 'L', 38: 'I', 39: 'N', 40: 'g', 41: ';', 42: 'b', 43: '!', 44: 'O', 45: 'j',

In [8]:
# Distinct characters known to the model; their count is the vocabulary size.
char = list(vocab)
vocab_size = len(char)

In [9]:
# Number of distinct characters in the vocabulary.
print(vocab_size)

67


In [10]:
import copy
def batch_generator(data,batch_size,num_seq):
    """Yield (input, target) mini-batches for next-character prediction.

    Every window of ``num_seq`` consecutive items of ``data`` (windows start
    at consecutive positions, i.e. they overlap) is paired with the same
    window shifted one step to the right. Windows are grouped, in order, into
    batches of exactly ``batch_size``; a trailing group smaller than
    ``batch_size`` is dropped.

    Args:
        data: 1-D sequence (list or NumPy array) of encoded items.
        batch_size: number of windows per batch; must be positive.
        num_seq: length of each window; must be positive.

    Yields:
        Tuples ``(x, y)`` where ``x`` has shape ``(batch_size, num_seq, 1)``
        and ``y`` has shape ``(batch_size, num_seq)``, with
        ``y[b, t] == x[b, t + 1, 0]`` for every ``t < num_seq - 1``.

    Raises:
        ValueError: if ``batch_size`` or ``num_seq`` is not positive
            (raised when the generator is first advanced).
    """
    if batch_size <= 0 or num_seq <= 0:
        raise ValueError("batch_size and num_seq must be positive")

    data = np.asarray(data)

    # Window start positions are 0 .. len(data) - num_seq - 2.
    num_windows = max(len(data) - num_seq - 1, 0)
    offsets = np.arange(num_seq)

    # Bound the loop by the number of windows (not by len(data)) so that only
    # full batches are produced; a short final slice could not be reshaped to
    # (batch_size, num_seq, 1) and used to raise ValueError.
    for first in range(0, num_windows - batch_size + 1, batch_size):
        starts = np.arange(first, first + batch_size)
        # idx[b, t] is the position in `data` of step t of window b.
        idx = starts[:, None] + offsets[None, :]
        x = data[idx].reshape(batch_size, num_seq, 1)
        y = data[idx + 1].reshape(batch_size, num_seq)
        yield (x, y)

# Define directories and hyperparameters

In [11]:
# number of full passes over the training data
epoch = 3
# number of time steps the network is unrolled through (characters per input window)
seq_len=7
# number of hidden LSTM units per layer
rnn_size=100

# number of stacked LSTM layers
num_layers = 3

# learning rate for the Adam optimizer
learning_rate=0.001


# number of sequences per mini-batch
batch_size=128

# dropout keep probability passed to Model
keep_prob = 0.5

# Create your model object

In [12]:
# Build the training network from the hyperparameters defined above.
model = Model(batch_size, seq_len, rnn_size, num_layers, learning_rate, vocab_size, keep_prob)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


# Training

In [13]:
# Saver used by the training loop to write checkpoints.
saver = tf.train.Saver()

In [None]:
# Train for `epoch` passes over the training set. After every pass the weights
# are checkpointed and the accuracy on the validation set is reported.
with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    for i in range(1, epoch + 1):
        # one optimisation step per training mini-batch
        training_data = batch_generator(train, batch_size, seq_len)
        for t_data, labels in training_data:
            f_dict = {model.X: t_data, model.Y: labels}
            sess.run(model.train_op, feed_dict=f_dict)

        save_path = saver.save(sess, "checkpoint/model1.ckpt")
        print("Model saved in path: %s" % save_path)

        # Validation accuracy = correct predictions / predictions actually scored.
        # The denominator is accumulated from the batches that were evaluated,
        # instead of assuming len(val) windows of a hard-coded length 7.
        # NOTE(review): assumes model.num_correct counts one prediction per
        # label element of the batch — confirm in char_rnn_model.Model.
        correct = 0
        total = 0
        validation_data = batch_generator(val, batch_size, seq_len)
        for v_data, labels in validation_data:
            f_dict = {model.X: v_data, model.Y: labels}
            correct += sess.run(model.num_correct, feed_dict=f_dict)
            total += labels.size
        # guard against a validation set too small to form a single batch
        val_acc = correct / float(total) if total else 0.0

        print('Epoch'+str(i),"validation accuracy= {:.5f}".format(val_acc))


print("Training finished!")


Model saved in path: checkpoint/model1.ckpt
Epoch1 validation accuracy= 0.31114
Model saved in path: checkpoint/model1.ckpt
Epoch2 validation accuracy= 0.32491


# Generating Text

In [None]:

# Rebuild the network for text generation, one sequence at a time.
# The training graph is discarded first: constructing a second Model in the
# same default graph would clash with (or duplicate) the variables created by
# the first one, and the Saver created afterwards would no longer line up with
# the variable names stored in the checkpoint.
tf.reset_default_graph()

#size of batch
batch_size = 1

# Dropout is only wanted during training, so keep every unit (keep
# probability 1.0) when generating text.
model = Model(batch_size, seq_len, rnn_size, num_layers, learning_rate, vocab_size, 1.0)

In [101]:
def sample(sess, vocab, vocab_reverse, n, start):
    """Greedily extend ``start`` by ``n`` characters, one prediction at a time.

    At every step the last ``seq_len`` characters are fed to the network and
    the highest-scoring next character is appended to the sequence.

    Args:
        sess: session object whose ``run`` evaluates ``logits``.
        vocab: mapping from character to integer index.
        vocab_reverse: mapping from integer index back to character.
        n: number of characters to generate.
        start: seed text; every character must be a key of ``vocab``.

    Returns:
        The seed followed by the ``n`` generated characters, as one string.

    Raises:
        KeyError: if ``start`` contains a character that is not in ``vocab``.
        ValueError: if characters have to be generated but ``start`` is
            shorter than ``seq_len`` (no full input window can be formed).
    """
    # NOTE(review): `seq_len`, `logits` and `X` are read from the notebook's
    # global namespace. `logits` and `X` are not defined in the visible cells
    # (the tensors are presumably attributes of `model`) — confirm they exist
    # before calling this function.

    # convert the seed text to indices
    split = [vocab[ch] for ch in start]

    # Fail early with a clear message rather than with an opaque reshape error.
    if n > 0 and len(split) < seq_len:
        raise ValueError(
            "start must contain at least %d characters, got %d"
            % (seq_len, len(split))
        )

    for _ in range(n):
        # the last seq_len elements of the sequence form the next input
        piece = split[len(split) - seq_len:]
        batch_x = np.reshape([piece], [1, seq_len, 1])

        # scores for the character following the final time step
        next_chars = sess.run(logits, feed_dict={X: batch_x})[:, -1, :]
        split.append(int(np.argmax(next_chars)))  # becomes part of the next input

    # change the sequence from indices back to characters
    return "".join(vocab_reverse[idx] for idx in split)

In [102]:
# Saver used below to restore trained weights from a checkpoint.
saver = tf.train.Saver()

In [104]:
# Restore trained weights and print text generated from the seed 'queen then'.
# NOTE(review): this restores "checkpoint/model.ckpt", whereas the training
# cell saves to "checkpoint/model1.ckpt" — confirm which checkpoint is intended.
with tf.Session() as sess:
  # Restore variables from disk.
    saver.restore(sess, "checkpoint/model.ckpt")
    # This calls the `sample` method of the Model object, not the
    # notebook-level `sample` function.
    a = model.sample(sess, vocab, inverse_vocab, 300, 'queen then')
    print(a)

INFO:tensorflow:Restoring parameters from checkpoint/model.ckpt
queen then the lords the hand the the the tond the see the throw have the that the sarry the the recond Lo me the the sour the come the the rear the speet the for the sou the the so the pomet the the that the the the the soer and the send the word the the so the man the seed the lesse the say the son the meen


In [None]:
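The network can produce mostly correct words, but the generated text is highly repetitive (for example, "the the the") and does not form coherent sentences.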