In [2]:
import tensorflow as tf
import numpy as np

import random

In [72]:
# For quick indexing, Get vocabulary, and generate idx2ch and ch2idx
# Generate Xs and Ys

filename = "data/paulg/paulg.txt"
ckpt_path = 'ckpt/'
seq_len = 20

f = open(file=filename)
lines = f.readlines()

raw_data = '\n'.join(lines) # Full raw data

vocab = sorted(list(set(raw_data))) 
print ('Vocabulary Size = ', len(vocab))

idx2ch = vocab
ch2idx = {ch:i for i,ch in enumerate(idx2ch)}

num_input_sequences = len(raw_data) // seq_len
X = np.zeros(shape=[num_input_sequences, seq_len], dtype=np.int32)
Y = np.zeros(shape=[num_input_sequences, seq_len], dtype=np.int32)
for i in range(num_input_sequences):
    X[i] = np.array(  [  ch2idx[ch] for ch in raw_data[seq_len*i : seq_len*(i+1)]  ]  )
    Y[i] = np.array(  [  ch2idx[ch] for ch in raw_data[seq_len*i+1 : seq_len*(i+1)+1]  ]  )
    
X = X.astype(np.int32)
Y = Y.astype(np.int32)

Vocabulary Size =  99


In [53]:
# params
hidden_state_size = 256
num_classes = len(idx2ch)
state_size = hidden_state_size
BATCH_SIZE = 128

In [54]:
# Batch Generation
# Use a Generator for batch generation

# @Vikash Learn the usage of arange, Learn usage of sample, learn usage of generators
def batch_generator():
    while True:
        sample_idx = random.sample(list(np.arange(len(X))), BATCH_SIZE)
        yield X[sample_idx], Y[sample_idx]

In [60]:
# Define step function to be used in tf.scan
def step(hprev, xt):
    #initializer
    xav_init = tf.contrib.layers.xavier_initializer()
    # params
    W = tf.get_variable('W', shape=[state_size, state_size], initializer=xav_init)
    U = tf.get_variable('U', shape=[state_size, state_size], initializer=xav_init)
    b = tf.get_variable('b', shape=[state_size], initializer=tf.constant_initializer(value=0.))
    # Current hidden state
    h = tf.tanh(tf.matmul(hprev, W) + tf.matmul(xt, U) + b)
    return h

In [61]:
# Tensorflow variables declaration

graph = tf.Graph()
with graph.as_default():
    # inputs
    xs_ = tf.placeholder(shape=[None, None], dtype=np.int32) # BATCH_SIZE * seq_len (will contain indices of characters)
    ys_ = tf.placeholder(shape=[None], dtype=np.int32) 

    # embeddings
    embeddings = tf.get_variable('emb', shape=[num_classes, state_size])
    rnn_inputs = tf.nn.embedding_lookup(embeddings, xs_) # rnn_inputs.shape = BATCH_SIZE * seq_len * state_size

    # initial_hidden_state
    initial_state = tf.placeholder(shape=[None, state_size], dtype=np.float32, name='initial_state') # BATCH_SIZE * state_size

    # Scan operation
    states = tf.scan(fn=step, 
                     elems=tf.transpose(rnn_inputs, [1, 0, 2]),
                     initializer=initial_state)

    # set last state
    last_state = states[-1]
    states = tf.transpose(states, [1, 0, 2])

    # Output weights
    V = tf.get_variable('V', shape=[state_size, num_classes],
                        initializer=tf.contrib.layers.xavier_initializer())
    # Output bias
    bo = tf.get_variable('bo', shape=[num_classes],
                         initializer=tf.constant_initializer(0.0))

    # Flatten states to 2-d matrix to be multiplied with V
    states_flattened = tf.reshape(tensor=states, shape=[-1, state_size])
    logits = tf.matmul(states_flattened, V) + bo
    predictions = tf.nn.softmax(logits=logits)

    # Calculate loss, Optimization step
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ys_)
    loss = tf.reduce_mean(losses)
    train_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)

In [None]:
# Training time

epochs = 50
training_set = batch_generator()
with tf.Session(graph=graph) as sess:
    # init session
    sess.run(tf.global_variables_initializer())
    train_loss = 0
    try:
        for i in range(epochs):
            for j in range(1000):
                xs, ys = training_set.__next__()
                _, train_loss_ = sess.run([train_op, loss], feed_dict = {
                        xs_ : xs,
                        ys_ : ys.reshape([BATCH_SIZE * seq_len]),
                        initial_state : np.zeros([BATCH_SIZE, state_size])
                    })
                train_loss +=  train_loss_
                print('[{}] loss : {}'.format(i,train_loss/1000))
                train_loss = 0
    except KeyboardInterrupt:
        print('interrupted by user at ' + str(i))
        #
        # training ends here; 
        #  save checkpoint
        saver = tf.train.Saver()
        saver.save(sess, ckpt_path + 'vanilla1.ckpt', global_step=i)

[0] loss : 0.004632154941558838
[0] loss : 0.004332333564758301
[0] loss : 0.007811197757720947
[0] loss : 0.01008820629119873
[0] loss : 0.010538671493530273
[0] loss : 0.013271852493286133
[0] loss : 0.012941525459289551
[0] loss : 0.013567525863647461
[0] loss : 0.013936691284179688
[0] loss : 0.01430539894104004
[0] loss : 0.017343135833740236
[0] loss : 0.015680761337280275
[0] loss : 0.012917587280273438
[0] loss : 0.01058963394165039
[0] loss : 0.011009364128112794
[0] loss : 0.011745492935180664
[0] loss : 0.011423173904418946
[0] loss : 0.012323342323303222
[0] loss : 0.012547677040100097
[0] loss : 0.01267109203338623
[0] loss : 0.012278104782104492
[0] loss : 0.010780665397644043
[0] loss : 0.010450400352478028
[0] loss : 0.010386734008789063
[0] loss : 0.009539385795593262
[0] loss : 0.009310853004455567
[0] loss : 0.009513322830200194
[0] loss : 0.008723392486572265
[0] loss : 0.008196059226989747
[0] loss : 0.007955795288085937
[0] loss : 0.007575365543365478
[0] loss : 0

In [74]:
random_init_word = random.choice(idx2ch)
current_word = ch2idx[random_init_word]
print ('Provided random character = ', random_init_word)
#   
# start session
with tf.Session(graph=graph) as sess:
    # init session
    sess.run(tf.global_variables_initializer())
    # restore session
    ckpt = tf.train.get_checkpoint_state(ckpt_path)
    saver = tf.train.Saver()
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
    #   
    # generate operation
    words = [current_word]
    state = None
    # set batch_size to 1
    batch_size = 1 
    num_words =  111
    # enter the loop
    for i in range(num_words):
        if state:
            feed_dict = { xs_ : np.array(current_word).reshape([1, 1]), 
                    initial_state : state_ }
        else:
            feed_dict = { xs_ : np.array(current_word).reshape([1,1])
                    , initial_state : np.zeros([batch_size, state_size]) }
   
        # forward propagation
        preds, state_ = sess.run([predictions, last_state], feed_dict=feed_dict)
        #   
        # set flag to true
        state = True
        # 
        # set new word
        current_word = np.random.choice(preds.shape[-1], 1, p=np.squeeze(preds))[0]
        # add to list of words
        words.append(current_word)
#########
# text generation complete
#
print('______Generated Text_______')
print(''.join([idx2ch[w] for w in words]))

91
{
______Generated Text_______
{t,ytippp. t n. t,yyveyven. t .,yt t t t,yt n. t n. t t t t,yyt,yvyt n. n. t,yn. tippppp. n. n. t, yvy4, tip. t 
