# Goal

Create a basic sequence training and sampling mechanism using TensorFlow. Like many similar projects, this work emulates karpathy/min-char-rnn.py (https://gist.github.com/karpathy/d4dee566867f8291f086). The data pre-processing is borrowed directly from that gist.

The dataset is bigger (~5X) than the one used in the original experiment (included in this repository).

Only cross-entropy is tracked here. Other language-model-specific metrics, such as the perplexity of the model, are not tracked.

....


In [1]:
import tensorflow as tf
import numpy as np

from collections import namedtuple
from tqdm import tqdm
import os
import time

In [2]:
# Reading the data
# The context manager closes the file handle as soon as the text is in memory.
with open('sample_input.txt') as f:
    data = f.read()

# Vocabulary business

# 1. Create a list of unique characters
# Sorted so that the char <-> index mapping is the same on every run; the
# iteration order of a plain set of strings is not stable across runs.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)

char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_chars = dict(enumerate(chars))

# 2. Encode the whole corpus as an array of integer character indices
x = np.fromiter((char_to_ix[c] for c in data), dtype=np.int32, count=data_size)

# From here on `data` is the encoded corpus, not the raw text.
data = x

print("The dataset has %d characters and %d unique." %(data_size, vocab_size))

The dataset has 5283795 characters and 80 unique.


In [3]:
# Immutable record type that groups every hyper parameter of the experiment.
hparams = namedtuple(
    'hyper_parameters',
    [
        'hidden_size',
        'seq_length',
        'learning_rate',
        'batch_size',
        'vocab_size',
        'num_epochs',
        'num_layers',
        'keep_prob',
    ],
)

# The values below are the ones also used by Martin Gorner:
# https://github.com/martin-gorner/tensorflow-rnn-shakespeare/blob/master/rnn_train.py
# vocab_size is taken from the corpus loaded earlier.
hps = hparams(
    vocab_size=vocab_size,
    hidden_size=512,
    num_layers=3,
    seq_length=30,
    batch_size=200,
    num_epochs=50,
    learning_rate=1e-3,
    keep_prob=0.8,
)

In [4]:
class babble(object):
    """Character-level multi-layer GRU language model (TensorFlow 1.x graph mode).

    Typical use: ``buildGraph()`` -> ``trainStep(scores)`` -> ``train(...)``,
    with ``sample(...)`` available once the graph exists.

    ``sample`` relies on the module-level ``char_to_ix`` / ``ix_to_chars``
    dictionaries to translate between characters and indices.
    """

    def __init__(self, hps, mode, debug=True):
        """Store the configuration; no graph is built here.

        Args:
            hps: hyper-parameter record (hidden_size, seq_length, learning_rate,
                batch_size, vocab_size, num_epochs, num_layers, keep_prob).
            mode: stored on the instance; not consulted by the methods below.
            debug: accepted for interface compatibility; currently unused.
        """
        self.hps = hps
        self.mode = mode
        # Start index of each of the batch_size parallel streams read by
        # getNextBatch; created lazily on the first call.
        self.batch_pointer = None
        # (scores tensor, softmax tensor) pair reused by sample().
        self._softmax_cache = None

    def buildGraph(self, variant="fixed_length"):
        """Build the forward pass and return the per-step logits.

        Creates the placeholders ``X``, ``Y``, ``h0`` and ``keep_prob`` plus
        ``zerostate`` and ``hidden_state`` on the instance.

        Args:
            variant: accepted for interface compatibility; currently unused.

        Returns:
            Logits tensor of shape [batch * time, vocab_size].
        """
        D = self.hps.vocab_size
        H = self.hps.hidden_size

        # Placeholder
        with tf.name_scope("PlaceHolders"):
            self.X = tf.placeholder(tf.int32, [None, None], "Inputs")
            self.Y = tf.placeholder(tf.int32, [None, None], "Expected_Output")
            self.h0 = tf.placeholder(
                tf.float32, [None, self.hps.num_layers * H],
                "initial_hidden_state")
            # Defaults to 1.0 so dropout is only active when a caller feeds a
            # smaller value (train() does); sampling therefore runs without it.
            self.keep_prob = tf.placeholder_with_default(1.0, [], "keep_prob")

        # No projection to embedding is performed in this experiment
        # Inputs are simply translated to one hot
        inputs = tf.one_hot(self.X, depth=D)

        def make_cell():
            # One GRU + dropout wrapper per layer.
            cell = tf.contrib.rnn.GRUCell(H)
            return tf.contrib.rnn.DropoutWrapper(
                cell, output_keep_prob=self.keep_prob)

        # Every layer needs its own cell object: the first layer consumes
        # vocab_size inputs and the others hidden_size, so a single instance
        # repeated with [cell] * n cannot serve all of them.
        multi_cell = tf.contrib.rnn.MultiRNNCell(
            [make_cell() for _ in range(self.hps.num_layers)],
            state_is_tuple=False)

        input_shape = tf.shape(self.X)

        # Zero state sized from the batch dimension of whatever is fed as X.
        self.zerostate = multi_cell.zero_state(input_shape[0], dtype=tf.float32)
        outputs, self.hidden_state = tf.nn.dynamic_rnn(
            cell=multi_cell,
            dtype=tf.float32,
            inputs=inputs,
            initial_state=self.h0
        )

        with tf.name_scope("Dense_Output_Layer"):
            # Flatten batch and time so one dense layer scores every step.
            outputs = tf.reshape(outputs, [-1, H])
            scores = tf.layers.dense(outputs, D)
        tf.summary.histogram('scores', scores)
        self.scores = scores
        return self.scores

    def trainStep(self, scores):
        """Add loss, gradient clipping and the Adam update to the graph.

        Args:
            scores: logits returned by ``buildGraph``.

        Returns:
            Tuple ``(train_step, loss, summary, zerostate)`` suitable for
            passing to ``train`` as ``train_ops``.
        """
        # Loss and Optimizer
        y_int = tf.reshape(self.Y, [-1])
        with tf.name_scope("COST"):
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.one_hot(y_int, self.hps.vocab_size),
                logits=scores,
                name="softMaxCrossEntropy"
            )
            loss = tf.reduce_mean(loss)
        tf.summary.scalar('loss_', loss)

        with tf.name_scope("Predictions"):
            # Kept on the instance so the op is reachable by callers.
            self.predictions = tf.cast(
                tf.argmax(scores, axis=-1, name="predictions"), tf.int32)

        solver = tf.train.AdamOptimizer(self.hps.learning_rate)

        tvars = tf.trainable_variables()
        gs_int = tf.gradients(loss, tvars)
        # Clip by global norm before applying the update.
        gs, _ = tf.clip_by_global_norm(gs_int, 3.0)
        train_step = solver.apply_gradients(
            zip(gs, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())
        summary = tf.summary.merge_all()
        return train_step, loss, summary, self.zerostate

    def train(self, sess, data, train_ops, writer=None, print_every=10):
        """Run ``hps.num_epochs`` epochs and print a sample every 10th epoch.

        Args:
            sess: session in which the variables are already initialised.
            data: 1-D numpy array of character indices.
            train_ops: tuple returned by ``trainStep``.
            writer: optional summary writer; receives one summary per batch.
            print_every: accepted for interface compatibility; currently unused.
        """
        itr = 0
        num_batches = data.shape[0] // self.hps.batch_size // self.hps.seq_length
        # Every batch starts from an all-zero hidden state.
        zero_h = np.zeros(
            (self.hps.batch_size, self.hps.hidden_size * self.hps.num_layers))

        # Use the model's own hyper parameters, not a module-level `hps`.
        for e in tqdm(range(self.hps.num_epochs), desc='epoch'):
            for i in range(num_batches):
                itr += 1
                x, y = self.getNextBatch(data)
                feed_dict = {
                    self.X: x,
                    self.Y: y,
                    self.h0: zero_h,
                    self.keep_prob: self.hps.keep_prob,
                }
                _, loss, summary, _ = sess.run(train_ops, feed_dict=feed_dict)
                if writer is not None:
                    # Record the step so successive summaries are not all
                    # written at the same x position.
                    writer.add_summary(summary, itr)
            # Sample after every 10 epochs to see how we are doing
            if e % 10 == 0:
                self.sample(sess, self.scores)

    def sample(self, sess, scores,
               seed="I have something to say",
               length=200, beam_width=5):
        """Prime the network with ``seed`` and print ``length`` sampled characters.

        Args:
            sess: session holding the trained variables.
            scores: logits tensor returned by ``buildGraph``.
            seed: priming text; each character must be a key of ``char_to_ix``.
            length: number of characters to generate.
            beam_width: accepted for interface compatibility; currently unused.
        """
        # Build the softmax op once per scores tensor instead of adding a new
        # node to the graph on every call.
        cache = getattr(self, "_softmax_cache", None)
        if cache is None or cache[0] is not scores:
            cache = (scores, tf.nn.softmax(scores))
            self._softmax_cache = cache
        pd = cache[1]

        x = np.asarray([char_to_ix[ch] for ch in seed]).reshape(1, -1)
        state_width = self.hps.hidden_size * self.hps.num_layers
        # keep_prob is not fed, so it takes its default of 1.0 (no dropout).
        feed_dict = {self.X: x, self.h0: np.zeros((1, state_width))}
        pred_str = []
        for _ in range(length):
            p, h0 = sess.run([pd, self.hidden_state], feed_dict=feed_dict)
            # Carry the hidden state over to the next step.
            feed_dict[self.h0] = h0
            # Only the distribution after the last fed character matters.
            # Renormalise in float64 so rounding in the float32 softmax cannot
            # trip np.random.choice's sum-to-one check.
            probs = p[-1].astype(np.float64)
            probs /= probs.sum()
            ix = np.random.choice(self.hps.vocab_size, p=probs)
            feed_dict[self.X] = np.full((1, 1), ix)
            pred_str.append(ix_to_chars[ix])
        txt = ''.join(pred_str)
        print ('----\n %s \n----' % (txt, ))

    # data is expected to be numpy array of indices
    def getNextBatch(self, data):
        """Return the next ``(x, y)`` batch of character indices.

        The corpus is read as ``batch_size`` parallel streams whose start
        points are spread evenly over ``data``. Each call returns the next
        ``seq_length`` characters of every stream as ``x`` and the same window
        shifted by one character as ``y``. Consecutive calls return contiguous
        windows, and indices wrap around at the end of ``data``.

        Args:
            data: 1-D numpy array of character indices.

        Returns:
            Two int32 arrays of shape (batch_size, seq_length).
        """
        n_chars = data.shape[0]
        batch_size = self.hps.batch_size
        seq_length = self.hps.seq_length

        if self.batch_pointer is None:
            segment = n_chars // batch_size
            self.batch_pointer = np.arange(batch_size) * segment

        # Index grid built from a fresh array so the pointer itself is only
        # changed by the explicit update below.
        positions = self.batch_pointer[:, None] + np.arange(seq_length)
        x = np.take(data, positions, mode='wrap').astype(np.int32)
        y = np.take(data, positions + 1, mode='wrap').astype(np.int32)

        # Advance exactly one window so no character is skipped as an input.
        self.batch_pointer = (self.batch_pointer + seq_length) % n_chars
        return x, y

In [5]:
# Instantiate the model with mode 'train' and pull one batch as a quick
# smoke test of the batching code (this first call also initialises the
# model's batch pointer).
model = babble(hps, mode='train')
sample = model.getNextBatch(data)

In [6]:
# Start from an empty default graph, then add the forward pass and the
# training ops to it.
tf.reset_default_graph()
scores=model.buildGraph()
train_ops=model.trainStep(scores)

# Soft placement lets ops fall back to another device when the requested one
# is unavailable; allow_growth makes GPU memory be claimed on demand.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess=tf.Session(config=config)
summaries=tf.summary.merge_all()
# One log directory per run, named after the start time.
writer = tf.summary.FileWriter(
            os.path.join('./tf_logs', time.strftime("%Y-%m-%d-%H-%M-%S")))
writer.add_graph(sess.graph)

with tf.device("/gpu:0"):
    sess.run(tf.global_variables_initializer())
    try:
        model.train(sess, data, train_ops,writer)
        model.sample(sess, scores)
    finally:
        # The writer buffers events; flush so the tail of the log reaches
        # disk even when training is interrupted.
        writer.flush()
    
    

epoch:  20%|██        | 10/50 [11:29<46:20, 69.51s/it]

----
 
	As makes it was your works] -Somethy, or bonessing that? why, like;
	And bord with memaforret's life upont, where being
	Is thought has too,, and have like our oping man eitile fortly
Roppose of his 
----
----
 
	I starn my griegy. I am a gain trijun, thou wilt not call him ill, then now with gried to time, whose enemy is still affections to ency drowning,
	Though he's as wantons of their accidents, fill at  
----

epoch:  40%|████      | 20/50 [23:06<34:36, 69.22s/it]


----
  'of rain,
	Be gain'd, and level, if it were and grossly thyself against a man; and hang this grief, go you up in his uncle By yours: of thy mapes, the foreod!
	In while all I lived, rather thou one.
 
----

epoch:  60%|██████    | 30/50 [34:37<23:02, 69.15s/it]


----
  'Then Wherefore? who is thoughts? Confounds thy bode,
	This man losse so, my Mowbray, stande with thee than she as have I took thy niest how a grief. Look you for honey himself
	so wrangled him: his  
----

epoch:  80%|████████  | 40/50 [46:17<11:45, 70.58s/it]


----
 ,
	Are in unrunkind trift of deity's. Good Petitionus: to that terrible, for the kmight were no more now she baits to taken,
	And that you would take love that not exercises, which we atandiff: even s 
----

epoch: 100%|██████████| 50/50 [58:39<00:00, 74.93s/it]


----
  I would not get him.

SILVIUS	He etis flesh apart, folly and worth of Richmond neared; nothing admiting to no more right that with the promesses the loss of all the third,
	As sixped with the relief, 
----



