# Recurrent Neural Networks
## Covers
   > Character level Vanilla RNN implementation.     
    Generating a sequence from a random input.  
    

# Vanilla implementation

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

>Change the text below to experiment with different inputs

In [2]:
# Build the toy corpus. The inputs are every character except the last one;
# the targets are the same sequence shifted one character ahead.
text = 'Hello how are you doing? I am great '
x = [ch for ch in text[:-1]]
y = [ch for ch in text[1:]]

In [3]:
#Converting the input to one hot encoding 
from sklearn.preprocessing import LabelEncoder, LabelBinarizer

# String to int
le = LabelEncoder()
vocab = list(set(text))
le.fit(vocab)
text_int = le.transform(vocab)

# Derive the character sequences from `text` instead of re-reading `x`/`y`:
# this cell rebinds `x` and `y` to one-hot matrices below, so reading them
# back as inputs would make a second run of the cell fail.
x_chars = list(text[:-1])
y_chars = list(text[1:])
print('**Input :x \n', x_chars)
x_int = le.transform(x_chars)
y_int = le.transform(y_chars)
print('\n**LabelEncoder :x \n', x_int)

#One-hot encoding
lb = LabelBinarizer()
lb.fit(text_int)
x = lb.transform(x_int)
y = lb.transform(y_int)
print('\n**LabelBinarizer :x \n', x)
print('\n**Transform back :x \n', le.inverse_transform(lb.inverse_transform(x)))

# Lookup tables handed to the model:
#   vocab_dict          integer label  -> character
#   vocab_reverse_dict  character      -> integer label
#   vocab_onehot_dict   integer label  -> one-hot row
vocab_dict = dict(zip(text_int.tolist(), vocab))
vocab_reverse_dict = dict(zip(vocab, text_int.tolist()))
vocab_onehot = lb.transform(text_int)
vocab_onehot_dict = dict(zip(text_int.tolist(), vocab_onehot))


**Input :x 
 ['H', 'e', 'l', 'l', 'o', ' ', 'h', 'o', 'w', ' ', 'a', 'r', 'e', ' ', 'y', 'o', 'u', ' ', 'd', 'o', 'i', 'n', 'g', '?', ' ', 'I', ' ', 'a', 'm', ' ', 'g', 'r', 'e', 'a', 't']

**LabelEncoder :x 
 [ 2  6 10 10 13  0  8 13 17  0  4 14  6  0 18 13 16  0  5 13  9 12  7  1  0
  3  0  4 11  0  7 14  6  4 15]

**LabelBinarizer :x 
 [[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0

# Building a char RNN

 >The RNN is unrolled over 'n_state_size' time steps (the truncated back-propagation length) and has 'n_hidden' hidden units.   
 The inputs X and Y are fed with shape = {length of each one-hot encoded input, number of time steps}.    
 The RNN class has the following methods:   
     * init = Initializes the weights, state variables, loss function and optimizer. Creates the state graph.    
     * train = Trains the classifier given the inputs X, Y and the 'valid_char' string used for sampling during training
     * test = Generates a sequence based on an input test string (of length state_size)
 

In [32]:

class charRNN(object):
    """Character-level vanilla RNN written against the TensorFlow 1.x graph API.

    The recurrence is unrolled for ``n_state_size`` time steps. Every step takes
    one one-hot encoded character and produces a softmax distribution over the
    vocabulary. Creating an instance builds the graph in the current default
    graph, opens a ``tf.Session`` and initialises all variables.
    """

    def __init__(self, n_hidden, n_vocab_size, n_state_size, vocab_dict, vocab_reverse_dict, vocab_onehot_dict):
        """Build the unrolled graph, the loss and the optimizer, then start a session.

        Args:
            n_hidden: number of hidden units in the recurrent layer.
            n_vocab_size: width of a one-hot encoded character.
            n_state_size: number of time steps the network is unrolled for; also
                the fixed number of characters every fed window must contain.
            vocab_dict: mapping used to turn a predicted index into a character.
            vocab_reverse_dict: mapping used to turn a character into its index.
            vocab_onehot_dict: mapping used to turn an index into its one-hot row.
        """
        self.hidden_size = n_hidden
        self.state_size = n_state_size
        self.truncated_backprop_length = n_state_size
        self.vocab_size = n_vocab_size

        # Keep references to the lookup tables (they are not copied).
        self.vocab_onehot_dict = vocab_onehot_dict
        self.vocab_reverse_dict = vocab_reverse_dict
        self.vocab_dict = vocab_dict

        # Model inputs: one column per time step, one row per vocabulary entry.
        self.x = tf.placeholder(tf.float32, [self.vocab_size, self.truncated_backprop_length])
        self.y = tf.placeholder(tf.int32, [self.vocab_size, self.truncated_backprop_length])

        inputs_series = tf.unstack(self.x, axis=1)  # unpack the columns
        labels_series = tf.unstack(self.y, axis=1)  # unpack the columns

        # Network weights.
        self.weights = self._initialize_weights()

        # Unrolled recurrence: h_t = tanh(x_t Wxh + h_{t-1} Whh + bhh).
        current_state = self.weights['init_state']
        states_series = []
        for X_i in inputs_series:
            # Turn the column vector into a single-row matrix for matmul.
            X_i = tf.reshape(X_i, [-1, self.vocab_size])
            next_state = tf.tanh(tf.matmul(X_i, self.weights['Wxh']) +
                                 tf.matmul(current_state, self.weights['Whh']) + self.weights['bhh'])
            states_series.append(next_state)
            current_state = next_state
        self.states_series = states_series

        # Logits and cost, one entry per time step.
        logits_series = [tf.matmul(state, self.weights['Why']) + self.weights['bhy'] for state in self.states_series]

        self.pred_series = [tf.nn.softmax(logits) for logits in logits_series]
        self.loss = tf.reduce_mean([tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels) for logits, labels in zip(logits_series, labels_series)])
        self.optimizer = tf.train.AdagradOptimizer(0.5).minimize(self.loss)

        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

    def _initialize_weights(self):
        """Create the trainable variables and return them in a dict keyed by name.

        The variable names ('init_state', 'Wxh', 'Whh', 'bhh', 'Why', 'bhy') are
        part of the saved checkpoint and are used to look the tensors up again
        after loading.
        """
        all_weights = dict()
        all_weights['init_state'] = tf.Variable(tf.zeros([1, self.hidden_size]), dtype=tf.float32, name='init_state')
        all_weights['Wxh'] = tf.Variable(tf.truncated_normal((self.vocab_size, self.hidden_size), stddev=0.01),
                                         dtype=tf.float32, name='Wxh')
        all_weights['Whh'] = tf.Variable(tf.truncated_normal((self.hidden_size, self.hidden_size), stddev=0.01),
                                         dtype=tf.float32, name='Whh')
        all_weights['bhh'] = tf.Variable(tf.zeros(self.hidden_size), dtype=tf.float32, name='bhh')
        all_weights['Why'] = tf.Variable(tf.truncated_normal((self.hidden_size, self.vocab_size), stddev=0.01),
                                         dtype=tf.float32, name='Why')
        all_weights['bhy'] = tf.Variable(tf.zeros(self.vocab_size), dtype=tf.float32, name='bhy')

        return all_weights

    def _encode_chars(self, chars):
        """Return the one-hot encoding of ``chars`` as an array with one row per character."""
        return np.array([self.vocab_onehot_dict[self.vocab_reverse_dict[c]].tolist() for c in chars])

    def train(self, X, Y, n_epoch, valid_char):
        """Train on every sliding window of the sequence for ``n_epoch`` epochs.

        Args:
            X: one-hot encoded input sequence, one row per character.
            Y: one-hot encoded target sequence, aligned row-for-row with ``X``.
            n_epoch: number of passes over the sequence.
            valid_char: prompt string used to print a generated sample while
                training; it is fed to the fixed-width input placeholder, so it
                needs as many characters as the network has time steps.

        Raises:
            ValueError: if ``X`` is shorter than one training window.
        """
        window = self.truncated_backprop_length
        # Measure the sequence passed in, not a notebook-level global.
        n_steps = len(X)
        if n_steps < window:
            raise ValueError(
                'X has {} rows but at least {} are needed for one window'.format(n_steps, window))

        epoch_disp = 2
        for epoch in range(n_epoch):
            # Slide the window one character at a time; only full windows are used.
            for start_idx in range(n_steps - window + 1):
                end_idx = start_idx + window
                # Transpose so that columns are time steps, as the placeholders expect.
                batchX = X[start_idx:end_idx].T
                batchY = Y[start_idx:end_idx].T
                loss, _ = self.sess.run((self.loss, self.optimizer), feed_dict={self.x: batchX, self.y: batchY})

            # Progressively sparser reporting: the interval grows each time a
            # report is printed (reports at epochs 0, 4, 36, ...).
            if epoch % epoch_disp ** 2 == 0:
                print('Loss epoch({:})={:3.4f}'.format(epoch, loss))
                self.generate_sequence(self._encode_chars(valid_char))
                epoch_disp += epoch

    def test(self, input_char):
        """Print 50 generated characters using ``input_char`` as the prompt."""
        self.generate_sequence(self._encode_chars(input_char), 50)

    def generate_sequence(self, X, n_samples=50):
        """Greedily generate and print ``n_samples`` characters.

        Args:
            X: one-hot encoded prompt, one row per character.
            n_samples: number of characters to generate.

        At each step the prediction for the last time step is printed and
        appended to the window while the oldest character is dropped.
        """
        last = self.truncated_backprop_length - 1
        for _ in range(n_samples):
            pred_series = self.sess.run(self.pred_series, feed_dict={self.x: X.T})
            pred_idx = np.argmax(pred_series, axis=2).flatten()

            # np.roll returns a new array, so the caller's X is left untouched.
            X = np.roll(X, -1, axis=0)
            X[last] = self.vocab_onehot_dict[pred_idx[last]]
            print(self.vocab_dict[pred_idx[last]], end='')
        print('\n')

    def save_model(self, model_path):
        """Save the session's variables to ``model_path`` and print the path written."""
        saver = tf.train.Saver()
        save_path = saver.save(self.sess, model_path)
        print("Model saved in file: %s" % save_path)


>The network is trained, and during testing the character with the maximum predicted probability is fed back as input to generate a sequence.

In [35]:
# training the Session
tf.reset_default_graph()
# Fix the graph-level seed so the random weight initialisation, and therefore
# the training run, is repeatable. It has to be set after the reset and before
# the model graph is built.
tf.set_random_seed(0)
n_hidden = 10
n_vocab_size = len(vocab)
n_state_size = 4

rnn = charRNN(n_hidden, n_vocab_size, n_state_size, vocab_dict, vocab_reverse_dict, vocab_onehot_dict)

# The validation prompt has n_state_size characters, matching the input width.
rnn.train(x, y, 500, 'Hell')
save_path = './char_rnn'
rnn.save_model(save_path)

Loss epoch(0)=2.2962
aear ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae 

Loss epoch(4)=0.2106
oingreat great great great great great great great

Loss epoch(36)=0.2708
o how are you doing? I am great how are you doing?

Model saved in file: ./char_rnn


 > Training starts with bad predictions, but as it progresses the predictions get better.

In [36]:
# Generate text from the trained model. The prompt has 4 characters, matching
# n_state_size, which fixes the width of the model's input placeholder.
rnn.test('how ')


are you doing? I am great how are you doing? I am 



# Experiments with state variables

In [89]:
# Load the model 

def pred_next_char(input_char, sess, state, weights, mask_state_var):
    """Run one recurrent step for a single character with a masked hidden state.

    Args:
        input_char: the character to feed; it is looked up in the notebook-level
            vocabulary tables.
        sess: session used to evaluate the output logits.
        state: hidden state from the previous step.
        weights: dict holding the 'Wxh', 'Whh', 'bhh', 'Why' and 'bhy' tensors.
        mask_state_var: per-unit multipliers applied to ``state`` before the
            recurrence (0. silences a unit, 1. keeps it).

    Returns:
        A pair ``(out, h)``: the evaluated output logits and the new hidden
        state as a (not yet evaluated) tensor.
    """
    # One-hot row for the character, shaped as a single-row matrix.
    onehot = np.array(vocab_onehot_dict[vocab_reverse_dict[input_char]].tolist())
    x_row = tf.reshape(tf.constant(onehot, dtype=tf.float32), [-1, n_vocab_size])

    # Apply the mask to the incoming state only; the new state is returned unmasked.
    masked_state = tf.multiply(state, tf.constant(mask_state_var, dtype=tf.float32))

    pre_activation = (tf.matmul(x_row, weights['Wxh'])
                      + tf.matmul(masked_state, weights['Whh'])
                      + weights['bhh'])
    h = tf.tanh(pre_activation)
    logits = tf.matmul(h, weights['Why']) + weights['bhy']
    return sess.run(logits), h

def predict_var_char(input_char, mask_state_var=None):
    """Restore the saved model and print 30 greedily predicted characters.

    The prompt is fed one character at a time; the most likely next character
    is printed and then becomes the next input. The hidden state starts from
    the saved initial state and is carried across all steps.

    Args:
        input_char: non-empty prompt string.
        mask_state_var: optional per-unit multipliers applied to the hidden
            state before every step. When omitted, an all-ones mask is used,
            i.e. no hidden unit is silenced.

    Raises:
        ValueError: if ``input_char`` is empty (there would be nothing to predict from).
    """
    if not input_char:
        raise ValueError('input_char must contain at least one character')

    tf.reset_default_graph()
    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        # Load model
        loader = tf.train.import_meta_graph(save_path + '.meta')
        loader.restore(sess, save_path)
        loaded_init_state = loaded_graph.get_tensor_by_name('init_state:0')
        loaded_weights = {
            name: loaded_graph.get_tensor_by_name(name + ':0')
            for name in ('Wxh', 'Whh', 'bhh', 'Why', 'bhy')
        }

        if mask_state_var is None:
            # Default: keep every hidden unit; the width is read from the saved state.
            mask_state_var = [1.] * loaded_init_state.get_shape().as_list()[-1]

        print('Input Feed:', [input_char], end=' ')
        print('Output Obtained: ', end='')
        h = loaded_init_state

        for i in range(30):
            # Only the logits after the last fed character are used for the prediction.
            for ch in input_char:
                out, h = pred_next_char(ch, sess, h, loaded_weights, mask_state_var)
            idx_o = np.argmax(out, axis=1)
            # From the second iteration on, the input is the single predicted character.
            input_char = vocab_dict[idx_o[0]]
            print(input_char, end='')

In [93]:
# The mask zeroes hidden unit 6 (the only 0. entry) before every step; the
# other nine units are passed through unchanged.
predict_var_char('doi',[1.,1.,1.,1.,1.,1.,0.,1.,1.,1.])

Input Feed: ['doi'] Output Obtained: ng? I am great you doing? I am

> When the input is 'o', there are many possible outputs: ' ' (space), 'w', 'u' or 'i' with a single state. Here it predicted the next char as 'i', then using 'oi' predicted 'n', and so on.

In [52]:
# predict_var_char requires a mask argument; an all-ones mask leaves every
# hidden unit untouched.
predict_var_char('ho', [1.] * n_hidden)

Input Feed: ['ho'] Output Obtained: w are you doing? I am great yo