In [141]:
import tensorflow as tf
import numpy as np

from collections import namedtuple
from tqdm import tqdm
import os
import time

In [161]:
# Reading the data
data = open('sample_input.txt').read()

# Vocabulary business

# 1. Create a list of unique characters
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)

char_to_ix = {ch:i for i,ch in enumerate(chars) }
ix_to_chars = {i:ch for i,ch in enumerate(chars)}

x=np.zeros(len(data))
for i, c in enumerate(data):
    x[i]=char_to_ix[c]

data=x

print("The dataset has %d characters and %d unique." %(data_size, vocab_size))

The dataset has 3729 characters and 61 unique.


In [223]:
# Defining hyperparameter tuple and setting hyper parameters
hparams = namedtuple('hyper_parameters', 
                     'hidden_size, seq_length, learning_rate,'
                     'batch_size, vocab_size,'
                    'num_epochs')


hps = hparams(hidden_size=200,
              seq_length=5,
              learning_rate=1e-2,
              batch_size=1,
              vocab_size=vocab_size,
              num_epochs=15)

In [224]:
class deepMind(object):
    def __init__(self, hps, mode, debug=True):
        self.hps=hps
        self.mode=mode
        self.batch_pointer=0
        
    def buildGraph(self, variant="fixed_length"):
        # Extracting structural specifics from HPS
        D = self.hps.vocab_size
        H = self.hps.hidden_size
        N = self.hps.batch_size
        T = self.hps.seq_length
        print("")
        
                       
        # Network :)
        with tf.name_scope("PlaceHolders"):
            self.X = tf.placeholder(tf.int32, [None, None], "Inputs")
            self.Y = tf.placeholder(tf.int32, [None, None], "Expected_Output")
        
        #with tf.name_scope("embedding"):
        #    embedding = tf.Variable(tf.random_uniform(
        #            [D, H],
        #            -1.0, 1.0), dtype=tf.float32)
        #tf.get_variable("embedding", [D,H], 
        #                            initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0))
        #    inputs = tf.nn.embedding_lookup(embedding, self.X)
        inputs = tf.one_hot(self.X,self.hps.vocab_size)
            
        #with tf.name_scope("batch_norm"):
        #    inputs = tf.layers.batch_normalization(inputs)
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(H)
        
        outputs, last_states = tf.nn.dynamic_rnn(
                                         cell=lstm_cell,
                                         dtype=tf.float32,
                                         inputs=inputs,
                                        # initial_state=lstm_cell.zero_state(N,tf.float32)
                                         )       
             
        with tf.name_scope("Dense_Output_Layer"):
            outputs=tf.reshape(outputs, [-1, H])
            scores=tf.layers.dense(outputs, D)
        tf.summary.histogram('scores', scores)
        self.scores=scores
        return self.scores
      
        
    def trainStep(self, scores):
        # Extracting structural specifics from HPS
        D = self.hps.vocab_size
        H = self.hps.hidden_size
        N = self.hps.batch_size
        T = self.hps.seq_length
        
        
        
        # Define the additional part of the network Used for training
        # Loss and Optimizer
        y_int=tf.reshape(self.Y, [-1])
        with tf.name_scope("COST"):
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.one_hot(y_int,self.hps.vocab_size),
                logits=scores,
                name="softMaxCrossEntropy"
            )
            loss = tf.reduce_mean(loss)
        tf.summary.scalar('loss_', loss)
        
        
        with tf.name_scope("Predictions"):
            predictions = tf.cast(tf.argmax(scores, axis=-1, name="predictions"), tf.int32)
        #accuracy=tf.reduce_mean(tf.cast(tf.equal(predictions, self.Y), tf.float32))
                                         
        solver = tf.train.AdamOptimizer(hps.learning_rate)
        #solver = tf.train.GradientDescentOptimizer(hps.learning_rate)
        #solver = tf.train.MomentumOptimizer(hps.learning_rate, 0.9)
        
        tvars  = tf.trainable_variables()
        gs_int = tf.gradients(loss, tvars)
        grads = list(zip(gs_int, tf.trainable_variables()))
        gs, _  = tf.clip_by_global_norm(gs_int, 3.0)
        for grad, var in grads:
            tf.summary.histogram(var.name + '/gradient', grad)
        #tf.summary.histogram('gradients', gs)
        train_step = solver.apply_gradients(zip(gs,tvars), global_step=tf.contrib.framework.get_or_create_global_step())
        #train_step = solver.apply_gradients(grads, global_step=tf.contrib.framework.get_or_create_global_step())
        summary = tf.summary.merge_all()
        return train_step, loss, summary
    
    def train(self,sess, data,train_ops, writer=None, print_every=100):
        num_batches=data.shape[0]//self.hps.batch_size//self.hps.seq_length
        #print(num_batches)
        for e in tqdm(list(range(hps.num_epochs)), desc='epoch'):
            total_correct=0
            #print("Reset", total_correct)
            for i in range(num_batches):
                x,y = self.getNextBatch(data)
                feed_dict={self.X: x, self.Y:y}
                _, loss, summary = sess.run(train_ops, feed_dict=feed_dict)
                if (i % print_every == 0):
                    print("Loss, Iter %d: %f" %(i, loss))
                if writer is not None:
                    writer.add_summary(summary)
            #Sample after every 10 epochs to see how we are doing
            if (e%1 == 0): 
                self.sample(sess,self.scores)
        
    
    def sample(self, sess, scores, 
               seed="O",
               #seed="I",
               length=200, beam_width=5):
        pd = tf.nn.softmax(scores)
        x= [char_to_ix[i] for i in seed]
        x=np.asarray(x).reshape(1,-1)
        y=np.zeros_like(x)
        feed_dict={self.X:x, self.Y:y}
        pred_str=[]
       # print(x.shape, y.shape)
        for i in range(length):
            p=sess.run(pd, feed_dict=feed_dict)
            p=p[-1]
            #print(p.shape)
            ix = np.random.choice(range(self.hps.vocab_size), p=p.ravel())
            #ix=np.argmax(p.ravel())
            p.reshape(1,-1)
            feed_dict[self.X]=ix*np.ones((1,1))
            pred_str.append(ix_to_chars[ix])
        txt=''.join(pred_str)
        #print(len(pred_str))
        print ('----\n %s \n----' % (txt, ))

            
            
            
        
        
    
    # data is expected to be numpy array of indices
    def getNextBatch(self, data):
        start=self.batch_pointer
        end=start+self.hps.batch_size*self.hps.seq_length
        self.batch_pointer=end
        x=np.take(data, range(start,end), mode='wrap').reshape(hps.batch_size,-1)
        y=np.copy(x)
        y[:-1]=x[1:]
        y[-1]=x[0]
        return x,y
        
                                
        




In [225]:
model=deepMind (hps,'train')

tf.reset_default_graph()
scores=model.buildGraph()
train_ops=model.trainStep(scores)

config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess=tf.Session(config=config)
summaries=tf.summary.merge_all()
writer = tf.summary.FileWriter(
            os.path.join('./tf_logs', time.strftime("%Y-%m-%d-%H-%M-%S")))
writer.add_graph(sess.graph)

with tf.device("/gpu:0"): #"/cpu:0" or "/gpu:0" 
    
    sess.run(tf.global_variables_initializer())
    model.train(sess, data, train_ops,writer)
    model.sample(sess, scores) 
    
    


INFO:tensorflow:Summary name rnn/basic_lstm_cell/weights:0/gradient is illegal; using rnn/basic_lstm_cell/weights_0/gradient instead.


INFO:tensorflow:Summary name rnn/basic_lstm_cell/weights:0/gradient is illegal; using rnn/basic_lstm_cell/weights_0/gradient instead.


INFO:tensorflow:Summary name rnn/basic_lstm_cell/biases:0/gradient is illegal; using rnn/basic_lstm_cell/biases_0/gradient instead.


INFO:tensorflow:Summary name rnn/basic_lstm_cell/biases:0/gradient is illegal; using rnn/basic_lstm_cell/biases_0/gradient instead.


INFO:tensorflow:Summary name dense/kernel:0/gradient is illegal; using dense/kernel_0/gradient instead.


INFO:tensorflow:Summary name dense/kernel:0/gradient is illegal; using dense/kernel_0/gradient instead.


INFO:tensorflow:Summary name dense/bias:0/gradient is illegal; using dense/bias_0/gradient instead.


INFO:tensorflow:Summary name dense/bias:0/gradient is illegal; using dense/bias_0/gradient instead.
epoch:   0%|          | 0/15 [00:00<?, ?it/s]

Loss, Iter 0: 4.101541
Loss, Iter 100: 0.827361
Loss, Iter 200: 0.035981
Loss, Iter 300: 0.111488
Loss, Iter 400: 0.080561
Loss, Iter 500: 0.009174
Loss, Iter 600: 0.035601
Loss, Iter 700: 0.037422
----
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 
----

epoch:   7%|▋         | 1/15 [00:07<01:47,  7.68s/it]


Loss, Iter 0: 1.104358
Loss, Iter 100: 0.002319
Loss, Iter 200: 0.004266
Loss, Iter 300: 0.003586
Loss, Iter 400: 0.001683
Loss, Iter 500: 0.834942
Loss, Iter 600: 0.001588
Loss, Iter 700: 0.001318
----
 NNNNNNNNNNNNNNNNNNNNNNNNNNNNN,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 
----

epoch:  13%|█▎        | 2/15 [00:15<01:40,  7.71s/it]


Loss, Iter 0: 0.000966
Loss, Iter 100: 0.029005
Loss, Iter 200: 0.001397
Loss, Iter 300: 0.000585
Loss, Iter 400: 0.000484
Loss, Iter 500: 0.613907
Loss, Iter 600: 0.014198
Loss, Iter 700: 0.002390
----
 wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww 
----

epoch:  20%|██        | 3/15 [00:23<01:32,  7.73s/it]


Loss, Iter 0: 0.001148
Loss, Iter 100: 0.000546
Loss, Iter 200: 0.004737
Loss, Iter 300: 0.009387
Loss, Iter 400: 0.000915
Loss, Iter 500: 0.004203
Loss, Iter 600: 0.004458
Loss, Iter 700: 0.017991
----
 OOOOOOOOOOOOOOOOOOOOIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII 
----

epoch:  27%|██▋       | 4/15 [00:31<01:25,  7.76s/it]


Loss, Iter 0: 0.001033
Loss, Iter 100: 0.001209
Loss, Iter 200: 0.000241
Loss, Iter 300: 0.050685
Loss, Iter 400: 0.000670
Loss, Iter 500: 0.001407
Loss, Iter 600: 0.000405
Loss, Iter 700: 0.000082
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii 
----

epoch:  33%|███▎      | 5/15 [00:38<01:17,  7.79s/it]


Loss, Iter 0: 0.000172
Loss, Iter 100: 0.000076
Loss, Iter 200: 0.000092
Loss, Iter 300: 0.003373
Loss, Iter 400: 0.000328
Loss, Iter 500: 0.013270
Loss, Iter 600: 0.001690
Loss, Iter 700: 0.008944
----
 OOOOOOOOOO;;;;;RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR”””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””””” 
----

epoch:  40%|████      | 6/15 [00:46<01:10,  7.85s/it]


Loss, Iter 0: 0.000293
Loss, Iter 100: 0.002490
Loss, Iter 200: 0.184629
Loss, Iter 300: 0.001664
Loss, Iter 400: 0.000696
Loss, Iter 500: 0.000360
Loss, Iter 600: 0.001389
Loss, Iter 700: 0.002931
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 
----

epoch:  47%|████▋     | 7/15 [00:54<01:03,  7.91s/it]


Loss, Iter 0: 0.002504
Loss, Iter 100: 0.000621
Loss, Iter 200: 0.011593
Loss, Iter 300: 0.000823
Loss, Iter 400: 0.002401
Loss, Iter 500: 0.008353
Loss, Iter 600: 0.000082
Loss, Iter 700: 0.000235
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 
----

epoch:  53%|█████▎    | 8/15 [01:02<00:55,  7.94s/it]


Loss, Iter 0: 0.000266
Loss, Iter 100: 0.000059
Loss, Iter 200: 0.002605
Loss, Iter 300: 0.001686
Loss, Iter 400: 0.000713
Loss, Iter 500: 0.001414
Loss, Iter 600: 0.005838
Loss, Iter 700: 0.000961
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn 
----

epoch:  60%|██████    | 9/15 [01:11<00:47,  7.99s/it]


Loss, Iter 0: 0.002475
Loss, Iter 100: 0.001039
Loss, Iter 200: 0.000776
Loss, Iter 300: 0.002988
Loss, Iter 400: 0.001473
Loss, Iter 500: 0.001247
Loss, Iter 600: 0.000228
Loss, Iter 700: 0.000416
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 
----

epoch:  67%|██████▋   | 10/15 [01:19<00:40,  8.03s/it]


Loss, Iter 0: 0.000170
Loss, Iter 100: 0.000191
Loss, Iter 200: 0.000672
Loss, Iter 300: 0.000226
Loss, Iter 400: 0.005565
Loss, Iter 500: 0.000536
Loss, Iter 600: 0.000173
Loss, Iter 700: 0.000118
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 
----

epoch:  73%|███████▎  | 11/15 [01:27<00:32,  8.05s/it]


Loss, Iter 0: 0.000167
Loss, Iter 100: 0.000228
Loss, Iter 200: 0.000062
Loss, Iter 300: 0.001690
Loss, Iter 400: 0.000289
Loss, Iter 500: 0.000278
Loss, Iter 600: 0.002432
Loss, Iter 700: 0.000188
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 
----

epoch:  80%|████████  | 12/15 [01:35<00:24,  8.08s/it]


Loss, Iter 0: 0.000902
Loss, Iter 100: 0.000318
Loss, Iter 200: 0.000528
Loss, Iter 300: 0.000359
Loss, Iter 400: 0.009840
Loss, Iter 500: 0.002290
Loss, Iter 600: 0.000191
Loss, Iter 700: 0.000270
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 
----

epoch:  87%|████████▋ | 13/15 [01:43<00:16,  8.10s/it]


Loss, Iter 0: 0.000172
Loss, Iter 100: 0.155891
Loss, Iter 200: 0.000201
Loss, Iter 300: 0.000155
Loss, Iter 400: 0.000326
Loss, Iter 500: 0.000110
Loss, Iter 600: 0.000471
Loss, Iter 700: 0.000155
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 
----

epoch:  93%|█████████▎| 14/15 [01:51<00:08,  8.12s/it]


Loss, Iter 0: 0.000175
Loss, Iter 100: 0.000063
Loss, Iter 200: 0.000534
Loss, Iter 300: 0.000057
Loss, Iter 400: 0.000025
Loss, Iter 500: 0.000154
Loss, Iter 600: 0.000038
Loss, Iter 700: 0.000102
----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 
----

epoch: 100%|██████████| 15/15 [01:59<00:00,  8.12s/it]


----
 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 
----



