In [1]:
import tensorflow as tf
import numpy as np

from collections import namedtuple
from tqdm import tqdm
import os
import time

In [2]:
# Reading the data
data = open('sample_input.big.txt').read()

# Vocabulary business

# 1. Create a list of unique characters
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)

char_to_ix = {ch:i for i,ch in enumerate(chars) }
ix_to_chars = {i:ch for i,ch in enumerate(chars)}

x=np.zeros(len(data))
for i, c in enumerate(data):
    x[i]=char_to_ix[c]

data=x

print("The dataset has %d characters and %d unique." %(data_size, vocab_size))

The dataset has 1115394 characters and 65 unique.


In [91]:
# Defining hyperparameter tuple and setting hyper parameters
hparams = namedtuple('hyper_parameters', 
                     'hidden_size, seq_length, learning_rate,'
                     'batch_size, vocab_size,'
                    'num_epochs')


hps = hparams(hidden_size=200,
              seq_length=20,
              learning_rate=1e-1,
              batch_size=200,
              vocab_size=vocab_size,
              num_epochs=1)

In [137]:
class deepMind(object):
    def __init__(self, hps, mode, debug=True):
        self.hps=hps
        self.mode=mode
        self.batch_pointer=0
        
    def buildGraph(self, variant="fixed_length"):
        # Extracting structural specifics from HPS
        D = self.hps.vocab_size
        H = self.hps.hidden_size
        N = self.hps.batch_size
        T = self.hps.seq_length
        print("")
        self.global_step = tf.contrib.framework.get_or_create_global_step()
                       
        # Network :)
        with tf.name_scope("PlaceHolders"):
            self.X = tf.placeholder(tf.int32, [N,T], "Inputs")
            self.Y = tf.placeholder(tf.int32, [N,T], "Expected_Output")
        
        with tf.name_scope("embedding"):
            embedding = tf.Variable(tf.random_uniform(
                    [D, H],
                    -1.0, 1.0), dtype=tf.float32)
        #tf.get_variable("embedding", [D,H], 
        #                            initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0))
            inputs = tf.nn.embedding_lookup(embedding, self.X)
            
        with tf.name_scope("batch_norm"):
            inputs = tf.layers.batch_normalization(inputs)
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(H)
        
        outputs, last_states = tf.nn.dynamic_rnn(
                                         cell=lstm_cell,
                                         dtype=tf.float32,
                                         inputs=inputs)
        print("HIT", outputs.get_shape().as_list())
        #outputs = tf.contrib.layers.flatten(outputs)
        
        #outputs=tf.reshape(outputs, [-1,H])
        
        with tf.name_scope("Dense_Output_Layer"):
            scores=tf.layers.dense(outputs, D)
        
        print("HITP", outputs.get_shape().as_list())
        tf.summary.histogram('scores', scores)
        
        print(scores.get_shape().as_list())
        
        return scores
        #tf.contrib.legacy_seq2seq()
      
        
    def trainStep(self, scores):
        # Extracting structural specifics from HPS
        D = self.hps.vocab_size
        H = self.hps.hidden_size
        N = self.hps.batch_size
        T = self.hps.seq_length
        
        
        
        # Define the additional part of the network Used for training
        # Loss and Optimizer
        y_int=tf.reshape(self.Y, [-1,1])
        print(y_int.get_shape().as_list())
        print(scores.get_shape().as_list())
        with tf.name_scope("COST"):
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.one_hot(y_int,self.hps.vocab_size),
                #logits=tf.reshape(scores,[-1,self.hps.vocab_size]),
                logits=scores,
                name="softMaxCrossEntropy"
            )
            loss = tf.reduce_sum(loss)
        #print(loss.get_shape().as_list())
        tf.summary.scalar('loss_', loss)
        
        
        with tf.name_scope("Predictions"):
            predictions = tf.cast(tf.argmax(scores, axis=-1, name="predictions"), tf.int32)
        #accuracy=tf.reduce_mean(tf.cast(tf.equal(predictions, self.Y), tf.float32))
                                         
        #solver = tf.train.AdamOptimizer(hps.learning_rate)
        #solver = tf.train.GradientDescentOptimizer(hps.learning_rate)
        solver = tf.train.MomentumOptimizer(hps.learning_rate, 0.9)
        
        tvars  = tf.trainable_variables()
        gs_int = tf.gradients(loss, tvars)
        grads = list(zip(gs_int, tf.trainable_variables()))
        #gs, _  = tf.clip_by_global_norm(gs_int, 3.0)
        for grad, var in grads:
            tf.summary.histogram(var.name + '/gradient', grad)
        #tf.summary.histogram('gradients', gs)
        train_step = solver.apply_gradients(grads, global_step=self.global_step)
        summary = tf.summary.merge_all()
        return train_step, loss, summary
    
    def train(self,sess, data,train_ops, writer=None, print_every=100):
        num_batches=data.shape[0]//self.hps.batch_size//self.hps.seq_length
        #print(num_batches)
        for e in tqdm(list(range(hps.num_epochs)), desc='epoch'):
            total_correct=0
            #print("Reset", total_correct)
            for i in range(num_batches):
                x,y = self.getNextBatch(data)
                feed_dict={self.X: x, self.Y:y}
                _, loss, summary = sess.run(train_ops, feed_dict=feed_dict)
                if (i % print_every == 0):
                    print("Training Accuracy in iter %d: %f" %(i, loss))
                if writer is not None:
                    writer.add_summary(summary)
            #acc=total_correct/(self.hps.batch_size*self.hps.seq_length*num_batches)
                
            
            total_correct=0
        
    
    def sample(self, sess, scores, 
               #seed="Super Bowl was an American football game to determine the champion of the National Football League",
               seed="S",
               length=1000, beam_width=5):
        #predictions = tf.cast(tf.argmax(scores, axis=-1, name="predictions"), tf.int32)
        pd = tf.nn.softmax(scores)
        x= [char_to_ix[i] for i in seed]
        #print(x)
        x=np.asarray(x).reshape(1,-1)
        y=np.zeros_like(x)
        feed_dict={self.X:x, self.Y:y}
        pred_str=[]
        for i in range(length):
            p=sess.run(pd, feed_dict=feed_dict)
            #rint(p.shape)
            ix = np.random.choice(range(self.hps.vocab_size), p=p.ravel())
            p.reshape(1,-1)
            feed_dict[self.X]=ix*np.ones((1,1))
            pred_str+=ix_to_chars[ix]
            #print(np.asscalar(p))
        
        print(''.join(pred_str))
            
            
            
        
        
    
    # data is expected to be numpy array of indices
    def getNextBatch(self, data):
        start=self.batch_pointer
        end=start+self.hps.batch_size*self.hps.seq_length
        self.batch_pointer=end
        x=np.take(data, range(start,end), mode='wrap').reshape(hps.batch_size,-1)
        y=np.copy(x)
        y[:-1]=x[1:]
        y[-1]=x[0]
        return x,y
        
                                
        




In [138]:
model=deepMind (hps,'train')

tf.reset_default_graph()
scores=model.buildGraph()
train_ops=model.trainStep(scores)

config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess=tf.Session(config=config)
summaries=tf.summary.merge_all()
writer = tf.summary.FileWriter(
            os.path.join('./tf_logs', time.strftime("%Y-%m-%d-%H-%M-%S")))
writer.add_graph(sess.graph)

with tf.device("/gpu:0"): #"/cpu:0" or "/gpu:0" 
    
    sess.run(tf.global_variables_initializer())
    model.train(sess, data, train_ops,writer)
    
    
    #model.sample(sess, scores)  


HIT [200, 20, 200]
HITP [200, 20, 200]
[200, 20, 65]
[4000, 1]
[200, 20, 65]
INFO:tensorflow:Summary name embedding/Variable:0/gradient is illegal; using embedding/Variable_0/gradient instead.


INFO:tensorflow:Summary name embedding/Variable:0/gradient is illegal; using embedding/Variable_0/gradient instead.


INFO:tensorflow:Summary name batch_normalization/beta:0/gradient is illegal; using batch_normalization/beta_0/gradient instead.


INFO:tensorflow:Summary name batch_normalization/beta:0/gradient is illegal; using batch_normalization/beta_0/gradient instead.


INFO:tensorflow:Summary name batch_normalization/gamma:0/gradient is illegal; using batch_normalization/gamma_0/gradient instead.


INFO:tensorflow:Summary name batch_normalization/gamma:0/gradient is illegal; using batch_normalization/gamma_0/gradient instead.


INFO:tensorflow:Summary name rnn/basic_lstm_cell/weights:0/gradient is illegal; using rnn/basic_lstm_cell/weights_0/gradient instead.


INFO:tensorflow:Summary name rnn/basic_lstm_cell/weights:0/gradient is illegal; using rnn/basic_lstm_cell/weights_0/gradient instead.


INFO:tensorflow:Summary name rnn/basic_lstm_cell/biases:0/gradient is illegal; using rnn/basic_lstm_cell/biases_0/gradient instead.


INFO:tensorflow:Summary name rnn/basic_lstm_cell/biases:0/gradient is illegal; using rnn/basic_lstm_cell/biases_0/gradient instead.


INFO:tensorflow:Summary name dense/kernel:0/gradient is illegal; using dense/kernel_0/gradient instead.


INFO:tensorflow:Summary name dense/kernel:0/gradient is illegal; using dense/kernel_0/gradient instead.


INFO:tensorflow:Summary name dense/bias:0/gradient is illegal; using dense/bias_0/gradient instead.


INFO:tensorflow:Summary name dense/bias:0/gradient is illegal; using dense/bias_0/gradient instead.
epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Training Accuracy in iter 0: 16774.820312
Training Accuracy in iter 100: 27948858.000000
Training Accuracy in iter 200: 43884828.000000

epoch: 100%|██████████| 1/1 [00:03<00:00,  3.85s/it]







In [139]:

model.sample(sess, scores)  

ValueError: Cannot feed value of shape (1, 1) for Tensor 'PlaceHolders/Inputs:0', which has shape '(200, 20)'