In [25]:
import tensorflow as tf
import numpy as np
import time
import random
import json

In [2]:
# --- Data ---------------------------------------------------------------
# NOTE: despite the name this is the path of the corpus *file*, not a directory.
DATA_DIR = 'data/shakespeare.txt'

# --- Model / batching ---------------------------------------------------
BATCH_SIZE = 25      # sequences per training step
SEQ_LENGTH = 50      # characters per sequence
LAYER_NUM = 1        # stacked LSTM layers
HIDDEN_DIM = 128     # units per LSTM layer
LEARNING_RATE = 1e-1 # passed to the Adam optimizer

# --- Run settings -------------------------------------------------------
# NOTE(review): the settings below are not referenced by the cells shown
# in this notebook; confirm whether they are still needed.
GENERATE_LEN = 500
NB_EPOCH = 1500
MODE = 'train'
WEIGHTS = ''
IS_TRAIN = True

In [3]:
def get_text_data(path=None):
    """Read the whole training corpus into memory and report its size.

    Args:
        path: Text file to read. Defaults to the notebook-level ``DATA_DIR``
            setting when omitted, which preserves the original zero-argument
            call.

    Returns:
        str: The complete contents of the file.

    Side effects:
        Prints the total number of characters and the number of distinct
        characters in the corpus.
    """
    if path is None:
        path = DATA_DIR
    with open(path, "r") as text_file:
        data = text_file.read()

    # Only the counts are reported; the character <-> id mapping used by the
    # rest of the notebook is loaded separately from a JSON file.
    total_char, unique_char = len(data), len(set(data))
    print ('data has %d characters, %d unique.' %(total_char, unique_char))
    return data

def reset_graph(seed=42):
    """Start from an empty default graph and seed every RNG the notebook uses.

    Args:
        seed: Integer seed applied to TensorFlow's graph-level seed, NumPy's
            global generator and Python's ``random`` module.
    """
    tf.reset_default_graph()
    # Seed after the reset so the seed is attached to the new default graph.
    tf.set_random_seed(seed)
    np.random.seed(seed)
    # sample() draws its seed character with random.randint, so the stdlib
    # generator has to be seeded too for generated text to be reproducible.
    random.seed(seed)

In [4]:
# Load the full corpus once; the preprocessing cell slices it into windows.
data = get_text_data()

data has 1115393 characters, 65 unique.


In [5]:
# Load the precomputed vocabulary: corpus size, vocabulary size and the
# character -> integer id mapping.
with open('data/coding2.json') as json_file:
    da = json.load(json_file)

total_chars = da['total']
unique_chars = da['unique']
char_id = da['char_id']

# Inverse mapping (integer id -> character), used to decode sampled ids.
id_char = dict((int(ix), ch) for ch, ix in char_id.items())

In [6]:
def generate_next_batch(given_data, seq_length, p):
    """Encode one input window and its one-step-ahead target window.

    Args:
        given_data: Text to slice; every character must be a key of the
            notebook-level ``char_id`` mapping.
        seq_length: Number of characters in each window.
        p: Start offset of the input window inside ``given_data``.

    Returns:
        tuple: ``(inputs, targets)``, two arrays of shape ``(1, seq_length)``
        holding character ids. ``targets`` is the window that starts one
        character after ``inputs``.
    """
    def encode_window(start):
        # Map each character of the window to its id and shape it as one row.
        window = given_data[start:start + seq_length]
        ids = [char_id[ch] for ch in window]
        return np.array(ids).reshape(1, seq_length)

    return encode_window(p), encode_window(p + 1)

In [7]:
# Preprocessing: encode every SEQ_LENGTH-character window up front instead of
# generating windows on the fly during training.
batches = total_chars // SEQ_LENGTH
window_starts = range(0, batches * SEQ_LENGTH, SEQ_LENGTH)
encoded_pairs = [generate_next_batch(data, SEQ_LENGTH, start) for start in window_starts]

# One row per window: x holds the inputs, y the targets shifted by one character.
x = np.array([inp for inp, _ in encoded_pairs]).reshape(batches, SEQ_LENGTH)
y = np.array([tgt for _, tgt in encoded_pairs]).reshape(batches, SEQ_LENGTH)

# Leave the cursor just past the last encoded window.
ptr = batches * SEQ_LENGTH

In [8]:
# Sanity check: y was reshaped to (batches, SEQ_LENGTH) in the preprocessing cell.
y.shape

(22307, 50)

In [19]:
def sample(n, checkpoint_dir='checkpoint/'):
    """Generate text from the most recent checkpoint, one character at a time.

    A single-step (batch 1, sequence 1) ``CharRNN`` is built in the current
    default graph and its weights are restored from the latest checkpoint in
    ``checkpoint_dir``. The notebook calls ``reset_graph()`` before this
    function so the model is built on an empty graph.

    Args:
        n: Number of characters to sample after the random seed character.
        checkpoint_dir: Directory searched for the latest checkpoint.

    Returns:
        str: The seed character followed by the ``n`` sampled characters.

    Raises:
        ValueError: If ``checkpoint_dir`` contains no checkpoint.
    """
    checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
    if checkpoint_path is None:
        # latest_checkpoint returns None when nothing has been saved yet;
        # fail here with a clear message instead of inside Saver.restore.
        raise ValueError(
            "no checkpoint found in %r; train the model first" % checkpoint_dir)

    # Draw the seed from the whole vocabulary rather than a hard-coded 0..64.
    seed_ix = random.randint(0, unique_chars - 1)

    model2 = CharRNN(unique_chars, sampling=True,
                     hidden_dim=HIDDEN_DIM, num_layers=LAYER_NUM,
                     learning_rate=LEARNING_RATE)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)

        state = sess.run(model2.initial_state)
        # The inputs placeholder is int32 with shape (1, 1) in sampling mode.
        xx = np.zeros((1, 1), dtype=np.int32)
        xx[0][0] = seed_ix
        ixes = [seed_ix]

        for _ in range(n):
            # Feed the previous character and recurrent state; get back the
            # next-character distribution and the updated state.
            prob, state = sess.run(
                [model2.prediction, model2.final_state],
                feed_dict={model2.inputs: xx, model2.initial_state: state})
            ix = np.random.choice(unique_chars, p=prob.ravel())
            ixes.append(ix)
            xx[0][0] = ix

        return ''.join(id_char[ch] for ch in ixes)

In [10]:
def build_LSTM(hidden_dim, num_layers, batch_size):
    """Build a stacked LSTM cell and its all-zero initial state.

    Args:
        hidden_dim: Number of units in every LSTM layer.
        num_layers: Number of LSTM layers to stack.
        batch_size: Batch size the zero state is created for.

    Returns:
        tuple: ``(multi_layer_cell, initial_state)``.
    """
    with tf.name_scope('lstm'):
        # Size the network from the arguments. The previous version read the
        # HIDDEN_DIM / LAYER_NUM globals, silently ignoring what callers passed.
        cells = [tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim)
                 for _ in range(num_layers)]
        multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
        _initial_state = multi_layer_cell.zero_state(batch_size, tf.float32)
        return multi_layer_cell, _initial_state

In [11]:
def output(lstm_output, hidden_dim, out_size):
    """Project LSTM outputs onto the vocabulary with a softmax layer.

    Args:
        lstm_output: Output of the recurrent layer.
        hidden_dim: Size of the last axis of ``lstm_output``.
        out_size: Number of output classes.

    Returns:
        tuple: ``(out, logits)``, the softmax probabilities and the
        unnormalised scores, each with one row per flattened time step.
    """
    # Collapse every leading axis so each row is one hidden vector.
    flat_hidden = tf.reshape(tf.concat(lstm_output, axis=1), [-1, hidden_dim])

    with tf.variable_scope('softmax'):
        weights = tf.Variable(tf.truncated_normal([hidden_dim, out_size], stddev=0.1))
        bias = tf.Variable(tf.zeros([out_size]))

    logits = tf.matmul(flat_hidden, weights) + bias
    return tf.nn.softmax(logits, name='predictions'), logits

In [12]:
def loss(logits, targets, lstm_size, vocab_size):
    """Mean softmax cross-entropy between ``logits`` and integer ``targets``.

    Args:
        logits: Unnormalised class scores.
        targets: Integer class ids; one-hot encoded and reshaped to the
            static shape of ``logits``.
        lstm_size: Unused; kept so existing callers keep working.
        vocab_size: Depth of the one-hot encoding.

    Returns:
        Scalar tensor holding the mean cross-entropy.
    """
    labels = tf.reshape(tf.one_hot(targets, vocab_size), logits.get_shape())
    per_step_xent = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
    return tf.reduce_mean(per_step_xent)

In [13]:
def optimizer(loss, learning_rate, grad_clip=5):
    """Create an Adam training op with global-norm gradient clipping.

    Args:
        loss: Scalar tensor to minimise.
        learning_rate: Learning rate passed to Adam.
        grad_clip: Maximum global norm of the gradients; clipping is used to
            control exploding gradients.

    Returns:
        The op that applies the clipped gradients to all trainable variables.
    """
    trainable = tf.trainable_variables()
    raw_grads = tf.gradients(loss, trainable)
    clipped_grads, _global_norm = tf.clip_by_global_norm(raw_grads, grad_clip)
    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped_grads, trainable))

In [14]:
class CharRNN:
    """Character-level LSTM language model built in the default TF graph.

    Attributes:
        inputs: int32 placeholder of shape ``(batch_size, seq_length)``.
        targets: int32 placeholder of shape ``(batch_size, seq_length)``.
        initial_state: Zero state of the stacked LSTM; can be fed to carry
            state between runs.
        final_state: LSTM state after processing ``inputs``.
        prediction: Softmax probabilities over the vocabulary.
        logits: Unnormalised scores behind ``prediction``.
        loss: Mean softmax cross-entropy against ``targets``.
        optimizer: Training op (Adam with gradient clipping).
    """

    def __init__(self, vocab_size, batch_size=25, seq_length=50, hidden_dim=128, num_layers=2,
                 learning_rate=0.001, grad_clip=5, sampling=False):
        """Build the full graph: placeholders, LSTM, softmax, loss, train op.

        Args:
            vocab_size: Number of distinct characters.
            batch_size: Sequences per batch (forced to 1 when sampling).
            seq_length: Characters per sequence (forced to 1 when sampling).
            hidden_dim: Units per LSTM layer.
            num_layers: Number of stacked LSTM layers.
            learning_rate: Learning rate for the optimizer.
            grad_clip: Global-norm threshold for gradient clipping.
            sampling: When true, build a one-character-at-a-time model.
        """
        if sampling:
            # Generation feeds a single character per step.
            batch_size, seq_length = 1, 1

        self.inputs = tf.placeholder(tf.int32, [batch_size, seq_length], name='inputs')
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length], name='targets')

        # Build the LSTM cell
        multi_cell, self.initial_state = build_LSTM(hidden_dim, num_layers=num_layers,
                                                    batch_size=batch_size)

        x_one_hot = tf.one_hot(self.inputs, vocab_size)

        outputs, state = tf.nn.dynamic_rnn(multi_cell, x_one_hot,
                                           initial_state=self.initial_state)
        self.final_state = state

        self.prediction, self.logits = output(outputs, hidden_dim, vocab_size)

        # Loss and optimizer (with gradient clipping)
        self.loss = loss(self.logits, self.targets, hidden_dim, vocab_size)
        self.optimizer = optimizer(self.loss, learning_rate, grad_clip)

In [15]:
# Number of full BATCH_SIZE-row batches available in x.
no_of_batches = len(x) // BATCH_SIZE
# Row cursor into x / y, and a running count of training steps.
ptr = iteration = 0

In [105]:
tf.reset_default_graph()
epochs = 100
# Write a checkpoint every N epochs (and once more when training finishes).
save_every_n_epochs = 2
# sample() restores the latest checkpoint from 'checkpoint/', so save there.
checkpoint_dir = 'checkpoint/'

model = CharRNN(unique_chars, batch_size=BATCH_SIZE, seq_length=SEQ_LENGTH,
                hidden_dim=HIDDEN_DIM, num_layers=LAYER_NUM,
                learning_rate=LEARNING_RATE)

saver = tf.train.Saver(max_to_keep=100)
# Make sure the target directory exists before the first save.
tf.gfile.MakeDirs(checkpoint_dir)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    zero_state = sess.run(model.initial_state)
    for e in range(epochs):
        # Every epoch starts from the zero recurrent state.
        new_state = zero_state
        for i in range(no_of_batches):
            # Wrap the row cursor before it would run past the encoded windows.
            if ptr + BATCH_SIZE + 1 >= batches:
                ptr = 0
            inputs, targets = x[ptr:ptr+BATCH_SIZE], y[ptr:ptr+BATCH_SIZE]
            ptr += BATCH_SIZE
            start = time.time()
            # NOTE(review): the state is carried into the next batch, but row i
            # of consecutive batches is not contiguous text (rows are
            # consecutive windows), so confirm this carry-over is intended.
            feed = {model.inputs: inputs,
                    model.targets: targets,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss,
                                                 model.final_state,
                                                 model.optimizer],
                                                feed_dict=feed)
            end = time.time()
            if iteration % 100 == 0:
                print('Epoch: %i/%i... '%(e+1, epochs),
                      'Training Step: %i... '%iteration,
                      'Training loss: %f... '%batch_loss,
                      '{:.4f} sec/batch'.format(end-start))
            iteration += 1
        if e % save_every_n_epochs == 0:
            saver.save(sess, checkpoint_dir + "i{}_l{}.ckpt".format(iteration//no_of_batches, batch_loss))
    # Final checkpoint. The original referenced an undefined `counter` here,
    # which raised NameError after the last epoch; the step count is `iteration`.
    saver.save(sess, checkpoint_dir + "i{}_l{}.ckpt".format(iteration, HIDDEN_DIM))

Epoch: 1/100...  Training Step: 0...  Training loss: 4.175072...  0.1027 sec/batch
Epoch: 1/100...  Training Step: 100...  Training loss: 2.154204...  0.0638 sec/batch
Epoch: 1/100...  Training Step: 200...  Training loss: 2.031109...  0.0638 sec/batch
Epoch: 1/100...  Training Step: 300...  Training loss: 1.932110...  0.0638 sec/batch
Epoch: 1/100...  Training Step: 400...  Training loss: 1.856728...  0.0688 sec/batch
Epoch: 1/100...  Training Step: 500...  Training loss: 1.917502...  0.0638 sec/batch
Epoch: 1/100...  Training Step: 600...  Training loss: 1.789049...  0.0648 sec/batch
Epoch: 1/100...  Training Step: 700...  Training loss: 1.754209...  0.0678 sec/batch
Epoch: 1/100...  Training Step: 800...  Training loss: 1.644505...  0.0678 sec/batch
Epoch: 2/100...  Training Step: 900...  Training loss: 2.074351...  0.0648 sec/batch
Epoch: 2/100...  Training Step: 1000...  Training loss: 1.909281...  0.0648 sec/batch
Epoch: 2/100...  Training Step: 1100...  Training loss: 1.694085..

KeyboardInterrupt: 

In [21]:
# Build the sampling model on a fresh, seeded graph, then generate text.
reset_graph()
generated_text = sample(200)
print(generated_text)

INFO:tensorflow:Restoring parameters from checkpoint/i43708_l1.4605046510696411.ckpt
bel intoer full
not a match'd me.
Much eaten, in Talk, sharm to theirs! afier,
To r my brother be tamas
Are dint here adred
for hast will beer sinming him us, a greate and for
a glasse as it
me, they d
