In [1]:
import tensorflow as tf
import numpy as np

import time
from collections import namedtuple

In [2]:
DATA_DIR='data/shakespeare.txt'
BATCH_SIZE=25
LAYER_NUM=2
SEQ_LENGTH=50
HIDDEN_DIM=128
GENERATE_LEN=500
NB_EPOCH=1500
MODE='train'
WEIGHTS=''
LEARNING_RATE=1e-1
IS_TRAIN=True

In [3]:
def get_text_data():
    with open(DATA_DIR,"r") as text_file:
        data=text_file.read()
        tr_data,va_data=data[0:1070392],data[1070392:1075392]
    lis=list(set(data))    
    total_char,unique_char=len(data),len(lis)
    print ('data has %d characters, %d unique.' %(total_char, unique_char))
    char_to_ix = { ch:i for i,ch in enumerate(lis) }
    ix_to_char = { i:ch for i,ch in enumerate(lis) }
    return data

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [4]:
data=get_text_data()

data has 1115393 characters, 65 unique.


In [5]:
import json

with open('data/coding2.json') as file:
    da=json.load(file)
    total_chars,vocab_size,char_id,id_char=da['total'],da['unique'],da['char_id'],da['id_char']

id_v={int(k):v for k,v in id_char.items()}

In [6]:
def generate_next_batch(given_data, seq_length, p):
    inputs = [char_id[ch] for ch in given_data[p:p+seq_length]]
    targets = [char_id[ch] for ch in given_data[p+1:p+seq_length+1]]
    inputs=np.array(inputs).reshape(1,seq_length)
    targets=np.array(targets).reshape(1,seq_length)
    return inputs,targets

In [7]:
ptr=0
batches=(total_chars//SEQ_LENGTH)
x=[]
y=[]
for batch in range(batches):
    a,b=generate_next_batch(data, SEQ_LENGTH, ptr)
    x.append(a)
    y.append(b)
    ptr+=SEQ_LENGTH
x=np.array(x).reshape(batches, SEQ_LENGTH)
y=np.array(y).reshape(batches, SEQ_LENGTH)

In [8]:
y.shape

(22307, 50)

In [9]:
def get_placeholders(batch_size,seq_length):
        # Declare placeholders we'll feed into the graph
    inputs = tf.placeholder(tf.int32, [batch_size, seq_length], name='inputs')
    targets = tf.placeholder(tf.int32, [batch_size, seq_length], name='targets')
    return inputs, targets

In [10]:
def build_LSTM(lstm_size, num_layers, batch_size): 
    
    with tf.name_scope('lstm'):
        cells= [tf.contrib.rnn.BasicLSTMCell(num_units=lstm_size) 
                for layer in range(num_layers)]
        multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
#         batch_size_T  = np.shape(x)[0]
        _initial_state =  multi_layer_cell.zero_state(batch_size, tf.float32)
    return multi_layer_cell, _initial_state

In [11]:
def output(lstm_output, hidden_dim, out_size):

    seq_output = tf.concat(lstm_output, axis=1)
    x = tf.reshape(seq_output, [-1, hidden_dim])
    
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal([hidden_dim, out_size],stddev=0.1))
        softmax_b = tf.Variable(tf.zeros([out_size]))
    
    logits = tf.matmul(x, softmax_w) + softmax_b
    
    out = tf.nn.softmax(logits, name='predictions')
    
    return out, logits

In [12]:
def loss(logits, targets, lstm_size, vocab_size):
    
    y_one_hot = tf.one_hot(targets, vocab_size)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
    
    # Softmax cross entropy loss
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_reshaped, logits=logits))
    
    return loss

In [13]:
def optimizer(loss, learning_rate, grad_clip=5):
    
    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer

In [15]:
class CharRNN:
    
    def __init__(self, vocab_size, batch_size=25, seq_length=50, hidden_dim=128, num_layers=1, 
                 learning_rate=0.001, grad_clip=5, sampling=False):
    
        if sampling == True:
            batch_size, seq_length = 1, 1
        else:
            batch_size, seq_length = batch_size, seq_length

        tf.reset_default_graph()
        
        self.inputs, self.targets = get_placeholders(batch_size,seq_length)

        # Build the LSTM cell
        multi_cell, self.initial_state = build_LSTM(hidden_dim, num_layers=num_layers, batch_size=batch_size)
        
        x_one_hot = tf.one_hot(self.inputs, vocab_size)
        
        outputs, state = tf.nn.dynamic_rnn(multi_cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state
        
        self.prediction, self.logits = output(outputs, hidden_dim, vocab_size)
        
        # Loss and optimizer (with gradient clipping)
        self.loss = loss(self.logits, self.targets, hidden_dim,vocab_size)
        self.optimizer = optimizer(self.loss, learning_rate, grad_clip)

In [16]:
model = CharRNN(vocab_size, batch_size=BATCH_SIZE, seq_length=SEQ_LENGTH,
                hidden_dim=HIDDEN_DIM, num_layers=LAYER_NUM, 
                learning_rate=LEARNING_RATE)

In [17]:
model.initial_state

(LSTMStateTuple(c=<tf.Tensor 'lstm/MultiRNNCellZeroState/BasicLSTMCellZeroState/zeros:0' shape=(25, 128) dtype=float32>, h=<tf.Tensor 'lstm/MultiRNNCellZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(25, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'lstm/MultiRNNCellZeroState/BasicLSTMCellZeroState_1/zeros:0' shape=(25, 128) dtype=float32>, h=<tf.Tensor 'lstm/MultiRNNCellZeroState/BasicLSTMCellZeroState_1/zeros_1:0' shape=(25, 128) dtype=float32>))

In [18]:
no_of_batches=int(len(x)//BATCH_SIZE)
ptr=0
iteration=0

In [19]:
epochs = 100
# Save every N iterations
save_every_n = 200

model = CharRNN(vocab_size, batch_size=BATCH_SIZE, seq_length=SEQ_LENGTH,
                hidden_dim=HIDDEN_DIM, num_layers=LAYER_NUM, 
                learning_rate=LEARNING_RATE)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
        new_state = sess.run(model.initial_state)
        for i in range(no_of_batches):
            if (ptr+BATCH_SIZE+1 >= batches): 
                ptr=0
            inputs,targets=x[ptr:ptr+BATCH_SIZE],y[ptr:ptr+BATCH_SIZE]
            ptr+=BATCH_SIZE
            start=time.time()
            feed = {model.inputs: inputs,
                    model.targets: targets,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            end = time.time()
            if(iteration%100==0):
                print('Epoch: %i/%i... '%(e+1, epochs),
                      'Training Step: %i... '%iteration,
                      'Training loss: %f... '%batch_loss,
                      '{:.4f} sec/batch'.format(end-start))
            iteration+=1
        if (e % 2 == 0):
            saver.save(sess, "checkk/i{}_l{}.ckpt".format(iteration, HIDDEN_DIM))
    
    saver.save(sess, "checkk/i{}_l{}.ckpt".format(counter, HIDDEN_DIM))

Epoch: 1/100...  Training Step: 0...  Training loss: 4.173634...  0.4713 sec/batch
Epoch: 1/100...  Training Step: 100...  Training loss: 3.276871...  0.1138 sec/batch
Epoch: 1/100...  Training Step: 200...  Training loss: 3.433408...  0.1312 sec/batch
Epoch: 1/100...  Training Step: 300...  Training loss: 3.417367...  0.1328 sec/batch
Epoch: 1/100...  Training Step: 400...  Training loss: 3.317386...  0.1303 sec/batch
Epoch: 1/100...  Training Step: 500...  Training loss: 3.230923...  0.1354 sec/batch
Epoch: 1/100...  Training Step: 600...  Training loss: 3.369838...  0.1332 sec/batch
Epoch: 1/100...  Training Step: 700...  Training loss: 3.472124...  0.1326 sec/batch
Epoch: 1/100...  Training Step: 800...  Training loss: 3.519495...  0.1306 sec/batch
Epoch: 2/100...  Training Step: 900...  Training loss: 3.652961...  0.1302 sec/batch
Epoch: 2/100...  Training Step: 1000...  Training loss: 3.326007...  0.1341 sec/batch
Epoch: 2/100...  Training Step: 1100...  Training loss: 3.685049..

KeyboardInterrupt: 

In [20]:
def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [21]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = CharRNN(vocab_size, hidden_dim=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            x[0,0] = char_id[c]
            feed = {model.inputs: x,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, vocab_size)
        samples.append(id_v[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds,vocab_size)
            samples.append(id_v[c])
        
    return ''.join(samples)

In [22]:
checkpoint = tf.train.latest_checkpoint('checkk/')
samp = sample(checkpoint, 2000, HIDDEN_DIM, vocab_size, prime="Far")
print(samp)

INFO:tensorflow:Restoring parameters from checkk/i4460_l128.ckpt
Far



zHz

??zH?d?
zzdd
z?


HHHz
z?z?
??d?z?dd


H?H
?

d??
z?
zH???ddz?zz?
d

?
H
?H?zz
z?
?
??z
dz
HHH
H?dHddzzdH
?


H
H?z?H??H
HzHH
dd?H

H?Hzzd
dz

HHH?

zzddzHz
zdH?
z
zd?
zH???
?
??
?dHzz???Hd?
z?zd
zH?HHzd
?
zd??H?
Hdd
z?H?z?
Hzzz

?z?H???H
z?d?HH
d

dH???????Hd??


zdzzz
?H??d?H

??H??dHHHz
?dz
ddz
z???H
??H?zH
?H
?
Hzzzzz
?zdzz?HHd??????

dzHz
d??
?
??H
??z?H?d??
?H
H?HH?
??z
H


?

??dd
zd






dz?
z?
?d?
z
Hd???d
zHzdzH?dz??zzdd?
?ddz?d??z?zHHHH?z?H
?ddd
zzzHz
?HzzH
Hz?d

zz?

dd
zdH
??
?
HH?Hd

zzHz?z?H


?zH?z???H?zH???dHHz
zz
?
d??dHzHzHz
dHzd
?z??zd
HH
dz?


?H

?
d
HHz?


?H?z
?
zHHzz??
Hz?Hz??H

HzH??HHHH?HzH
H
?zz?HHz
zz
?

d?z
?dHH?
d?
?HHdzz??

z?zd?z

ddz
??HH
H
ddH??

?
d
?
?z
?dz


?d
z?
HzH???
H?d??
HzzHd?d
z?HH
??Hz
Hz??d?z??zzHd??d?
H?z?



?


?z???z
d??H?
H


zdHdzd
H?
?
H
Hz
z?ddzzd
zddd
zdHzzH

H?
?
z
H
z???zz
H?HH?H
zH
dHzd?d
z
HHdz
z



Hd
?H
Hz
z??

zd
?

z


H?Hdzd
?





?d
zHH
z
dHz

In [35]:
id_v

{0: ':',
 1: 'W',
 2: "'",
 3: 'x',
 4: 'G',
 5: 'f',
 6: 'm',
 7: 'w',
 8: '&',
 9: 'a',
 10: 'n',
 11: 'q',
 12: '.',
 13: 't',
 14: '3',
 15: 'S',
 16: 'g',
 17: 'B',
 18: 'T',
 19: 'Y',
 20: 's',
 21: 'L',
 22: '\n',
 23: 'i',
 24: '?',
 25: '$',
 26: 'c',
 27: 'A',
 28: ';',
 29: '-',
 30: ' ',
 31: ',',
 32: 'R',
 33: 'y',
 34: 'N',
 35: '!',
 36: 'V',
 37: 'E',
 38: 'X',
 39: 'o',
 40: 'e',
 41: 'z',
 42: 'k',
 43: 'O',
 44: 'r',
 45: 'Q',
 46: 'd',
 47: 'Z',
 48: 'b',
 49: 'v',
 50: 'j',
 51: 'D',
 52: 'F',
 53: 'p',
 54: 'h',
 55: 'l',
 56: 'U',
 57: 'P',
 58: 'J',
 59: 'M',
 60: 'I',
 61: 'K',
 62: 'u',
 63: 'C',
 64: 'H'}