In [0]:
# load package 
import time
import numpy as np
import tensorflow as tf

In [0]:
# read in text for training 
with open('sample_data/shakespeare.txt', 'r') as f:
    text = f.read()

In [0]:
# construct char set  
vocab = set(text)
vocab_to_int = {c: i for i, c in enumerate( vocab)}
int_to_vocab = dict( enumerate( vocab) )

In [0]:
# encode text 
encoded = np.array(  [ vocab_to_int[c] for c in text ], dtype =np.int32 )

In [92]:
# test 
print( encoded[:10] )
print( text[:10] )
print( vocab_to_int['F'] )

[47 54 63 35 24 48  0 54 24 54]
First Citi
47


In [0]:
# get bactch
def get_batches( arr, num_seqs, num_steps):
    batch_size = num_seqs * num_steps 
    n_batch = int(  len(arr) / batch_size )
    arr = arr[ : (batch_size * n_batch )] # keep only full batch size part 
    # reshape 
    arr = arr.reshape( (num_seqs, -1)  )
    
    for n in range(0, arr.shape[1] , num_steps):
        x  = arr[ :, n : ( n+num_steps )]
        y  = np.zeros_like(x)
        y[: , :-1] , y[:, -1] = x[: , 1: ], y[: , 0]
        # generating x, y only once, no memory storage using generator yield 
        yield x ,y 

        

In [0]:
#tmp = get_batches(encoded , num_seqs = 10, num_steps = 7)
#counter = 0
#for x, y in tmp:
#    print("x\n", x.shape, '\n' , x)
#    print("\ny\n", y.shape, '\n',  x)
#    counter = counter + 1 
#    if counter > 3:
#       break 

In [0]:
# create  placeholders 
def build_inputs( num_seqs, num_steps ):
    # placeholder for input: size num_seqs
    inputs = tf.placeholder(tf.int32, shape= ( num_seqs, num_steps), name = 'inputs')
    targets = tf.placeholder(tf.int32, shape= ( num_seqs, num_steps), name = 'targets')
    
    # keep probability for dropout model use 
    keep_prob = tf.placeholder( tf.float32, name = 'keep_prob')
    
    return inputs, targets, keep_prob

#i, t, k = build_inputs( 3, 5)


In [0]:
# build LSTM 
def make_cell(lstm_size):
    return tf.nn.rnn_cell.BasicLSTMCell(lstm_size, state_is_tuple=True)

def build_lstm( lstm_size, num_layers, num_seqs, keep_prob):
    # lstm_size: num hidden units 
    # num  layers: multiple layers/cells of LSTM 
    lstm = tf.nn.rnn_cell.BasicLSTMCell( lstm_size)
    
    # dropout to avoid overfitting 
    lstm_drop =tf.nn.rnn_cell.DropoutWrapper( lstm, output_keep_prob= keep_prob) 
    
    # multi-layer lstm : stack multiple lstm_drop cells as one lstm cell
    cell = tf.nn.rnn_cell.MultiRNNCell([make_cell(lstm_size) for _ in range(num_layers)], 
                                state_is_tuple=True)
    #cell = tf.nn.rnn_cell.MultiRNNCell( [ lstm_drop for _ in range(num_layers)]  )
    initial_state = cell.zero_state(num_seqs, tf.float32)
    
    return cell, initial_state 

In [0]:
# build output 
def build_output( lstm_output, in_size , out_size ):
    # in_size: size of lstm output 
    # out_size: num of classes to predict e.g. len( vocab )
    seq_output = tf.concat( lstm_output  , axis=1)
    x = tf.reshape( seq_output , [-1, in_size])
    
    # create Variable for softmax 
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable( tf.truncated_normal( [in_size, out_size], stddev=0.1 )  )
        softmax_b = tf.Variable( tf.zeros( out_size ))
        
    logits = tf.matmul(  x, softmax_w) + softmax_b 
    
    out = tf.nn.softmax( logits, name = 'prediction' )
    
    return  out, logits
    
    

In [0]:
# loss function 
def build_loss( logits, targets, lstm_size, num_classes  ):
    # one hot encoding to a binary vector of size num_classes, where position of 1 indicate value of targets 
    # num_classes: vocab_size 
    
    y_one_hot = tf.one_hot( targets , num_classes )
    y_reshaped = tf.reshape( y_one_hot, logits.get_shape() )
    
    # logits and targets for loss value 
    loss = tf.nn.softmax_cross_entropy_with_logits( logits=logits,labels = y_reshaped )
    loss = tf.reduce_mean(loss)
    
    return loss 
    
    
    

In [0]:
# build optimizer 
def build_optimizer(  loss , learning_rate, grad_clip):
    tvars = tf.trainable_variables( )
    grads, _ = tf.clip_by_global_norm(   tf.gradients(loss, tvars )  , grad_clip )
    train_op = tf.train.AdamOptimizer( learning_rate=learning_rate )
    optimizer  = train_op.apply_gradients(  zip( grads, tvars ))
    
    return optimizer 

In [0]:
# construct CharRNN 
class CharRNN:
    def __init__( self, num_classes, num_seqs=32, num_steps = 50 , 
                 lstm_size=128 , num_layers = 2, learning_rate = 0.001, 
                grad_clip = 5) :
        tf.reset_default_graph()
        
        # inputs 
        self.inputs, self.targets, self.keep_prob = build_inputs( num_seqs, num_steps )
        
        # LSTM   lstm_size, num_layers, batch_size, keep_prob
        cell, self.initial_state = build_lstm( lstm_size, num_layers , num_seqs, self.keep_prob   )
        
        # one hot encoding 
        x_one_hot = tf.one_hot( self.inputs, num_classes)
        
        # run RNN 
        outputs, state = tf.nn.dynamic_rnn( cell, x_one_hot , initial_state=self.initial_state)
        self.final_state = state 
        
        # prediction  lstm_output, in_size , out_size 
        self.prediction, self.logits = build_output( outputs, lstm_size, num_classes )
        
        # loss: logits, targets, lstm_size, num_classes
        self.loss  = build_loss( self.logits,  self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss , learning_rate, grad_clip  )
        
        
    


In [0]:
# training model 
# hyper parameter setup 
num_seqs =  100
num_steps = 100
lstm_size = 512
num_layers =2
learning_rate = 0.001
keep_prob = 0.5

In [0]:
# create folders 
!mkdir -p checkpoints/shake
!mkdir -p logs/2

In [0]:
# run model 
epochs = 10 

# save variables every n 
save_every_n = 200

model  = CharRNN(  num_classes = len(vocab), num_seqs=num_seqs, num_steps = num_steps , 
                 lstm_size= lstm_size , num_layers = num_layers, learning_rate = learning_rate )

saver = tf.train.Saver( max_to_keep = 100)


In [104]:

with tf.Session( ) as sess:
    sess.run( tf.global_variables_initializer( ))
    
    counter = 0 
    for  e in range(epochs):
        # train network 
        new_state = sess.run( model.initial_state)
        loss = 0 
        for x, y, in get_batches(  encoded, num_seqs, num_steps):
            counter = counter + 1
            start = time.time()
            feed = { model.inputs : x, model.targets: y, model.keep_prob : keep_prob, model.initial_state: new_state}
            batch_loss, new_state , _  = sess.run( [ model.loss, model.final_state, model.optimizer ] , feed_dict = feed )
            
            end  =time.time()
        
            if counter % 100 == 0:
                print( 'epochs: {}/{}'.format( e+ 1, epochs) ,
                      'counter: {}'.format( counter) ,
                      'loss:'.format(batch_loss), 
                      'time: {} /batch'.format( end -start ) 
                     )

            if counter % save_every_n == 0:
                saver.save( sess, 'checkpoints/shake/i{}_l{}.ckpt' .format( counter, lstm_size ))
    


epochs: 1/10 counter: 100 loss: time: 0.12998580932617188 /batch
epochs: 1/10 counter: 200 loss: time: 0.1343071460723877 /batch
epochs: 1/10 counter: 300 loss: time: 0.1359562873840332 /batch
epochs: 1/10 counter: 400 loss: time: 0.1449418067932129 /batch
epochs: 2/10 counter: 500 loss: time: 0.13666486740112305 /batch
epochs: 2/10 counter: 600 loss: time: 0.1373450756072998 /batch
epochs: 2/10 counter: 700 loss: time: 0.13715505599975586 /batch
epochs: 2/10 counter: 800 loss: time: 0.13495922088623047 /batch
epochs: 2/10 counter: 900 loss: time: 0.13182854652404785 /batch
epochs: 3/10 counter: 1000 loss: time: 0.14131760597229004 /batch
epochs: 3/10 counter: 1100 loss: time: 0.13471174240112305 /batch
epochs: 3/10 counter: 1200 loss: time: 0.13852190971374512 /batch
epochs: 3/10 counter: 1300 loss: time: 0.13552355766296387 /batch
epochs: 4/10 counter: 1400 loss: time: 0.13737893104553223 /batch
epochs: 4/10 counter: 1500 loss: time: 0.1370079517364502 /batch
epochs: 4/10 counter: 16

In [105]:
tf.train.get_checkpoint_state('checkpoints/shake')

model_checkpoint_path: "checkpoints/shake/i4400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i1000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i1200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i1400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i1600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i1800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i2000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i2200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i2400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i2600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i2800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/shake/i3

In [0]:
def pick_top_n(preds, vocab_size, top_n=2):

    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [0]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    # 1 to 1 prediction 
    model = CharRNN(num_classes = len(vocab), num_seqs=1, num_steps = 1 , 
                 lstm_size= lstm_size , num_layers = num_layers, learning_rate = learning_rate  )
    saver = tf.train.Saver()
    with tf.Session() as sess:
       
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab))
        
        samples.append(int_to_vocab[c])
        
        
        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

In [111]:
checkpoint = "checkpoints/shake/i4400_l512.ckpt"
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="I will not do that")
print("\n The follows are the generated texts:\n\n\n")
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints/shake/i4400_l512.ckpt

 The follows are the generated texts:



I will not do that I would say
'The true and trum of times to thee.

PRINCESS:
What say you that the sun at home?

LUCIO:
It is the sun that should not think on the state.

SIMONIDES:
What is your charge?

LEONTES:
What, art thou not to be a stranger to
The suit of the contraction?

POINS:
Well, I will see the truth into the sea, and so
despite the senators of the world that would not
be said to this person.

SIR HUGH EVANS:
Why, there is such a soldier that you should be said
to take the secrets of a common.

Second Citizen:
I think the true send foolish shows and the rest
and to the moon and the man that I will not
see him as the man that I will not be so are
that I will not see you all. I will not be a subject
that I am a gentleman that I would say they were
dispatch'd to the people.

PORTIA:
I heard you were as like a common proper.

SIR TOBY BELCH:
We have sent to 