This notebook samples a sequence of words from a trained model.
To run more than once, restart the kernel.

#### 1. Import Libraries
The first cell imports libraries. This code uses TensorFlow version 0.9. With a different TensorFlow version, the code may not work as expected.
Make sure the TensorFlow environment is activated before starting this notebook.

```
> source activate tensorflow
```

Some libraries need to be installed after the TensorFlow environment is activated.

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os
import random
import time
from six.moves import cPickle

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn

# shorthand alias for TensorFlow's logging module
logging = tf.logging

#### 2. Model definition
This is the model definition. The model uses LSTM (long short-term memory) cells, stacked `config.num_layers` times.

In [3]:
class Model(object):
    """Word-level LSTM language model: graph construction plus text sampling.

    The constructor builds the graph: an embedding lookup, an LSTM cell
    (optionally wrapped in dropout and stacked ``config.num_layers`` times),
    a softmax projection onto the vocabulary and a sequence cross-entropy
    cost.  When ``is_training`` is true it additionally creates a
    gradient-descent training op with global-norm gradient clipping.

    The graph-building calls follow the TensorFlow 0.9 API that this notebook
    targets (``rnn.rnn``, ``tf.split(dim, num, value)``, ``tf.concat(dim, values)``).
    """

    def __init__(self, is_training, config):
        """Build the model graph.

        Args:
            is_training: when true, dropout is applied (if
                ``config.keep_prob < 1``) and the training ops are created.
            config: object providing ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``forget_bias``,
                ``keep_prob``, ``num_layers`` and (when training)
                ``max_grad_norm``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        # word ids of the inputs and of the expected next words
        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # a single LSTM layer
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=config.forget_bias)
        # while training, apply dropout to the output of each layer
        if is_training and config.keep_prob < 1:
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell,
                                                      output_keep_prob=config.keep_prob)
        # stacks up cells (num_layers == 0 means "use the single cell as is")
        if config.num_layers == 0:
            print("Basic LSTM Cell")
            self._cell = cell = lstm_cell
        else:
            print("Multi RNN Cell of " + str(config.num_layers))
            self._cell = cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        # looks up embeddings and gets input
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, hidden_size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        # while training, also apply dropout to the embedded inputs
        if is_training and config.keep_prob < 1:
            # the second argument is the keep probability;
            # kept values are scaled by 1/keep_prob.
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        state = self._initial_state
        with tf.variable_scope("RNN"):
            # one [batch_size, hidden_size] tensor per time step
            x = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_steps, inputs)]
            # unrolls the cell over the steps: updates the state and computes outputs
            outputs, state = rnn.rnn(cell, x, initial_state=state)

        # flattens the per-step outputs into rows of hidden_size values
        output = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])
        # softmax weights
        softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
        # softmax bias
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        # calculates y = xW + b (the scores y are called logits)
        logits = tf.matmul(output, softmax_w) + softmax_b
        # computes the softmax cross-entropy loss of every target position
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])])
        # scalar cost: summed loss divided by the batch size
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state
        self._probs = tf.nn.softmax(logits)

        # everything below is only needed for training
        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        # clips the gradients by their global norm
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        # optimization
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_optimizer = optimizer.apply_gradients(zip(grads, tvars))

        # embedding rows divided by their L2 norm (unit-length word vectors)
        norm = tf.sqrt(tf.reduce_sum(tf.square(embedding), 1, keep_dims=True))
        self._normalized_embeddings = embedding / norm

    def assign_lr(self, session, lr_value):
        """Set the learning-rate variable to ``lr_value`` (training models only)."""
        session.run(tf.assign(self.lr, lr_value))

    @property
    def input_data(self):
        return self._input_data

    @property
    def targets(self):
        return self._targets

    @property
    def initial_state(self):
        return self._initial_state

    @property
    def cost(self):
        return self._cost

    @property
    def cell(self):
        return self._cell

    @property
    def final_state(self):
        return self._final_state

    @property
    def probs(self):
        return self._probs

    @property
    def lr(self):
        return self._lr

    @property
    def train_optimizer(self):
        return self._train_optimizer

    @staticmethod
    def _weighted_pick(weights):
        """Draw an index with probability proportional to ``weights``.

        The random threshold is scaled by the last cumulative value (not by
        a separately computed sum), so rounding differences between the two
        cannot push the result past the last index; the result is also
        clamped to the valid range.
        """
        cumulative = np.cumsum(np.asarray(weights, dtype=np.float64))
        threshold = np.random.rand() * cumulative[-1]
        index = int(np.searchsorted(cumulative, threshold, side='right'))
        return min(index, len(cumulative) - 1)

    @staticmethod
    def _word_input(vocab, word):
        """Return the 1x1 int32 input batch for ``word`` (id 0 if it is unknown)."""
        return np.array([[vocab.get(word, 0)]], dtype=np.int32)

    def sample(self, session, words, vocab, num=200, prime=' ', sampling_type=1):
        """Generate a sequence of words by feeding the model its own predictions.

        The model must have been built with ``batch_size == 1`` and
        ``num_steps == 1`` because one word is fed per step.

        Args:
            session: session used to run the graph.
            words: sequence indexed by word id, giving the word for each id.
            vocab: dict mapping a word to its id; unknown words use id 0.
            num: number of words to generate after the seed text.
            prime: seed text; when it is empty or only whitespace, a random
                word from ``vocab`` is used instead.
            sampling_type: 0 picks the most probable word (argmax); any other
                value draws from the predicted distribution (weighted pick).

        Returns:
            The seed text followed by the generated words, separated by
            single spaces.
        """
        state = session.run(self.cell.zero_state(1, tf.float32))
        # no usable seed: start from a random vocabulary word
        if not prime or not prime.strip():
            prime = random.choice(list(vocab.keys()))
        print(prime)
        # guard against a seed that contains no splittable word
        prime_words = prime.split() or [prime]

        # feeds every seed word except the last one to warm up the state
        for word in prime_words[:-1]:
            print(word)
            feed = {self.input_data: self._word_input(vocab, word),
                    self.initial_state: state}
            [state] = session.run([self.final_state], feed)

        ret = prime
        word = prime_words[-1]
        for _ in range(num):
            feed = {self.input_data: self._word_input(vocab, word),
                    self.initial_state: state}
            [probs, state] = session.run([self.probs, self.final_state], feed)
            p = probs[0]

            if sampling_type == 0:
                sample = int(np.argmax(p))
            else:  # sampling_type == 1 (default): weighted pick
                sample = self._weighted_pick(p)

            pred = words[sample]
            ret += ' ' + pred
            # the predicted word becomes the next input
            word = pred
        return ret

#### 3. Configurations
Config class defines configuration parameters.

In [6]:
class Config(object):
    """Configuration parameters, grouped by purpose."""

    # ---- network ----
    # number of stacked LSTM layers
    num_layers = 3
    # number of LSTM units (neurons) per layer
    hidden_size = 400
    # number of unrolled LSTM steps
    num_steps = 20
    # bias of the forget gate
    forget_bias = 0.5
    # keep probability used for dropout
    keep_prob = 0.5
    # weights are initialised uniformly in [-init_scale, init_scale]
    init_scale = 0.1

    # ---- optimisation ----
    # batch size
    batch_size = 20
    # initial learning rate
    learning_rate = 1.0
    # learning rate decay factor
    lr_decay = 0.6
    # maximum permissible norm of the gradient
    max_grad_norm = 5
    # number of epochs run with the initial learning rate
    max_epoch = 6
    # total number of epochs
    max_max_epoch = 18

#### 4. Sequence of words
The function below generates a sequence of words. We can try this part many times. When the sampling type is 1 (weighted pick), the result varies from run to run.

In [5]:
# directory that holds the vocabulary pickle ('words_vocab.pkl') and the model checkpoint
save_dir = "saved"

def sample(num, prime, sampling_type):
    """Restore the trained model from ``save_dir`` and print generated text.

    Args:
        num: the number of words to generate.
        prime: the seed word(s).
        sampling_type: sampling type, 0: argmax, 1: weighted pick.

    Prints the generated text, or a notice when ``save_dir`` holds no
    checkpoint.  Returns nothing.
    """
    # NOTE: unpickling can execute arbitrary code; only load a vocabulary
    # file that was produced by your own training run.
    with open(os.path.join(save_dir, 'words_vocab.pkl'), 'rb') as f:
        words, vocab = cPickle.load(f)

    # sampling feeds one word at a time
    sample_config = Config()
    sample_config.vocab_size = len(vocab)
    sample_config.batch_size = 1
    sample_config.num_steps = 1

    # Build in a fresh graph so that a second call does not collide with the
    # "model" variables created by an earlier call in the same kernel.
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-sample_config.init_scale,
                                                    sample_config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            # is_training=False: no dropout while sampling and no optimizer ops
            model = Model(is_training=False, config=sample_config)
        with tf.Session() as session:
            tf.initialize_all_variables().run()
            saver = tf.train.Saver(tf.all_variables())
            ckpt = tf.train.get_checkpoint_state(save_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)
                print(model.sample(session, words, vocab, num, prime, sampling_type))
            else:
                # previously this case produced no output at all
                print("No checkpoint found in '" + save_dir + "'; nothing was sampled.")
            
# generate 200 words seeded with "Elizabeth", using weighted pick (sampling_type=1)
sample(200, "Elizabeth", 1)



Multi RNN Cell of 3
Elizabeth
Elizabeth laugh to which any other, that stopt this think?" "Oh, "Yes--no--never young man of sure and I really ever could disapprove for me." CHAPTER paused. I can't only be happy to extraordinary on all the name of your pleasures, secret or very unreasonable; everybody am as going off, to confession at both of Miss Woodhouse, it I am afraid suppose; and his modesty, to-morrow of us first--she has the table) which gave a mother of it: her would be reasonably afraid of explain:--there "Oh!" I think, not altogether to be out of your side, by She as this, but you know." Emma saw his comparison without herself. "I lives, could another, cannot think on my sister's poor earl's creature!" said to inquiry, and I knew unlike his continuance in me: not not to have heard himself talking, I would not even happen if your heart to me as assuring you to convince you of Mr. Elton. I understood him at last address.-- "Yes, she has been half off Carter and expressing his p