In [26]:
import os, sys, re, json, time, shutil
import itertools
import collections
from IPython.display import display
from tensorflow.python.ops import rnn, rnn_cell

# NLTK for NLP utils and corpora
import nltk

# NumPy and TensorFlow
import numpy as np
import tensorflow as tf

# Pandas because pandas are awesome, and for pretty-printing
import pandas as pd
# Set pandas floating point display
pd.set_option('float_format', lambda f: "{0:.04f}".format(f))

# Helper libraries for this notebook
import utils; reload(utils)
import vocabulary; reload(vocabulary)
import rnnlm; reload(rnnlm)

<module 'rnnlm' from 'rnnlm.pyc'>

In [27]:
def MakeFancyRNNCell(H, keep_prob, num_layers=1):

    cell = tf.nn.rnn_cell.BasicLSTMCell(H, forget_bias=0.0)
    cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)

    return cell

In [41]:
toy_corpus = "<s> Mary had a little lamb . <s> The lamb was white as snow . <s>"
toy_corpus = np.array(toy_corpus.split())

print "Input words:"
bi = utils.batch_generator(toy_corpus, batch_size=2, max_time=4)
for i, (w,y) in enumerate(bi):
    utils.pretty_print_matrix(w, cols=["w_%d" % d for d in range(w.shape[1])], dtype=object)
    print w

print "Target words:"
bi = utils.batch_generator(toy_corpus, batch_size=2, max_time=4)
for i, (w,y) in enumerate(bi):
    utils.pretty_print_matrix(y, cols=["y_%d" % d for d in range(w.shape[1])], dtype=object)
    print y

Input words:


Unnamed: 0,w_0,w_1,w_2,w_3
0,<s>,Mary,had,a
1,<s>,The,lamb,was


[['<s>' 'Mary' 'had' 'a']
 ['<s>' 'The' 'lamb' 'was']]


Unnamed: 0,w_0,w_1,w_2
0,little,lamb,.
1,white,as,snow


[['little' 'lamb' '.']
 ['white' 'as' 'snow']]
Target words:


Unnamed: 0,y_0,y_1,y_2,y_3
0,Mary,had,a,little
1,The,lamb,was,white


[['Mary' 'had' 'a' 'little']
 ['The' 'lamb' 'was' 'white']]


Unnamed: 0,y_0,y_1,y_2
0,lamb,.,<s>
1,as,snow,.


[['lamb' '.' '<s>']
 ['as' 'snow' '.']]


In [85]:
# Construct the training graph.
tf.reset_default_graph()

#Define parameters
V = 10000
H = 200
num_layers = 1

max_time = 20
batch_size = 50
learning_rate = 0.5
keep_prob = 1.0
num_epochs = 5

with tf.name_scope("Training_Parameters"):
    learning_rate_ = tf.constant(0.1, name="learning_rate")
    dropout_keep_prob_ = tf.constant(0.5, name="dropout_keep_prob")
    # For gradient clipping, if you use it.
    # Due to a bug in TensorFlow, this needs to be an ordinary python
    # constant.
    max_grad_norm_ = 5.0

    
# Input ids, with dynamic shape depending on input.
# Should be shape [batch_size, max_time] and contain integer word indices.
# Pre-defined constants from original code
input_w_ = tf.placeholder(tf.int32, [None, None], name="w")
initial_h_ = None
logits_ = None
target_y_ = tf.placeholder(tf.int32, [None, None], name="y")
loss_ = None

with tf.name_scope("batch_size"):
    batch_size_ = tf.shape(input_w_)[0]
with tf.name_scope("max_time"):
    max_time_ = tf.shape(input_w_)[1]

ns_ = tf.tile([max_time_], [batch_size_,], name="ns")




#Embedding Layer
import word_utils
reload(word_utils)
vocabulary = word_utils.Vocabulary(toy_corpus, V)
wordids = [vocabulary.to_id(word) for word in toy_corpus]

lookup_wordids = [[vocabulary.to_id(word) for word in y[0]]]

for i in range(1, y.shape[0]):
    lookup_wordids.append([vocabulary.to_id(word) for word in y[i]])

embedding_dim = H
m_ = tf.Variable(tf.random_uniform([V, embedding_dim], -1.0, 1.0), name="m")
x = tf.nn.embedding_lookup(m_, lookup_wordids)

#Weights and biases
weight = tf.Variable(tf.truncated_normal([H, max_time_], stddev=0.1))
bias = tf.Variable(tf.constant(0.1, shape=[max_time_]))


lstm_cell = MakeFancyRNNCell(H, keep_prob, num_layers=1)
outputs, states = rnn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)

pred = outputs[-1]

sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
predictions = sess.run(pred={x: w})









Tensor("embedding_lookup:0", shape=(2, 3, 200), dtype=float32)
Tensor("strided_slice:0", shape=(3, 200), dtype=float32)
Tensor("max_time/strided_slice:0", shape=(), dtype=int32)
