In [1]:
import tensorflow as tf
from tensorflow.python.ops import lookup_ops
from tensorflow.contrib import data
import os

In [2]:
OUT_DATA_PATH = '../data'
vocab_file = os.path.join(OUT_DATA_PATH, 'vocab100k.txt')

In [3]:
train_ctx = os.path.join(OUT_DATA_PATH, 'train.ctx')
train_utterance = os.path.join(OUT_DATA_PATH, 'train.utterance')
train_label = os.path.join(OUT_DATA_PATH, 'train.label')

In [4]:
vocab_table = lookup_ops.index_table_from_file(vocab_file, default_value=0)
def create_train_dataset_iterator(train_ctx, train_utterance, train_label, vocab_table):
    ctx_dataset = data.TextLineDataset(train_ctx)
    utterance_dataset = data.TextLineDataset(train_utterance)
    label_dataset = data.TextLineDataset(train_label)
    joint_dataset = data.Dataset.zip((ctx_dataset, utterance_dataset, label_dataset))

    joint_dataset = joint_dataset.map(lambda ctx, utterance, label: (tf.string_split([ctx]).values, tf.string_split([utterance]).values, tf.string_to_number(label)))
    
    joint_dataset = joint_dataset.map(lambda ctx, utterance, label: (vocab_table.lookup(ctx), vocab_table.lookup(utterance), label))
    joint_dataset = joint_dataset.map(lambda ctx, utterance, label: ctx, tf.size(ctx),)
    
    joint_dataset = joint_dataset.padded_batch(32, padded_shapes=(tf.TensorShape([None]), 
                                                          tf.TensorShape([None]), tf.TensorShape([])))
    iterator = joint_dataset.make_initializable_iterator()
    return iterator

In [5]:
V = 100000
d = 300
W = tf.get_variable('embedding', shape=[V, d])

In [6]:
#Define placeholders for input
context = tf.placeholder(tf.int64, shape=[32, 20])
utterance = tf.placeholder(tf.int64, shape=[32, 20])
label = tf.placeholder(tf.float32)

In [7]:
iterator = create_train_dataset_iterator(train_ctx, train_utterance, train_label, vocab_table)
context_vector = tf.nn.embedding_lookup(W, context)

In [8]:
sess = tf.Session()

In [9]:
sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())
sess.run(iterator.initializer)

In [10]:
batch_ctx, batch_utterance, _ = sess.run(iterator.get_next())

In [11]:
print (batch_ctx.shape)

(32, 372)


In [None]:
cv = sess.run(context_vector, feed_dict={context: batch_ctx})

In [None]:
print (cv.shape)

In [12]:
print(context_vector)

Tensor("embedding_lookup:0", shape=(32, 20, 300), dtype=float32)


In [13]:
with tf.variable_scope('rnn') as scope:
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(d)
    init_state = tf.zeros([32, d])
    _, c = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=context_vector, dtype=tf.float32)

In [14]:
print (c)

LSTMStateTuple(c=<tf.Tensor 'rnn/rnn/while/Exit_2:0' shape=(32, 300) dtype=float32>, h=<tf.Tensor 'rnn/rnn/while/Exit_3:0' shape=(32, 300) dtype=float32>)


In [15]:
utterance_vector = tf.nn.embedding_lookup(W, utterance)
with tf.variable_scope('rnn', reuse=True) as scope:
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(d)
    init_state = tf.zeros([32, d])
    _, u = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=utterance_vector, dtype=tf.float32)

In [16]:
print (u)

LSTMStateTuple(c=<tf.Tensor 'rnn_1/rnn/while/Exit_2:0' shape=(32, 300) dtype=float32>, h=<tf.Tensor 'rnn_1/rnn/while/Exit_3:0' shape=(32, 300) dtype=float32>)
