In [1]:
import tensorflow as tf
import numpy as np

from train import FLAGS
from seq2seq import Seq2seq
from data_handler import Data

# Estimator run configuration with model_dir pointed at the experiment directory.
run_config = tf.contrib.learn.RunConfig().replace(model_dir=FLAGS.experiment_dir)

# Build the data handler from the training flags, then load its word vectors
# (presumably this is what populates data.embeddings_mat -- TODO confirm).
data = Data(FLAGS)
data.initialize_word_vectors()

# The model takes the vocabulary size, the flags and the embedding matrix.
model = Seq2seq(
    data.vocab_size,
    FLAGS,
    data.embeddings_mat,
)

The `Data` object creates two functions: an `input_fn` that builds the placeholders, and a `feed_fn` that supplies real data for them

In [2]:
# make_input_fn returns a pair of callables for the 'train' split:
# input_fn (called below to get the placeholder tensors) and
# feed_fn (called below to get a batch of actual values).
input_fn, feed_fn = data.make_input_fn('train')

Here's what the placeholders look like

In [11]:
# Returns a (features, labels) tuple; labels is None here.
# Note the numeric suffix in the tensor names (e.g. 'label_1:0'): every call
# adds another set of placeholders to the default graph.
input_fn()

({'label': <tf.Tensor 'label_1:0' shape=(?,) dtype=float32>,
  'source_in': <tf.Tensor 'source_in_1:0' shape=(?, ?) dtype=int64>,
  'source_out': <tf.Tensor 'source_out_1:0' shape=(?, ?) dtype=int64>,
  'target_in': <tf.Tensor 'target_in_1:0' shape=(?, ?) dtype=int64>,
  'target_out': <tf.Tensor 'target_out_1:0' shape=(?, ?) dtype=int64>},
 None)

And here's what one batch of real data looks like

In [19]:
# Show one batch as returned by feed_fn: a dict keyed by tensor name.
# The call form of print behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(feed_fn())

{'target_out:0': [[19359, 9336, 1255, 22406, 12883, 12163, 7361, 12943, 1, 1, 1, 1, 1, 1], [19359, 20980, 20233, 10854, 19111, 8773, 16176, 12065, 1, 1, 1, 1, 1, 1], [19359, 7567, 8566, 3151, 19111, 282, 19359, 21734, 8780, 8775, 1, 1, 1, 1], [525, 19102, 17431, 17459, 5930, 19359, 4511, 19500, 18964, 1535, 20446, 1, 1, 1], [19359, 18616, 10744, 19359, 12895, 6591, 8667, 15584, 7361, 21384, 1, 1, 1, 1], [15052, 6252, 1656, 4246, 2152, 20588, 3578, 4843, 1, 1, 1, 1, 1, 1], [19359, 6591, 8667, 718, 21384, 8773, 11365, 12895, 1, 1, 1, 1, 1, 1], [7361, 379, 10744, 19359, 12895, 6591, 8667, 8773, 7361, 16125, 1, 1, 1, 1], [19359, 14713, 6781, 8780, 19359, 21969, 1625, 718, 22406, 20040, 8709, 1, 1, 1], [19359, 14713, 6781, 8780, 19359, 21969, 1625, 718, 22406, 20040, 8709, 1, 1, 1], [12514, 14713, 718, 8709, 20040, 6781, 8780, 7361, 1625, 1, 1, 1, 1, 1], [19359, 7202, 1656, 17988, 18083, 8780, 7361, 769, 1, 1, 1, 1, 1, 1], [15761, 22224, 6335, 6051, 8780, 19359, 8295, 12974, 548, 4507, 1, 1

In [16]:
# Keep one batch around for inspection; it is a dict keyed by tensor name
# (e.g. 'source_in:0', 'target_out:0').
batch = feed_fn()

In [17]:
# Token ids of the first source sequence in the batch
# (the trailing 1s look like padding -- they decode to '</S>' below).
batch['source_in:0'][0]

[15052,
 11809,
 19359,
 10800,
 6166,
 8780,
 19706,
 10744,
 19359,
 14818,
 1,
 1,
 1,
 1]

In [18]:
# Translate the first source sequence from token ids back to words
# using the reverse vocabulary.
[data.rev_vocab[token_id] for token_id in batch['source_in:0'][0]]

['two',
 'bicycles',
 'a',
 'woman',
 'walking',
 'in',
 'front',
 'of',
 'a',
 'shop',
 '</S>',
 '</S>',
 '</S>',
 '</S>']

The `Seq2seq` object controls building the entire graph

In [4]:
# input_fn returns (features, labels); only the features dict of placeholders
# is needed here (the second element was None in the output above).
features, _ = input_fn()

In [24]:
# Display the placeholder dict that will be handed to the model.
features

{'label': <tf.Tensor 'label_3:0' shape=(?,) dtype=float32>,
 'source_in': <tf.Tensor 'source_in_3:0' shape=(?, ?) dtype=int64>,
 'source_out': <tf.Tensor 'source_out_3:0' shape=(?, ?) dtype=int64>,
 'target_in': <tf.Tensor 'target_in_3:0' shape=(?, ?) dtype=int64>,
 'target_out': <tf.Tensor 'target_out_3:0' shape=(?, ?) dtype=int64>}

In [25]:
# Build the training graph on top of the placeholders.
# Labels are passed as None; everything the model needs is in `features`.
graph_fn_out = model.make_graph(tf.estimator.ModeKeys.TRAIN, features, None, FLAGS)

In [26]:
# Display what make_graph returned (an EstimatorSpec in the unmodified model).
graph_fn_out

EstimatorSpec(mode='train', predictions=<tf.Tensor 'decode_2/decoder/transpose_1:0' shape=(?, ?) dtype=int32>, loss=<tf.Tensor 'add_5:0' shape=() dtype=float32>, train_op=<tf.Tensor 'OptimizeLoss/control_dependency:0' shape=() dtype=float32>, eval_metric_ops={'source_loss': (<tf.Tensor 'mean/value:0' shape=() dtype=float32>, <tf.Tensor 'mean/update_op:0' shape=() dtype=float32>), 'target_loss': (<tf.Tensor 'mean_1/value:0' shape=() dtype=float32>, <tf.Tensor 'mean_1/update_op:0' shape=() dtype=float32>), 'sim_loss': (<tf.Tensor 'mean_2/value:0' shape=() dtype=float32>, <tf.Tensor 'mean_2/update_op:0' shape=() dtype=float32>)}, export_outputs=None, training_chief_hooks=(), training_hooks=(), scaffold=<tensorflow.python.training.monitored_session.Scaffold object at 0x12831eb50>, evaluation_hooks=())

Right now, `model.make_graph` returns an `EstimatorSpec`, but we can easily comment that out and replace it with a return statement that looks like
```python
return train_output_source, source_in, source_out, target_in, target_out, label
```

In [30]:
# Discard everything built so far and start from an empty default graph.
tf.reset_default_graph()

# The placeholders in `features` were created in the graph that was just
# discarded, and tensors from one graph cannot be used to build ops in another.
# Rebuild them so the next make_graph call works entirely in the new graph.
features, _ = input_fn()

In [5]:
# Requires the modified make_graph that returns the raw tensors instead of
# an EstimatorSpec (see the return statement shown above).
(train_output_source,
 source_in, source_out,
 target_in, target_out,
 label) = model.make_graph(
    tf.estimator.ModeKeys.TRAIN, features, None, FLAGS)

If the previous cell fails, you might need to restart the kernel and run it again, along with ONLY the earlier cells that it depends on

Now we have some of the tensors that the model actually uses during training

In [7]:
# Call-style print runs on both Python 2 and Python 3.
print(source_in)

Tensor("source_in:0", shape=(?, ?), dtype=int64)


In [8]:
# Call-style print runs on both Python 2 and Python 3.
print(train_output_source)

BasicDecoderOutput(rnn_output=<tf.Tensor 'decode/decoder/transpose:0' shape=(?, ?, 22946) dtype=float32>, sample_id=<tf.Tensor 'decode/decoder/transpose_1:0' shape=(?, ?) dtype=int32>)


We'll have to use a feed dict in order to see what they look like inside of a session

In [9]:
# Fetch a batch of real values to feed into the placeholders.
# The "Start of Epoch 1" message below is printed by this call.
batch = feed_fn()

Start of Epoch 1


In [10]:
# Feed dict: pair every placeholder returned by make_graph with the batch
# entry stored under the matching tensor name.
d = {
    placeholder: batch[tensor_name]
    for tensor_name, placeholder in (
        ('source_in:0', source_in),
        ('source_out:0', source_out),
        ('target_in:0', target_in),
        ('target_out:0', target_out),
        ('label:0', label),
    )
}

In [18]:
# Evaluate the decoder output once for the fed batch in a short-lived session.
with tf.Session() as sess:
    # Variables are only initialized here, not restored from a checkpoint,
    # so the values below come from freshly initialized weights.
    sess.run(tf.global_variables_initializer())
    # Fetch the rnn_output field of the decoder output, feeding the real batch
    # into the placeholders.
    decoder_output = sess.run(train_output_source.rnn_output, feed_dict=d)

In [19]:
# The tensor was declared as (?, ?, 22946); with a concrete batch fed in,
# the first two dimensions become known.
# Presumably (batch, time, vocab) -- TODO confirm axis meaning.
decoder_output.shape

(32, 19, 22946)

In [21]:
# One value per entry along the first axis, taken at index 0 of both
# remaining axes. Presumably the first-step output for vocabulary id 0 of
# each example -- TODO confirm.
decoder_output[:, 0, 0]

array([ 0.01321699,  0.01082042,  0.01030187,  0.00840517, -0.00436003,
       -0.00350835,  0.00080211,  0.01147799,  0.01700433,  0.01687415,
        0.01302104,  0.01692699,  0.01697068,  0.00232103,  0.00269236,
        0.01225354,  0.00372441,  0.01017307, -0.00699852, -0.00093136,
        0.00837284,  0.00770196, -0.01195256,  0.00625178,  0.00455421,
        0.02162324,  0.00621916,  0.01318047,  0.03228229,  0.01616982,
        0.01025306,  0.01015125], dtype=float32)