In [1]:
import tensorflow as tf
from seq2seq import Seq2seq

In [2]:
tf.__version__

'1.4.0'

In [3]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import logging
import tensorflow as tf
from seq2seq import Seq2seq
from data_handler import Data

# All hyper-parameters and file locations are exposed as TensorFlow flags and
# read back through the shared FLAGS object.
FLAGS = tf.flags.FLAGS

# Model related
tf.flags.DEFINE_integer('num_units'         , 256           , 'Number of units in a LSTM cell')
tf.flags.DEFINE_integer('embed_dim'         , 256           , 'Size of the embedding vector')

# Training related
tf.flags.DEFINE_float('learning_rate'       , 0.001         , 'learning rate for the optimizer')
# The help texts for 'optimizer' and 'batch_size' were copy-paste leftovers
# ('Name of the train source file' / 'random seed for training sampling').
tf.flags.DEFINE_string('optimizer'          , 'Adam'        , 'Name of the optimizer to use')
tf.flags.DEFINE_integer('batch_size'        , 32            , 'Number of examples per batch')
tf.flags.DEFINE_integer('print_every'       , 100           , 'print records every n iteration')
tf.flags.DEFINE_integer('iterations'        , 10000         , 'number of iterations to train')
tf.flags.DEFINE_string('model_dir'          , 'checkpoints_new' , 'Directory where to save the model')
tf.flags.DEFINE_string('experiment_dir'          , 'experiments' , 'Directory where to save the experiment')

tf.flags.DEFINE_integer('input_max_length'  , 30            , 'Max length of input sequence to use')
tf.flags.DEFINE_integer('output_max_length' , 30            , 'Max length of output sequence to use')
# NOTE(review): this help text duplicates 'output_max_length'; the intended
# meaning of 'max_length' is not visible in this notebook -- confirm and reword.
tf.flags.DEFINE_integer('max_length' , 30            , 'Max length of output sequence to use')

tf.flags.DEFINE_bool('use_residual_lstm'    , True          , 'To use the residual connection with the residual LSTM')

# Data related
tf.flags.DEFINE_string('input_filename', 'data/mscoco/train_source.txt', 'Name of the train source file')
tf.flags.DEFINE_string('output_filename', 'data/mscoco/train_target.txt', 'Name of the train target file')
tf.flags.DEFINE_string('vocab_filename', 'data/mscoco/train_vocab.txt', 'Name of the vocab file')
tf.flags.DEFINE_string('shuffled_filename', 'data/mscoco/train_target_shuffled.txt', 'Name of shuffled targets')

In [4]:
from tensorflow.contrib.learn import learn_runner

def run_experiment(argv=None):
    """Run training with interleaved evaluation through `learn_runner`.

    A `tf.contrib.learn.RunConfig` is created with its `model_dir` set to
    `FLAGS.experiment_dir`, and `experiment_fn` is executed under the
    'train_and_evaluate' schedule.

    Args:
        argv: Not used by this function.
    """
    config = tf.contrib.learn.RunConfig().replace(model_dir=FLAGS.experiment_dir)

    learn_runner.run(
        experiment_fn=experiment_fn,
        run_config=config,
        schedule='train_and_evaluate',
    )

In [5]:
def experiment_fn(run_config, params):
    """Assemble the `tf.contrib.learn.Experiment` that `learn_runner` executes.

    Args:
        run_config: Run configuration handed to the Estimator as `config`.
        params: Not read here; the Estimator is given `FLAGS` as its params.

    Returns:
        The configured `tf.contrib.learn.Experiment`.
    """
    data = Data(FLAGS)
    seq2seq_model = Seq2seq(data.vocab_size, FLAGS)

    estimator = tf.estimator.Estimator(
        model_fn=seq2seq_model.make_graph,
        config=run_config,
        params=FLAGS)

    # Each split yields a graph input function plus a function producing the
    # values fed through a FeedFnHook.
    train_input_fn, train_feed_fn = data.make_input_fn('train')
    eval_input_fn, eval_feed_fn = data.make_input_fn('test')

    # Tensors logged every `print_every` steps; the formatter gets its own
    # copy of the same names.
    logged_tensors = ['source_ex', 'target_ex', 'predict']
    logging_hook = tf.train.LoggingTensorHook(
        logged_tensors,
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(list(logged_tensors)))

    train_hooks = [tf.train.FeedFnHook(train_feed_fn), logging_hook]
    evaluation_hooks = [tf.train.FeedFnHook(eval_feed_fn)]

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=FLAGS.iterations,
        min_eval_frequency=FLAGS.print_every,
        train_monitors=train_hooks,
        eval_hooks=evaluation_hooks,
        eval_steps=10,
    )

In [None]:
run_experiment()

INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1198f8e90>, '_model_dir': 'experiments', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_save_summary_steps': 100, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_log_step_count_steps': 100, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_evaluation_master': '', '_master': ''}
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into experiments/model.ckpt.
INFO:tensorflow:
****source_ex == a very clean and well decorated empty bathroom           
****target_ex == a young man is riding on a horse and enjoying her time    

INFO:tensorflow:global_step/sec: 0.868234
INFO:tensorflow:
****source_ex == rows of motorcycles are on either side of the street      
****target_ex == many people and line of parked scooters and motorcycles at night     
****predict == a man is on a motorcycle with a motorcycle   
INFO:tensorflow:loss = 8.94155, step = 1201 (115.176 sec)
INFO:tensorflow:global_step/sec: 0.99885
INFO:tensorflow:
****source_ex == a motorcycle with a bag on the back of it parked in the road  
****target_ex == a red stop sign sitting in the middle of a tall grass covered field  
****predict == a man is sitting on a motorcycle with a motorcycle 
INFO:tensorflow:loss = 7.94886, step = 1301 (100.115 sec)
INFO:tensorflow:global_step/sec: 0.981491
INFO:tensorflow:
****source_ex == the small motorcycles are lined up on the grass                    
****target_ex == this is an image of a row of motorcycles         
****predict == a man sitting on a motorcycle       
INFO:tensorflow:loss = 8.52021, step = 1401 (1

INFO:tensorflow:Validation (step 2900): source_loss = 3.8026, loss = 8.18589, target_loss = 3.61309, sim_loss = 0.770187, global_step = 2894
INFO:tensorflow:global_step/sec: 0.905278
INFO:tensorflow:
****source_ex == a train that is on the tracks next to the grass       
****target_ex == homemade chicken and curly noodle soup is chock full of carrots corn and peas     
****predict == a train is on the tracks near a train    
INFO:tensorflow:loss = 6.80607, step = 2901 (110.463 sec)
INFO:tensorflow:global_step/sec: 0.993812
INFO:tensorflow:
****source_ex == a couple of sheep stand on top of a hill        
****target_ex == three animals stand near a fence at the top of a small hill     
****predict == a group of sheep standing on a lush green field     
INFO:tensorflow:loss = 6.72689, step = 3001 (100.623 sec)
INFO:tensorflow:global_step/sec: 0.982514
INFO:tensorflow:
****source_ex == narrow city street with vehicles and pedestrian traffic         
****target_ex == a view of a city stree

INFO:tensorflow:Restoring parameters from experiments/model.ckpt-4636
INFO:tensorflow:Evaluation [1/10]
INFO:tensorflow:Evaluation [2/10]
INFO:tensorflow:Evaluation [3/10]
INFO:tensorflow:Evaluation [4/10]
INFO:tensorflow:Evaluation [5/10]
INFO:tensorflow:Evaluation [6/10]
INFO:tensorflow:Evaluation [7/10]
INFO:tensorflow:Evaluation [8/10]
INFO:tensorflow:Evaluation [9/10]
INFO:tensorflow:Evaluation [10/10]
INFO:tensorflow:Finished evaluation at 2018-01-23-16:24:24
INFO:tensorflow:Saving dict for global step 4636: global_step = 4636, loss = 6.8412, sim_loss = 0.793966, source_loss = 3.08148, target_loss = 2.96576
INFO:tensorflow:Validation (step 4700): source_loss = 3.08148, loss = 6.8412, target_loss = 2.96576, sim_loss = 0.793966, global_step = 4636
INFO:tensorflow:global_step/sec: 0.888462
INFO:tensorflow:
****source_ex == full perspective of a few creatures and single individual         
****target_ex == elephants standing near metal rails with their trunks near the ground        


INFO:tensorflow:loss = 4.1349, step = 6301 (100.042 sec)
INFO:tensorflow:Saving checkpoints for 6381 into experiments/model.ckpt.
INFO:tensorflow:Starting evaluation at 2018-01-23-16:53:34
INFO:tensorflow:Restoring parameters from experiments/model.ckpt-6381
INFO:tensorflow:Evaluation [1/10]
INFO:tensorflow:Evaluation [2/10]
INFO:tensorflow:Evaluation [3/10]
INFO:tensorflow:Evaluation [4/10]
INFO:tensorflow:Evaluation [5/10]
INFO:tensorflow:Evaluation [6/10]
INFO:tensorflow:Evaluation [7/10]
INFO:tensorflow:Evaluation [8/10]
INFO:tensorflow:Evaluation [9/10]
INFO:tensorflow:Evaluation [10/10]
INFO:tensorflow:Finished evaluation at 2018-01-23-16:53:40
INFO:tensorflow:Saving dict for global step 6381: global_step = 6381, loss = 5.91707, sim_loss = 1.13424, source_loss = 2.4778, target_loss = 2.30502
INFO:tensorflow:Validation (step 6400): source_loss = 2.4778, loss = 5.91707, target_loss = 2.30502, sim_loss = 1.13424, global_step = 6381
INFO:tensorflow:global_step/sec: 0.883528
INFO:tens

In [176]:
!head data/mscoco/train_source.txt

a very clean and well decorated empty bathroom
a bathroom with a border of butterflies and blue paint on the walls above it
a blue and white bathroom with butterfly themed wall tiles
an angled view of a beautifully decorated bathroom
a panoramic view of a kitchen and all of its appliances
a wide angle view of the kitchen work area
a panoramic photo of a kitchen and dining room
multiple photos of a brown and white kitchen
a graffiti ed stop sign across the street from a red car
a red stop sign with a bush bumper sticker under the word stop


In [201]:
!head data/mscoco/test_target.txt

a black metal bicycle with a clock inside the front wheel
a bicycle replica with a clock as the front wheel
a bicycle figurine in which the front wheel is replaced with a clock
the bike has a clock as a tire
a black honda motorcycle with a dark burgundy seat
a black honda motorcycle parked in front of a garage
ma motorcycle parked on the gravel in front of a garage
a honda motorcycle parked in a grass driveway
this is a blue and white bathroom with a wall sink and a lifesaver on the wall
a room with blue walls and a white sink and door


In [202]:
!head data/mscoco/test_target_shuffled.txt

a metal clock tower on a sidewalk in front of a very tall building
a group of boats on top of wet sand
a kitchen with wood floors and  green counter tops
a slice of pizza sitting on top of a paper plate
a man with safety equipment next to a fallen tree and red fire hydrant
three people are riding on an elephant's back through the jungle
a teddy bear resting on a pillow in a child's bedroom
a couple of people that are on their cell phone
a man wearing a purple shirt  black vest and tie posing for the camera
a person skating in very much snow with warm clothes


In [203]:
string = 'data/mscoco/test_target_shuffled.txt'

In [208]:
file1, file2, file3 = 'data/mscoco/test_target_shuffled.txt', 'data/mscoco/test_target_shuffled.txt', 'data/mscoco/test_target_shuffled.txt'

In [209]:
# Point the three test-split paths at the train split. A literal substring
# swap needs no regex, so this cell no longer depends on `re`, which is only
# imported in a later cell.
file1, file2, file3 = (f.replace('test', 'train') for f in (file1, file2, file3))

In [210]:
file1

'data/mscoco/train_target_shuffled.txt'

In [211]:
file2

'data/mscoco/train_target_shuffled.txt'

In [212]:
file3

'data/mscoco/train_target_shuffled.txt'

In [204]:
import re

In [206]:
re.sub('train', 'test', string)

'data/mscoco/test_target_shuffled.txt'

In [207]:
ls data/mscoco

test_source.txt            train_target.txt
test_target.txt            train_target_shuffled.txt
test_target_shuffled.txt   train_vocab.txt
train_source.txt


In [189]:
# Read the whole file and close the handle right away. split('\n') is kept
# (rather than splitlines) so indices and lengths stay exactly as before.
with open('data/mscoco/train_source.txt') as f:
    train_source = f.read().split('\n')

In [190]:
[i for i, sentence in enumerate(train_source) if sentence == 'young people and their groceries in a kitchen']

[12811]

In [198]:
train_source[12807:12813]

['a cooking range is sitting in the middle of the kitchen',
 'a group of pretty ladies and one man standing around a table',
 'several people standing around a table with bags on it',
 'woman and man around kitchen island with grocery bags and beverages',
 'young people and their groceries in a kitchen',
 'two dogs are sitting in a cart attached to a parked bicycle']

In [194]:
# Read the whole file and close the handle right away. split('\n') is kept
# (rather than splitlines) so indices and lengths stay exactly as before.
with open('data/mscoco/train_target.txt') as f:
    train_target = f.read().split('\n')

In [199]:
train_target[12807:12813]

['a stove is away from the wall in a kitchen area',
 'several people standing around a table with bags on it',
 'a group of pretty ladies and one man standing around a table',
 'young people and their groceries in a kitchen',
 'woman and man around kitchen island with grocery bags and beverages',
 'a bicycle leaning against a wall with a seat attachment behind it and two dogs sitting in the attachment']

In [180]:
# Read the whole file and close the handle right away. split('\n') is kept
# (rather than splitlines) so indices and lengths stay exactly as before.
with open('data/mscoco/test_source.txt') as f:
    test_source = f.read().split('\n')

In [181]:
# Read the whole file and close the handle right away. split('\n') is kept
# (rather than splitlines) so indices and lengths stay exactly as before.
with open('data/mscoco/test_target.txt') as f:
    test_target = f.read().split('\n')

In [188]:
len(test_source)

162024

In [200]:
import random

# Shuffle a copy: `test_target` stays line-aligned with `test_source`, so
# cells that print or compare the two lists still see matching pairs.
# NOTE(review): no seed is set, so every run writes a different permutation.
test_target_shuffled = list(test_target)
random.shuffle(test_target_shuffled)
with open('data/mscoco/test_target_shuffled.txt', 'w') as f:
    for line in test_target_shuffled:
        f.write(line + '\n')

In [187]:
# Show the first ten source/target pairs, one blank-line-separated pair each.
# Single-argument print(...) runs unchanged on Python 2 and Python 3.
for i in range(10):
    print(test_source[i])
    print(test_target[i])
    print('\n')


a bicycle replica with a clock as the front wheel
a black metal bicycle with a clock inside the front wheel


a black metal bicycle with a clock inside the front wheel
a bicycle replica with a clock as the front wheel


the bike has a clock as a tire
a bicycle figurine in which the front wheel is replaced with a clock


a bicycle figurine in which the front wheel is replaced with a clock
the bike has a clock as a tire


a black honda motorcycle parked in front of a garage
a black honda motorcycle with a dark burgundy seat


a black honda motorcycle with a dark burgundy seat
a black honda motorcycle parked in front of a garage


a honda motorcycle parked in a grass driveway
ma motorcycle parked on the gravel in front of a garage


ma motorcycle parked on the gravel in front of a garage
a honda motorcycle parked in a grass driveway


a room with blue walls and a white sink and door
this is a blue and white bathroom with a wall sink and a lifesaver on the wall


this is a blue and white b

In [116]:
sess.close()

In [117]:
tf.reset_default_graph()

In [118]:
from data_handler import Data

# Build the data pipeline from the flag values.
data = Data(FLAGS)
# make_input_fn() yields two callables, unpacked here as input_fn and feed_fn.
input_fn, feed_fn = data.make_input_fn()
# Only the first element of input_fn()'s result is kept.
features, _ = input_fn()
# One batch of feed values; later cells index it with the keys
# 'source:0', 'target:0' and 'label:0'.
feed = feed_fn()

model = Seq2seq(data.vocab_size, FLAGS)

In [119]:
# Build the training graph directly so intermediate tensors can be fetched.
# Later cells fetch `train_output_source.rnn_output` and use `source`,
# `target` and `label` as feed_dict keys.
# NOTE(review): experiment_fn passes this same method to tf.estimator.Estimator
# as model_fn; a 5-tuple return presumably belongs to a different revision of
# seq2seq.py -- confirm.
train_output_source, loss, source, target, label = model.make_graph(
        tf.estimator.ModeKeys.TRAIN, features, None, FLAGS
)

In [120]:
sess = tf.InteractiveSession()

In [121]:
sess.run(tf.global_variables_initializer())

In [168]:
# Fetch the decoder's `rnn_output` for the current batch.
batch_feed = {
    source: feed['source:0'],
    target: feed['target:0'],
    label: feed['label:0'],
}
out = sess.run(train_output_source.rnn_output, feed_dict=batch_feed)

In [169]:
# Read back the `source` values for the same batch.
batch_feed = {
    source: feed['source:0'],
    target: feed['target:0'],
    label: feed['label:0'],
}
actual = sess.run(source, feed_dict=batch_feed)

In [170]:
out.shape

(32, 16, 22946)

In [171]:
actual.shape

(32, 16)

In [172]:
feed = feed_fn()

In [173]:
# Keep drawing batches until the decoder output and the source ids disagree
# on their second dimension; the offending batch is left in `feed`, `out`
# and `actual`. The loop only ends once such a batch is found.
seq_shape, lab_shape = out.shape[1], actual.shape[1]
while seq_shape == lab_shape:
    feed = feed_fn()
    batch_feed = {
        source: feed['source:0'],
        target: feed['target:0'],
        label: feed['label:0'],
    }
    out = sess.run(train_output_source.rnn_output, feed_dict=batch_feed)
    actual = sess.run(source, feed_dict=batch_feed)
    seq_shape, lab_shape = out.shape[1], actual.shape[1]

In [174]:
out.shape

(32, 30, 22946)

In [175]:
lab_shape

31

In [102]:
cell = tf.contrib.rnn.LSTMCell(256)

In [105]:
decoder = tf.contrib.seq2seq.BasicDecoder(cell, helper=helper, initial_state=None)

In [107]:
decoder.output_size

BasicDecoderOutput(rnn_output=256, sample_id=TensorShape([]))

In [114]:
# Print each row of `actual` on its own.
# Single-argument print(...) runs unchanged on Python 2 and Python 3.
for a in actual:
    print(a)

[19359  7825 19450 22224 14156 14984  2327 21692     1     1     1     1
     1     1     1     1     1     1     1     1     1     1     1     1
     1     1]
[19359 21692   718 19359  8345 10744   917 22224  4184  9458  1656  7361
  9413  7133  8775     1     1     1     1     1     1     1     1     1
     1     1]
[19359  4184 22224 21969 21692   718 17692 20310 17065 14465     1     1
     1     1     1     1     1     1     1     1     1     1     1     1
     1     1]
[ 6335 21313 22784 10744 19359 18142 14984 21692     1     1     1     1
     1     1     1     1     1     1     1     1     1     1     1     1
     1     1]
[19359 10868 22784 10744 19359  1625 22224 16487 10744 17988 22253     1
     1     1     1     1     1     1     1     1     1     1     1     1
     1     1]
[19359 21974 17008 22784 10744  7361  1625 18667 19253     1     1     1
     1     1     1     1     1     1     1     1     1     1     1     1
     1     1]
[19359 10868 13155 10744 19359  1625 222

In [94]:
tout.shape

(32, 27)

In [77]:
actual.shape

(32, 26)

In [75]:
out.shape

(32, 19, 22946)

In [79]:
32*26

832

In [80]:
32*19

608

In [81]:
actual[0]

array([19359,  7825, 19450, 22224, 14156, 14984,  2327, 21692,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1])

In [56]:
# List every graph op whose type contains 'laceholder'. The pattern is a
# plain literal, so a substring test is equivalent to the regex search and
# needs no `re` import. print(...) runs on both Python 2 and Python 3.
for op in sess.graph.get_operations():
    if 'laceholder' in op.type:
        print(op.name)

source
target
label


In [53]:
train_output_source

BasicDecoderOutput(rnn_output=<tf.Tensor 'decode/decoder/transpose:0' shape=(?, ?, 22946) dtype=float32>, sample_id=<tf.Tensor 'decode/decoder/transpose_1:0' shape=(?, ?) dtype=int32>)

In [31]:
sess.close()

In [34]:
from tensorflow.contrib import layers

def encode(seq, reuse=None):
    """Embed a batch of token ids and run it through the LSTM encoder.

    Reads `vocab_size`, `embed_dim`, `num_units` and `FLAGS` from the
    notebook's global namespace.

    Args:
        seq: Integer tensor of token ids, reduced along axis 1 to obtain
            per-row lengths. Id 1 is treated as padding.
        reuse: Forwarded to the embedding layer and to the LSTM cell so a
            second call can share the variables of the first.

    Returns:
        The final encoder state returned by `tf.nn.dynamic_rnn`.
    """
    # Number of non-pad (id != 1) tokens in each row.
    # NOTE(review): decode() also uses id 1 as its end token, so an end
    # marker in `seq` is not counted here -- confirm this is intended.
    input_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(seq, 1)), 1)
    input_embed = layers.embed_sequence(seq,
                                        vocab_size=vocab_size,
                                        embed_dim=embed_dim,
                                        scope='embed',
                                        reuse=reuse)
    cell = tf.contrib.rnn.LSTMCell(num_units=num_units, reuse=reuse)
    if FLAGS.use_residual_lstm:
        cell = tf.contrib.rnn.ResidualWrapper(cell)
    # The lengths were previously computed but never used, so the final state
    # was taken after stepping through the padding. With sequence_length the
    # state is carried through unchanged once a row's real tokens are consumed.
    _, encoder_final_state = tf.nn.dynamic_rnn(
        cell, input_embed, sequence_length=input_lengths, dtype=tf.float32)
    return encoder_final_state

In [35]:
def decode(encoder_out, scope, output=None, mode='train', reuse=None):
    """Build an LSTM decoder on top of an encoder result.

    Reads `start_tokens`, `embeddings`, `vocab_size`, `embed_dim`,
    `num_units` and `FLAGS` from the notebook's global namespace.

    Args:
        encoder_out: Indexable encoder result; only element 1 is read and it
            is used as the decoder's initial state.
        scope: Variable scope under which the decoder cell is created.
        output: Target token ids; required when `mode` is 'train'.
        mode: 'train' (teacher forcing through a TrainingHelper) or
            'predict' (greedy decoding through a GreedyEmbeddingHelper).
        reuse: Forwarded to the variable scope and the projection wrapper.

    Returns:
        The first element of the `dynamic_decode` result.

    Raises:
        ValueError: If `mode` is not 'train' or 'predict', or if `mode` is
            'train' and `output` is None.
    """
    # Reject unknown modes up front; previously they fell through both
    # branches and failed later on an unbound `helper`.
    if mode not in ('train', 'predict'):
        raise ValueError("mode must be 'train' or 'predict', got %r" % (mode,))

    encoder_state = encoder_out[1]

    if mode == 'train':
        if output is None:
            raise ValueError('output must be provided for mode=train')
        # Prepend the start token to every row, then embed with the encoder's
        # embedding matrix (scope 'encode/embed', reuse=True).
        train_output = tf.concat([tf.expand_dims(start_tokens, 1), output], 1)
        # Id 1 is padding: count the remaining tokens of each row.
        output_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(train_output, 1)), 1)
        output_embed = layers.embed_sequence(
            train_output,
            vocab_size=vocab_size,
            embed_dim=embed_dim,
            scope='encode/embed', reuse=True)
        helper = tf.contrib.seq2seq.TrainingHelper(output_embed, output_lengths)
    else:
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embeddings,
            start_tokens=tf.to_int32(start_tokens),
            end_token=1
            )

    with tf.variable_scope(scope, reuse=reuse):
        cell = tf.contrib.rnn.LSTMCell(num_units=num_units)
        # Project each LSTM output onto the vocabulary.
        out_cell = tf.contrib.rnn.OutputProjectionWrapper(cell, vocab_size, reuse=reuse)
        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=out_cell, helper=helper,
            initial_state=encoder_state
            )
        outputs = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder, output_time_major=False,
            impute_finished=True, maximum_iterations=FLAGS.output_max_length)

        return outputs[0]


In [None]:
def seq_loss(self, decoding, actual):
    """Masked sequence loss between the decoder output and the target ids.

    Args:
        decoding: Decoder output; its `rnn_output` is used as the logits.
        actual: Target token ids.

    Returns:
        The value of `tf.contrib.seq2seq.sequence_loss`.
    """
    start_column = tf.expand_dims(self.start_tokens, 1)
    shifted_targets = tf.concat([start_column, actual], 1)
    # Dropping the last column gives the mask the same width as `actual`;
    # a position is weighted 1.0 where the shifted ids differ from 1.
    mask = tf.to_float(tf.not_equal(shifted_targets[:, :-1], 1))
    return tf.contrib.seq2seq.sequence_loss(decoding.rnn_output,
                                            actual,
                                            weights=mask)

In [36]:
# Top-level re-build of the encoder/decoder graph for interactive inspection.
# NOTE(review): `params` and `vocab_size` are not assigned in any cell visible
# in this notebook; they must already exist in the kernel -- confirm.
embed_dim = params.embed_dim
num_units = params.num_units

# Data
source, target, label   = features['source'], features['target'], features['label']
batch_size     = tf.shape(source)[0]
# One int64 zero per batch row; it is prepended to every sequence below.
start_tokens   = tf.zeros([batch_size], dtype= tf.int64)

# Encode both inputs; the second call passes reuse=True to share variables.
with tf.variable_scope('encode'):
    source_encoder_out = encode(source)
    target_encoder_out = encode(target, reuse=True)

# Save embeddings
with tf.variable_scope('encode/embed', reuse=True):
    embeddings = tf.get_variable('embeddings')

# From the encoder
# encoder_state = source_encoder_out[1]

# Prepend the start token, count the ids that differ from 1 in each row, and
# embed with the existing 'encode/embed' variables (reuse=True).
train_output   = tf.concat([tf.expand_dims(start_tokens, 1), source], 1)
output_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(train_output, 1)), 1)
output_embed   = layers.embed_sequence(
    train_output,
    vocab_size=vocab_size,
    embed_dim = embed_dim,
    scope = 'encode/embed', reuse = True)

helper = tf.contrib.seq2seq.TrainingHelper(output_embed, output_lengths)
# The decoder starts from whatever encode() returned for the source.
with tf.variable_scope('decode', reuse=None):
    cell = tf.contrib.rnn.LSTMCell(num_units=num_units)
    out_cell = tf.contrib.rnn.OutputProjectionWrapper(cell, vocab_size, reuse=None)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=out_cell, helper=helper,
        initial_state=source_encoder_out
        )

# NOTE(review): decode() is declared as decode(encoder_out, scope, ...) with
# `scope` required, and it indexes into encoder_out, while encode() returns
# only the final state. This call looks stale -- confirm before relying on
# `train_output_source`.
train_output_source = decode(source_encoder_out)

In [37]:
# Unroll the decoder batch-major, for at most FLAGS.output_max_length steps.
outputs = tf.contrib.seq2seq.dynamic_decode(
    decoder=decoder,
    output_time_major=False,
    impute_finished=True,
    maximum_iterations=FLAGS.output_max_length,
)


In [32]:
tf.reset_default_graph()

In [30]:
# List every graph op whose type contains 'Variable'. The pattern is a plain
# literal, so a substring test is equivalent to the regex search and needs
# no `re` import. print(...) runs on both Python 2 and Python 3.
for op in sess.graph.get_operations():
    if 'Variable' in op.type:
        print(op.name)

encode/embed/embeddings
encode/rnn/lstm_cell/kernel
encode/rnn/lstm_cell/bias
decoder/output_projection_wrapper/lstm_cell/kernel
decoder/output_projection_wrapper/lstm_cell/bias
decoder/output_projection_wrapper/kernel
decoder/output_projection_wrapper/bias
