In [1]:
import numpy as np
import tensorflow as tf

In [2]:
####
# Sequence Labeling with variable-length sequences
####

import os
from datetime import datetime

# Reset the global default graph. The training code further down builds its
# ops inside its own tf.Graph(), so this only affects default-graph state.
tf.reset_default_graph()

# Number of units given to the LSTM cell; also the first dimension of the
# output-layer weight matrix.
NUM_HIDDEN = 5
# Number of label classes, i.e. the width of the logits / softmax output.
NUM_CLASS = 3
# Length of the per-time-step feature vector declared for the "inputs"
# sequence feature when parsing records.
FEATURE_SIZE_PER_TIMESTEP = 5

### Data pipeline
def input_pipeline(filename, batch_size, epochs=None):
    """Build the queue-based ops that read, parse and batch sequence examples.

    Args:
        filename: Name of the TFRecord file, joined onto the current working
            directory.
        batch_size: Number of examples requested per batch (the final batch
            is allowed to be smaller).
        epochs: Forwarded as ``num_epochs`` to the filename queue.

    Returns:
        A tuple ``(batch_lengths, batch_sequences, batch_labels)`` holding the
        batched sequence lengths, the dynamically padded inputs and the
        dynamically padded labels.
    """
    record_path = os.path.join(os.getcwd(), filename)
    filename_queue = tf.train.string_input_producer([record_path], num_epochs=epochs)
    _, serialized = tf.TFRecordReader().read(filename_queue)

    # Only sequence features are declared; the context part of the parsed
    # example is discarded.
    feature_spec = {
        "inputs": tf.FixedLenSequenceFeature([FEATURE_SIZE_PER_TIMESTEP], dtype=tf.float32),
        "labels": tf.FixedLenSequenceFeature([], dtype=tf.int64)
    }
    parsed = tf.parse_single_sequence_example(
        serialized=serialized,
        sequence_features=feature_spec)[1]
    step_inputs = parsed["inputs"]
    step_labels = parsed["labels"]

    # Capture the length of this example before batching pads it.
    num_steps = tf.shape(step_inputs)[0]
    return tuple(tf.train.batch(
        [num_steps, step_inputs, step_labels],
        batch_size=batch_size,
        dynamic_pad=True,
        allow_smaller_final_batch=True,
        name="input_batching"))


### Build Model
def inference(inputs, actual_lengths):
    """Run an LSTM over the batch and apply one shared output layer per step.

    Args:
        inputs: Batched input sequences handed to ``tf.nn.dynamic_rnn``.
        actual_lengths: Per-example lengths, passed as ``sequence_length``.

    Returns:
        A tuple ``(logits, predictions)``. Both are reshaped to
        ``[-1, max_length, NUM_CLASS]``; ``predictions`` is the softmax of
        ``logits``.
    """
    rnn_outputs, _ = tf.nn.dynamic_rnn(
        tf.contrib.rnn.LSTMCell(NUM_HIDDEN),
        inputs,
        sequence_length=actual_lengths,
        dtype=tf.float32)
    # Time dimension of the RNN output, needed to undo the flattening below.
    time_steps = tf.shape(rnn_outputs)[1]
    stepwise_shape = [-1, time_steps, NUM_CLASS]

    # Output-layer parameters.
    w_out = tf.Variable(tf.truncated_normal([NUM_HIDDEN, NUM_CLASS]), dtype=tf.float32)
    b_out = tf.Variable(tf.constant(0.1, shape=[NUM_CLASS]), dtype=tf.float32)

    # Collapse batch and time so a single matmul covers every time step.
    flat_outputs = tf.reshape(rnn_outputs, [-1, NUM_HIDDEN])
    flat_logits = tf.add(tf.matmul(flat_outputs, w_out), b_out)
    flat_probs = tf.nn.softmax(flat_logits)
    return tf.reshape(flat_logits, stepwise_shape), tf.reshape(flat_probs, stepwise_shape)


## Cost function
def loss(logits, labels, actual_lengths):
    """Length-masked mean cross-entropy over a batch of padded sequences.

    The per-step sparse softmax cross-entropy is zeroed at positions beyond
    each example's length, summed per example, divided by that example's
    length, and finally averaged over the batch.

    Args:
        logits: Per-step class scores; the last dimension is ``NUM_CLASS``.
        labels: Integer class ids, one per (example, time step).
        actual_lengths: Per-example sequence lengths used to build the mask.

    Returns:
        A scalar tensor with the batch mean of the per-example mean losses.
    """
    step_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(logits, [-1, NUM_CLASS]),
        labels=tf.reshape(labels, [-1]))
    # Restore the (example, time step) layout so the mask lines up directly.
    step_losses = tf.reshape(step_losses, tf.shape(labels))

    valid_steps = tf.sequence_mask(actual_lengths, tf.shape(labels)[1], dtype=tf.float32)
    loss_sum_by_example = tf.reduce_sum(valid_steps * step_losses, axis=1)

    lengths_as_float = tf.cast(actual_lengths, tf.float32)
    return tf.reduce_mean(loss_sum_by_example / lengths_as_float)

    
## Error tracking 
def error(predictions, labels, actual_lengths):
    """Length-masked mean classification error over a batch of padded sequences.

    A step counts as an error when the arg-max class of ``predictions`` differs
    from the label. Errors at positions beyond each example's length are
    zeroed, the rest are averaged per example over its length, and the
    per-example rates are averaged over the batch.

    Args:
        predictions: Per-step class scores; the last dimension is ``NUM_CLASS``.
        labels: Integer class ids, one per (example, time step).
        actual_lengths: Per-example sequence lengths used to build the mask.

    Returns:
        A scalar float32 tensor with the batch mean of per-example error rates.
    """
    predictions_flat = tf.reshape(predictions, [-1, NUM_CLASS])
    labels_flat = tf.reshape(labels, [-1])
    # argmax yields int64; cast to the labels' dtype so the comparison also
    # works when labels are not int64.
    predicted_classes = tf.cast(tf.argmax(predictions_flat, 1), labels_flat.dtype)
    errors = tf.cast(tf.not_equal(labels_flat, predicted_classes), tf.float32)
    # Mask the errors via sequence length
    max_length = tf.shape(labels)[1]
    mask = tf.sequence_mask(actual_lengths, max_length, dtype=tf.float32)
    mask = tf.reshape(mask, [-1])
    masked_errors = mask * errors
    masked_errors = tf.reshape(masked_errors, tf.shape(labels))
    # Lengths are integers; cast before dividing the float error sums,
    # mirroring what loss() does.
    lengths_as_float = tf.cast(actual_lengths, tf.float32)
    mean_error_by_example = tf.reduce_sum(masked_errors, axis=1) / lengths_as_float
    mean_error = tf.reduce_mean(mean_error_by_example)
    return mean_error
    
# # Alternative loss calculation (version 2): explicit cross-entropy from the softmax probabilities
# cross_entropy = tf.one_hot(y_flat, NUM_CLASS) * tf.log(probs_flat)
# cross_entropy = -tf.reduce_sum(cross_entropy, axis=1)
# masked_ce = mask * cross_entropy
# masked_ce = tf.reshape(masked_ce, tf.shape(y))
# mean_ce_by_example = tf.reduce_sum(masked_ce, axis=1) / example_len
# mean_ce = tf.reduce_mean(mean_ce_by_example)

def training(loss, learning_rate):
    """Create the op that minimizes ``loss`` with an Adam optimizer.

    Args:
        loss: Tensor to minimize.
        learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.

    Returns:
        The op returned by the optimizer's ``minimize`` call.
    """
    return tf.train.AdamOptimizer(learning_rate).minimize(loss)

### Training
NUM_EPOCHS = 100
BATCH_SIZE = 3
DISPLAY_STEP = 5
LEARNING_RATE = 1e-3
TRAINING_SET_SIZE = 7

filename = 'Sequence_labeling.tfr'
with tf.Graph().as_default():
    tf.set_random_seed(10)
    np.random.seed(10)
    # Build Graph
    lengths, sequences, labels = input_pipeline(filename, BATCH_SIZE)
    logits, _ = inference(sequences, lengths)
    avg_loss = loss(logits, labels, lengths)
    train_op = training(avg_loss, LEARNING_RATE)
    # Local variables are initialized as well because the filename queue
    # is given a num_epochs argument.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Batches per epoch: ceiling of TRAINING_SET_SIZE / BATCH_SIZE.
    total_batch = -(-TRAINING_SET_SIZE // BATCH_SIZE)

    # The context manager closes the session on every exit path, including
    # exceptions other than OutOfRangeError (e.g. the NaN assertion below).
    with tf.Session() as sess:
        sess.run(init_op)
        # Start QueueRunner
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        stop_requested = False
        try:
            # Training cycles
            for epoch in range(1, NUM_EPOCHS + 1):
                epoch_avg_loss = 0.0
                for step in range(1, total_batch + 1):
                    if coord.should_stop():
                        stop_requested = True
                        break
                    _, train_loss = sess.run([train_op, avg_loss])
                    assert not np.isnan(train_loss), 'Model diverged with loss = NaN'
                    epoch_avg_loss += train_loss / total_batch

                    if step % DISPLAY_STEP == 0:
                        print('%s: epoch %d, step %d, train_loss = %.6f'
                            % (datetime.now(), epoch, step, train_loss))

                # A stop request ends training altogether rather than only
                # cutting the current epoch short.
                if stop_requested:
                    break

                print('%s: epoch %d avg_loss = %.6f'
                    % (datetime.now(), epoch, epoch_avg_loss))

        except tf.errors.OutOfRangeError as e:
            print(e.error_code, e.message)
            print('Done!')

        finally:
            # Always stop and wait for the queue-runner threads before the
            # session is closed, whatever ended the loop.
            coord.request_stop()
            coord.join(threads)

print('Finished!')

2017-04-24 14:39:05.919187: epoch 1 avg_loss = 1.147794
2017-04-24 14:39:05.957375: epoch 2 avg_loss = 1.173444
2017-04-24 14:39:05.992926: epoch 3 avg_loss = 1.125526
2017-04-24 14:39:06.025304: epoch 4 avg_loss = 1.156233
2017-04-24 14:39:06.057438: epoch 5 avg_loss = 1.155858
2017-04-24 14:39:06.089595: epoch 6 avg_loss = 1.097722
2017-04-24 14:39:06.128443: epoch 7 avg_loss = 1.140637
2017-04-24 14:39:06.165273: epoch 8 avg_loss = 1.113705
2017-04-24 14:39:06.201642: epoch 9 avg_loss = 1.132878
2017-04-24 14:39:06.232779: epoch 10 avg_loss = 1.090742
2017-04-24 14:39:06.262576: epoch 11 avg_loss = 1.116564
2017-04-24 14:39:06.294061: epoch 12 avg_loss = 1.115048
2017-04-24 14:39:06.325620: epoch 13 avg_loss = 1.062377
2017-04-24 14:39:06.363779: epoch 14 avg_loss = 1.094674
2017-04-24 14:39:06.393786: epoch 15 avg_loss = 1.072043
2017-04-24 14:39:06.428580: epoch 16 avg_loss = 1.079789
2017-04-24 14:39:06.462332: epoch 17 avg_loss = 1.044403
2017-04-24 14:39:06.494654: epoch 18 avg

In [3]:
## Worked example of the masked-loss steps, run-time values excerpted from tfdbg
tf.reset_default_graph()

# Inputs: 9 flattened positions, reshaped below into 3 rows; the lengths
# [3, 3, 2] mean the last position is padding and is masked out.
labels_flat = np.array([0,1,2,1,0,0,1,1,0])
logits_flat = np.array([[0.6422022 , -0.08686169, -0.13423285],
                        [0.88595915, -0.87825918, -1.19131315],
                        [1.09363282, -0.92535424, -0.90869629],
                        [1.34931934,  0.12072743, -0.35725975],
                        [2.14791036, -0.19916281, -0.59185725],
                        [1.07656074, -1.0128665 , -1.37606812],
                        [0.41977212, -0.05744966, -0.13450697],
                        [1.32846284, -0.24778995, -0.44169524],
                        [0.1       ,  0.1       ,  0.1       ]], dtype=np.float32)
actual_lengths = np.array([3,3,2])
seq_mask_flat = np.array([1.,1.,1.,1.,1.,1.,1.,1.,0.], dtype=np.float32)

# Per-position loss, then zero out the padded position.
losses_flat = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels_flat,
    logits=logits_flat)
masked_losses_flat = tf.multiply(seq_mask_flat, losses_flat)

# Back to one row per example, then average over each example's true length.
labels = tf.reshape(labels_flat, [3, -1])
masked_losses = tf.reshape(masked_losses_flat, tf.shape(labels))
mean_loss_by_example = tf.div(
    tf.reduce_sum(masked_losses, axis=1),
    tf.cast(actual_lengths, tf.float32))
mean_loss = tf.reduce_mean(mean_loss_by_example)

with tf.Session() as sess:
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    a, b, c, d, e = sess.run(
        [losses_flat, masked_losses_flat, masked_losses, mean_loss_by_example, mean_loss])
    # Show each intermediate array, then the scalar mean at full precision.
    for intermediate in (a, b, c, d):
        print(intermediate)
    print("%.15f" % e)

[ 0.66392612  2.02395773  2.23962045  1.61670065  0.14862178  0.19047575
  1.26339984  1.89619958  1.09861231]
[ 0.66392612  2.02395773  2.23962045  1.61670065  0.14862178  0.19047575
  1.26339984  1.89619958  0.        ]
[[ 0.66392612  2.02395773  2.23962045]
 [ 1.61670065  0.14862178  0.19047575]
 [ 1.26339984  1.89619958  0.        ]]
[ 1.64250147  0.65193272  1.57979965]
1.291411280632019


In [4]:
## Worked example of the masked-loss steps, run-time values excerpted from tfdbg

tf.reset_default_graph()

# Inputs: 15 flattened positions, reshaped below into 3 rows; the lengths
# [4, 2, 5] give the mask below, which zeroes the padded positions.
labels_flat = np.array([2, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 2, 0])
logits_flat = np.array([[0.09829693 , 0.1014218 ,  0.09910872],
                        [0.49634132, -0.2791599 , -0.24879429],
                        [0.57596284, -1.16146052, -1.34416914],
                        [1.42855453, -0.80200392, -0.69387823],
                        [0.099     ,  0.101     ,  0.09900001],
                        [2.36766672, -0.05113668, -0.49951288],
                        [1.89118695, -1.02813923, -1.18361068],
                        [0.099     ,  0.101     ,  0.09900001],
                        [0.099     ,  0.101     ,  0.09900001],
                        [0.099     ,  0.101     ,  0.09900001],
                        [1.59493768, -0.56841475, -0.65098649],
                        [1.08355463, -1.22900999, -1.47534275],
                        [1.36640537, -0.7257424 , -1.01273966],
                        [1.60844588, -0.58185029, -0.74234509],
                        [1.38049722, -0.61191964, -0.75697964],
                       ], dtype=np.float32)
actual_lengths = np.array([4,2,5])
seq_mask_flat = np.array([1.,1.,1.,1.,0.,1.,1.,0.,0.,0.,1.,1.,1.,1.,1.], dtype=np.float32)

# Per-position loss, then zero out the padded positions.
losses_flat = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels_flat,
    logits=logits_flat)
masked_losses_flat = tf.multiply(seq_mask_flat, losses_flat)

# Back to one row per example, then average over each example's true length.
labels = tf.reshape(labels_flat, [3, -1])
masked_losses = tf.reshape(masked_losses_flat, tf.shape(labels))
mean_loss_by_example = tf.div(
    tf.reduce_sum(masked_losses, axis=1),
    tf.cast(actual_lengths, tf.float32))
mean_loss = tf.reduce_mean(mean_loss_by_example)

with tf.Session() as sess:
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    a, b, c, d, e = sess.run(
        [losses_flat, masked_losses_flat, masked_losses, mean_loss_by_example, mean_loss])
    # Show each intermediate array, then the scalar mean at full precision.
    for intermediate in (a, b, c, d):
        print(intermediate)
    print("%.15f" % e)

[ 1.0991137   1.43568242  2.01699328  0.20474197  1.0992794   3.00335884
  0.09546375  1.0992794   1.0992794   1.0992794   0.19948083  2.47502112
  2.28775692  2.53907585  0.22659162]
[ 1.0991137   1.43568242  2.01699328  0.20474197  0.          3.00335884
  0.09546375  0.          0.          0.          0.19948083  2.47502112
  2.28775692  2.53907585  0.22659162]
[[ 1.0991137   1.43568242  2.01699328  0.20474197  0.        ]
 [ 3.00335884  0.09546375  0.          0.          0.        ]
 [ 0.19948083  2.47502112  2.28775692  2.53907585  0.22659162]]
[ 1.18913281  1.5494113   1.54558539]
1.428043007850647


In [5]:
# References:
# (1) http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/
# (2) https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
# (3) https://danijar.com/variable-sequence-lengths-in-tensorflow/
# (4) https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py)
# (5) https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/dynamic_rnn.py
# (6) https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767
