In [1]:
import numpy as np
import tensorflow as tf


In [2]:
####
# Sequence classification with variable-length sequences
# Old approach to get the last relevant output for classification
####

import os
from datetime import datetime

# Clear the default graph stack and reset the global default graph before building anything.
tf.reset_default_graph()

NUM_HIDDEN = 5  # LSTM cell size; also the first dimension of the output-layer weights
NUM_CLASS = 3  # number of classes, i.e. the width of the logits
FEATURE_SIZE_PER_TIMESTEP = 5  # length of the feature vector parsed for each time step

SEED = 10 # fixed seed for the initializers / graph / numpy, for debugging & diagnostics

### Data pipeline
def input_pipeline(filename, batch_size, epochs=None):
    """
    Build the queue-based input pipeline: read serialized SequenceExamples
    from a single TFRecord file and emit dynamically padded batches.

    :param filename: TFRecord file name, joined onto the current working directory
    :param batch_size: number of examples per batch
    :param epochs: forwarded to the filename queue as ``num_epochs``
    :return: tuple of (batch_lengths, batch_sequences, batch_labels) tensors
    """
    # Filename queue holding the one input file.
    path_queue = tf.train.string_input_producer(
        [os.path.join(os.getcwd(), filename)], num_epochs=epochs)

    # Pull one serialized record at a time off the queue.
    _, record = tf.TFRecordReader().read(path_queue)

    # Only the sequence part of the example is used; the context part is discarded.
    _, parsed = tf.parse_single_sequence_example(
        serialized=record,
        sequence_features={
            "inputs": tf.FixedLenSequenceFeature([FEATURE_SIZE_PER_TIMESTEP], dtype=tf.float32),
            "label": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        })
    inputs, label = parsed["inputs"], parsed["label"]

    # Length of this example along its first axis, taken before batching pads it.
    n_steps = tf.shape(inputs)[0]
    batched = tf.train.batch(
        [n_steps, inputs, label],
        batch_size=batch_size,
        dynamic_pad=True,
        allow_smaller_final_batch=True,
        name="input_batching")
    batch_lengths, batch_sequences, batch_labels = batched
    return batch_lengths, batch_sequences, batch_labels


def _last_relevant(outputs, actual_lengths):
    """
    Select, for each sequence in the batch, the output at its last valid time step
    by gathering rows from the outputs flattened over batch and time.

    :param outputs: [batch_size x max_seq_length x hidden_size] tensor of dynamic_rnn outputs
    :param actual_lengths: [batch_size] tensor of sequence actual lengths
    :return: [batch_size x hidden_size] tensor of last outputs
    """
    n_batch, n_steps, n_hidden = tf.unstack(tf.shape(outputs))
    # In the flattened layout, row b * n_steps + t holds outputs[b, t].
    row_offsets = tf.range(0, n_batch) * n_steps
    last_rows = row_offsets + (actual_lengths - 1)
    flat_outputs = tf.reshape(outputs, [-1, n_hidden])
    return tf.gather(flat_outputs, last_rows)


### Build Model
def inference(inputs, actual_lengths):
    """
    Run an LSTM over the padded batch and classify each sequence from the
    output at its last valid time step.

    :param inputs: batch of padded input sequences fed to dynamic_rnn
    :param actual_lengths: [batch_size] tensor of sequence actual lengths
    :return: tuple of (logits, softmax predictions)
    """
    lstm_cell = tf.contrib.rnn.LSTMCell(
        NUM_HIDDEN, initializer=tf.truncated_normal_initializer(seed=SEED))
    # The final state returned by dynamic_rnn is not needed here.
    rnn_outputs, _ = tf.nn.dynamic_rnn(
        lstm_cell, inputs, dtype=tf.float32, sequence_length=actual_lengths)
    final_outputs = _last_relevant(rnn_outputs, actual_lengths)

    # Output layer parameters
    out_weights = tf.Variable(tf.truncated_normal([NUM_HIDDEN, NUM_CLASS], seed=SEED), dtype=tf.float32)
    out_biases = tf.Variable(tf.constant(0.1, shape=[NUM_CLASS]), dtype=tf.float32)

    # Affine projection of the last relevant outputs, then softmax over classes
    logits = tf.add(tf.matmul(final_outputs, out_weights), out_biases)
    return logits, tf.nn.softmax(logits)


## Cost function
def loss(logits, labels, actual_lengths):
    """
    Mean sparse softmax cross-entropy over the batch.

    :param logits: unnormalized class scores, one row per sequence
    :param labels: integer class labels; flattened to 1-D before use
    :param actual_lengths: unused here; kept for a uniform signature
    :return: scalar mean loss tensor
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.reshape(labels, [-1]))
    return tf.reduce_mean(per_example)


## Error tracking 
def error(predictions, labels, actual_lengths):
    """
    Fraction of sequences in the batch whose predicted class differs from the label.

    :param predictions: per-class scores or probabilities, one row per sequence
    :param labels: integer class labels; flattened to 1-D before use
    :param actual_lengths: unused here; kept for a uniform signature
    :return: scalar mean error-rate tensor (float32)
    """
    labels_flat = tf.reshape(labels, [-1])
    predicted = tf.argmax(predictions, 1)
    # tf.not_equal takes two operands of the same dtype: compare the predicted
    # class against the label (the original call omitted the labels entirely).
    errors = tf.not_equal(predicted, tf.cast(labels_flat, predicted.dtype))
    mean_error = tf.reduce_mean(tf.cast(errors, tf.float32))
    return mean_error
    

def training(loss, learning_rate):
    """
    Create the op that performs one Adam update step on the given loss.

    :param loss: scalar loss tensor to minimize
    :param learning_rate: learning rate passed to AdamOptimizer
    :return: training op
    """
    return tf.train.AdamOptimizer(learning_rate).minimize(loss)


### Training
NUM_EPOCHS = 100
BATCH_SIZE = 3
DISPLAY_STEP = 5
LEARNING_RATE = 1e-3
TRAINING_SET_SIZE = 7

# Batches per epoch = ceil(TRAINING_SET_SIZE / BATCH_SIZE), in integer arithmetic.
# It does not change between epochs, so it is computed once up front.
total_batch = (TRAINING_SET_SIZE + BATCH_SIZE - 1) // BATCH_SIZE

filename = 'Sequence_classification.tfr'
with tf.Graph().as_default():
    tf.set_random_seed(SEED)
    np.random.seed(SEED)
    # Build Graph
    lengths, sequences, labels = input_pipeline(filename, BATCH_SIZE)
    logits, _ = inference(sequences, lengths)
    avg_loss = loss(logits, labels, lengths)
    train_op = training(avg_loss, LEARNING_RATE)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # The context manager closes the session even if training raises.
    with tf.Session() as sess:
        sess.run(init_op)
        # Start QueueRunner
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Training cycles
            for epoch in range(1, NUM_EPOCHS+1):
                # Leave the epoch loop too once a stop has been requested;
                # breaking out of the step loop alone would keep iterating epochs.
                if coord.should_stop():
                    break
                epoch_avg_loss = 0.0
                for step in range(1, total_batch + 1):
                    if coord.should_stop():
                        break
                    _, train_loss = sess.run([train_op, avg_loss])
                    epoch_avg_loss += train_loss / total_batch
                    assert not np.isnan(train_loss), 'Model diverged with loss = NaN'

                    if step % DISPLAY_STEP == 0:
                        print('%s: epoch %d, step %d, train_loss = %.6f'
                            % (datetime.now(), epoch, step, train_loss))

                print('%s: epoch %d avg_loss = %.6f'
                    % (datetime.now(), epoch, epoch_avg_loss))

        except tf.errors.OutOfRangeError as e:
            print(e.error_code, e.message)
            print('Done!')

        finally:
            # Always stop and join the queue-runner threads, whatever ended the loop,
            # so they are not left running against a closed session.
            coord.request_stop()
            coord.join(threads)

print('Finished!')

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


2017-04-25 11:23:35.529648: epoch 1 avg_loss = 1.119818
2017-04-25 11:23:35.565268: epoch 2 avg_loss = 1.021896
2017-04-25 11:23:35.602352: epoch 3 avg_loss = 1.091379
2017-04-25 11:23:35.634904: epoch 4 avg_loss = 1.038721
2017-04-25 11:23:35.667892: epoch 5 avg_loss = 1.073843
2017-04-25 11:23:35.703451: epoch 6 avg_loss = 1.020319
2017-04-25 11:23:35.746945: epoch 7 avg_loss = 1.036178
2017-04-25 11:23:35.789090: epoch 8 avg_loss = 1.083859
2017-04-25 11:23:35.826458: epoch 9 avg_loss = 0.989597
2017-04-25 11:23:35.863207: epoch 10 avg_loss = 1.055611
2017-04-25 11:23:35.898962: epoch 11 avg_loss = 1.006814
2017-04-25 11:23:35.932592: epoch 12 avg_loss = 1.041140
2017-04-25 11:23:35.970832: epoch 13 avg_loss = 0.991523
2017-04-25 11:23:36.008010: epoch 14 avg_loss = 1.003261
2017-04-25 11:23:36.033581: epoch 15 avg_loss = 1.053336
2017-04-25 11:23:36.066421: epoch 16 avg_loss = 0.962615
2017-04-25 11:23:36.100471: epoch 17 avg_loss = 1.024655
2017-04-25 11:23:36.135380: epoch 18 avg

In [3]:
# Hand-written sample of RNN outputs with shape [batch_size=3, max_seq_length=3, hidden_size=5].
# The last time step of the third sequence is all zeros, consistent with its
# actual length of 2 given in `actual_lengths` below.
outputs = np.array([[[-1.11184277e-01, 2.33777296e-02, -5.76924253e-03, -2.03809544e-01, 2.85978884e-01],
                     [-6.20732462e-05, 2.37267528e-07, -3.32064616e-08, -1.00004703e-01, 6.70888603e-01],
                     [-6.59987450e-01, 3.52891497e-02, -8.80478648e-04, -2.64588028e-01, 2.42421087e-02]],
                    
                    [[-1.14256974e-04, 1.39966013e-03, -1.43084544e-05, -3.55578102e-02, 4.55059946e-01],
                     [-1.29182416e-03, 1.95695069e-08, -2.30110445e-11, -3.71065177e-02, 5.85010469e-01],
                     [-1.13629685e-04, 4.00667477e-06, -4.22743142e-07, -1.92035735e-01, 7.23191023e-01]],
                    
                    [[ 3.30646448e-02, 8.29776973e-02, -9.03536081e-02, -2.44489789e-01, 2.69344389e-01],
                     [-3.35084200e-02, 7.04476086e-04, -1.08449552e-04, -2.25155890e-01, 4.65190321e-01],
                     [ 0.00000000e+00, 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, 0.00000000e+00]]])
# Number of valid time steps in each of the three sequences.
actual_lengths = np.array([3,3,2], dtype=np.int64)

batch_size, max_seq_length, hidden_size = tf.unstack(tf.shape(outputs))

## (1) Old way:
##     tf.gather(...) to gather the last output via flattened outputs & a 1d index array
##     NOTE: This approach works, but when used in a trained model it raises the warning below,
##         UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape.
##                      This may consume a large amount of memory.
##                      "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
index_flat = tf.range(0, batch_size) * max_seq_length + (actual_lengths - 1)
out_1 = tf.gather(tf.reshape(outputs, [-1, hidden_size]), index_flat)

## (2) New way:
##     tf.gather_nd(...) to gather the last output via a 2d index array directly
index_nd = tf.stack([tf.range(0, batch_size), (actual_lengths - 1)], axis=1)
out_2 = tf.gather_nd(outputs, index_nd)

# Evaluate all four tensors in a single Session.run call. This replaces the
# deprecated tf.contrib.learn.run_n helper; `result` stays a one-element list
# of dicts so it is indexed exactly as before.
with tf.Session() as sess:
    result = [sess.run(
        {
            "index_flat" : index_flat,
            "out_1": out_1,
            "index_nd" : index_nd,
            "out_2" : out_2,
        })]

print(result[0]["index_flat"])
print(result[0]["out_1"])
print(result[0]["index_nd"])
print(result[0]["out_2"])


Instructions for updating:
graph_actions.py will be deleted. Use tf.train.* utilities instead. You can use learn/estimators/estimator.py as an example.
Instructions for updating:
graph_actions.py will be deleted. Use tf.train.* utilities instead. You can use learn/estimators/estimator.py as an example.
Instructions for updating:
graph_actions.py will be deleted. Use tf.train.* utilities instead. You can use learn/estimators/estimator.py as an example.
[2 5 7]
[[ -6.59987450e-01   3.52891497e-02  -8.80478648e-04  -2.64588028e-01
    2.42421087e-02]
 [ -1.13629685e-04   4.00667477e-06  -4.22743142e-07  -1.92035735e-01
    7.23191023e-01]
 [ -3.35084200e-02   7.04476086e-04  -1.08449552e-04  -2.25155890e-01
    4.65190321e-01]]
[[0 2]
 [1 2]
 [2 1]]
[[ -6.59987450e-01   3.52891497e-02  -8.80478648e-04  -2.64588028e-01
    2.42421087e-02]
 [ -1.13629685e-04   4.00667477e-06  -4.22743142e-07  -1.92035735e-01
    7.23191023e-01]
 [ -3.35084200e-02   7.04476086e-04  -1.08449552e-04  -2.251558

In [4]:
####
# Sequence classification with variable-length sequences
# New approach to get the last relevant output for classification
####

import os
from datetime import datetime

# Clear the default graph stack and reset the global default graph before building anything.
tf.reset_default_graph()

NUM_HIDDEN = 5  # LSTM cell size; also the first dimension of the output-layer weights
NUM_CLASS = 3  # number of classes, i.e. the width of the logits
FEATURE_SIZE_PER_TIMESTEP = 5  # length of the feature vector parsed for each time step

SEED = 10 # fixed seed for the initializers / graph / numpy, for debugging & diagnostics

### Data pipeline
def input_pipeline(filename, batch_size, epochs=None):
    """
    Build the queue-based input pipeline: read serialized SequenceExamples
    from a single TFRecord file and emit dynamically padded batches.

    :param filename: TFRecord file name, joined onto the current working directory
    :param batch_size: number of examples per batch
    :param epochs: forwarded to the filename queue as ``num_epochs``
    :return: tuple of (batch_lengths, batch_sequences, batch_labels) tensors
    """
    # Filename queue holding the one input file.
    path_queue = tf.train.string_input_producer(
        [os.path.join(os.getcwd(), filename)], num_epochs=epochs)

    # Pull one serialized record at a time off the queue.
    _, record = tf.TFRecordReader().read(path_queue)

    # Only the sequence part of the example is used; the context part is discarded.
    _, parsed = tf.parse_single_sequence_example(
        serialized=record,
        sequence_features={
            "inputs": tf.FixedLenSequenceFeature([FEATURE_SIZE_PER_TIMESTEP], dtype=tf.float32),
            "label": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        })
    inputs, label = parsed["inputs"], parsed["label"]

    # Length of this example along its first axis, taken before batching pads it.
    n_steps = tf.shape(inputs)[0]
    batched = tf.train.batch(
        [n_steps, inputs, label],
        batch_size=batch_size,
        dynamic_pad=True,
        allow_smaller_final_batch=True,
        name="input_batching")
    batch_lengths, batch_sequences, batch_labels = batched
    return batch_lengths, batch_sequences, batch_labels



def _last_relevant(outputs, actual_lengths):
    """
    Select, for each sequence in the batch, the output at its last valid time step
    using a 2-D (batch index, time index) lookup with tf.gather_nd.

    :param outputs: [batch_size x max_seq_length x hidden_size] tensor of dynamic_rnn outputs
    :param actual_lengths: [batch_size] tensor of sequence actual lengths
    :return: [batch_size x hidden_size] tensor of last outputs
    """
    n_batch = tf.shape(outputs)[0]
    batch_ids = tf.range(n_batch)
    last_steps = actual_lengths - 1
    # Each row of `coords` is [b, t]: the position of outputs[b, t] to pick.
    coords = tf.stack([batch_ids, last_steps], axis=1)
    return tf.gather_nd(outputs, coords)


### Build Model
def inference(inputs, actual_lengths):
    """
    Run an LSTM over the padded batch and classify each sequence from the
    output at its last valid time step.

    :param inputs: batch of padded input sequences fed to dynamic_rnn
    :param actual_lengths: [batch_size] tensor of sequence actual lengths
    :return: tuple of (logits, softmax predictions)
    """
    lstm_cell = tf.contrib.rnn.LSTMCell(
        NUM_HIDDEN, initializer=tf.truncated_normal_initializer(seed=SEED))
    # The final state returned by dynamic_rnn is not needed here.
    rnn_outputs, _ = tf.nn.dynamic_rnn(
        lstm_cell, inputs, dtype=tf.float32, sequence_length=actual_lengths)
    final_outputs = _last_relevant(rnn_outputs, actual_lengths)

    # Output layer parameters
    out_weights = tf.Variable(tf.truncated_normal([NUM_HIDDEN, NUM_CLASS], seed=SEED), dtype=tf.float32)
    out_biases = tf.Variable(tf.constant(0.1, shape=[NUM_CLASS]), dtype=tf.float32)

    # Affine projection of the last relevant outputs, then softmax over classes
    logits = tf.add(tf.matmul(final_outputs, out_weights), out_biases)
    return logits, tf.nn.softmax(logits)


## Cost function
def loss(logits, labels, actual_lengths):
    """
    Mean sparse softmax cross-entropy over the batch.

    :param logits: unnormalized class scores, one row per sequence
    :param labels: integer class labels; flattened to 1-D before use
    :param actual_lengths: unused here; kept for a uniform signature
    :return: scalar mean loss tensor
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.reshape(labels, [-1]))
    return tf.reduce_mean(per_example)


## Error tracking 
def error(predictions, labels, actual_lengths):
    """
    Fraction of sequences in the batch whose predicted class differs from the label.

    :param predictions: per-class scores or probabilities, one row per sequence
    :param labels: integer class labels; flattened to 1-D before use
    :param actual_lengths: unused here; kept for a uniform signature
    :return: scalar mean error-rate tensor (float32)
    """
    labels_flat = tf.reshape(labels, [-1])
    predicted = tf.argmax(predictions, 1)
    # tf.not_equal takes two operands of the same dtype: compare the predicted
    # class against the label (the original call omitted the labels entirely).
    errors = tf.not_equal(predicted, tf.cast(labels_flat, predicted.dtype))
    mean_error = tf.reduce_mean(tf.cast(errors, tf.float32))
    return mean_error
    

def training(loss, learning_rate):
    """
    Create the op that performs one Adam update step on the given loss.

    :param loss: scalar loss tensor to minimize
    :param learning_rate: learning rate passed to AdamOptimizer
    :return: training op
    """
    return tf.train.AdamOptimizer(learning_rate).minimize(loss)


### Training
NUM_EPOCHS = 100
BATCH_SIZE = 3
DISPLAY_STEP = 5
LEARNING_RATE = 1e-3
TRAINING_SET_SIZE = 7

# Batches per epoch = ceil(TRAINING_SET_SIZE / BATCH_SIZE), in integer arithmetic.
# It does not change between epochs, so it is computed once up front.
total_batch = (TRAINING_SET_SIZE + BATCH_SIZE - 1) // BATCH_SIZE

filename = 'Sequence_classification.tfr'
with tf.Graph().as_default():
    tf.set_random_seed(SEED)
    np.random.seed(SEED)
    # Build Graph
    lengths, sequences, labels = input_pipeline(filename, BATCH_SIZE)
    logits, _ = inference(sequences, lengths)
    avg_loss = loss(logits, labels, lengths)
    train_op = training(avg_loss, LEARNING_RATE)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # The context manager closes the session even if training raises.
    with tf.Session() as sess:
        sess.run(init_op)
        # Start QueueRunner
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Training cycles
            for epoch in range(1, NUM_EPOCHS+1):
                # Leave the epoch loop too once a stop has been requested;
                # breaking out of the step loop alone would keep iterating epochs.
                if coord.should_stop():
                    break
                epoch_avg_loss = 0.0
                for step in range(1, total_batch + 1):
                    if coord.should_stop():
                        break
                    _, train_loss = sess.run([train_op, avg_loss])
                    epoch_avg_loss += train_loss / total_batch
                    assert not np.isnan(train_loss), 'Model diverged with loss = NaN'

                    if step % DISPLAY_STEP == 0:
                        print('%s: epoch %d, step %d, train_loss = %.6f'
                            % (datetime.now(), epoch, step, train_loss))

                print('%s: epoch %d avg_loss = %.6f'
                    % (datetime.now(), epoch, epoch_avg_loss))

        except tf.errors.OutOfRangeError as e:
            print(e.error_code, e.message)
            print('Done!')

        finally:
            # Always stop and join the queue-runner threads, whatever ended the loop,
            # so they are not left running against a closed session.
            coord.request_stop()
            coord.join(threads)

print('Finished!')

2017-04-25 11:23:41.616083: epoch 1 avg_loss = 1.119818
2017-04-25 11:23:41.654573: epoch 2 avg_loss = 1.021896
2017-04-25 11:23:41.690342: epoch 3 avg_loss = 1.091379
2017-04-25 11:23:41.723877: epoch 4 avg_loss = 1.038721
2017-04-25 11:23:41.758106: epoch 5 avg_loss = 1.073843
2017-04-25 11:23:41.791471: epoch 6 avg_loss = 1.020319
2017-04-25 11:23:41.832775: epoch 7 avg_loss = 1.036178
2017-04-25 11:23:41.873665: epoch 8 avg_loss = 1.083859
2017-04-25 11:23:41.915846: epoch 9 avg_loss = 0.989597
2017-04-25 11:23:41.962309: epoch 10 avg_loss = 1.055611
2017-04-25 11:23:42.007428: epoch 11 avg_loss = 1.006814
2017-04-25 11:23:42.044389: epoch 12 avg_loss = 1.041140
2017-04-25 11:23:42.073556: epoch 13 avg_loss = 0.991523
2017-04-25 11:23:42.109219: epoch 14 avg_loss = 1.003261
2017-04-25 11:23:42.136669: epoch 15 avg_loss = 1.053336
2017-04-25 11:23:42.176539: epoch 16 avg_loss = 0.962615
2017-04-25 11:23:42.207938: epoch 17 avg_loss = 1.024655
2017-04-25 11:23:42.242223: epoch 18 avg