In [1]:
import numpy as np
import tensorflow as tf


In [2]:
## SOLUTION I:
####
# Use label 0 as the padding marker: the real classes are shifted to
# 1 .. NUM_CLASS-1, so the output layer is one class larger than the number
# of real classes. The loss mask is then derived directly from the labels.
####

tf.reset_default_graph()

tf.set_random_seed(10)
np.random.seed(10)

# Batch size
B = 4
# Max number of time steps in a batch
T = 8
RNN_DIM = 128
NUM_CLASS = 10  # only 9 real classes in use; class 0 is reserved for padding

# Actual length of each example in the batch
example_len = [1, 2, 3, 8]

# The classes of the examples at each step (classes 1 ~ 9 used, 0 means padding)
y = np.random.randint(1, NUM_CLASS, [B, T])
for i, n_steps in enumerate(example_len):
    # Everything past the real length of example i is padding
    y[i, n_steps:] = 0

# RNN outputs (faked with random values)
rnn_outputs = tf.convert_to_tensor(np.random.randn(B, T, RNN_DIM), dtype=tf.float32)

# Output layer weights
W = tf.get_variable(name="W", initializer=tf.random_normal_initializer(), shape=[RNN_DIM, NUM_CLASS])

# Calculate logits and probs
# Reshape so we can calculate them all at once
rnn_outputs_flat = tf.reshape(rnn_outputs, [-1, RNN_DIM])
logits_flat = tf.matmul(rnn_outputs_flat, W)
probs_flat = tf.nn.softmax(logits_flat)

# Calculate the losses
y_flat = tf.reshape(y, [-1])
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_flat, labels=y_flat)

# Mask the losses via the labels: sign(label) is 1 for a real class (>= 1)
# and 0 for the padding label 0
mask = tf.sign(tf.cast(y_flat, tf.float32))
masked_losses = mask * losses

# Bring back to [B, T] shape
masked_losses = tf.reshape(masked_losses, tf.shape(y))

# Calculate mean loss: per-example sum over the real steps divided by the
# example's real length, then averaged over the batch
example_len_f = tf.constant(example_len, dtype=tf.float32)
mean_loss_by_example = tf.reduce_sum(masked_losses, axis=1) / example_len_f
mean_loss = tf.reduce_mean(mean_loss_by_example)

fetches = {
    "y_flat": y_flat,
    "mask": mask,
    "masked_losses": masked_losses,
    "mean_loss_by_example": mean_loss_by_example,
    "mean_loss": mean_loss,
}

# tf.contrib.learn.run_n is deprecated (it emits the graph_actions.py warning),
# so evaluate the graph with a plain session instead.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Kept as a one-element list so result[0][...] has the same layout that
    # run_n(..., n=1) used to return.
    result = [sess.run(fetches)]

print(y)
print(result[0]["y_flat"])
print(result[0]["mask"])
print(result[0]["masked_losses"])
print(result[0]["mean_loss_by_example"])
print(result[0]["mean_loss"])


Instructions for updating:
graph_actions.py will be deleted. Use tf.train.* utilities instead. You can use learn/estimators/estimator.py as an example.
Instructions for updating:
graph_actions.py will be deleted. Use tf.train.* utilities instead. You can use learn/estimators/estimator.py as an example.
Instructions for updating:
graph_actions.py will be deleted. Use tf.train.* utilities instead. You can use learn/estimators/estimator.py as an example.
[[5 0 0 0 0 0 0 0]
 [7 5 0 0 0 0 0 0]
 [9 5 2 0 0 0 0 0]
 [2 5 3 7 8 9 9 3]]
[5 0 0 0 0 0 0 0 7 5 0 0 0 0 0 0 9 5 2 0 0 0 0 0 2 5 3 7 8 9 9 3]
[ 1.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  1.  1.
  1.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.  1.]
[[ 24.29052544   0.           0.           0.           0.           0.
    0.           0.        ]
 [ 13.50571346   7.59000778   0.           0.           0.           0.
    0.           0.        ]
 [ 20.49732208   8.08126068  23.02852821   0.           0.          

In [3]:
## SOLUTION II:
####
# Use sequence_length to build the mask, independent of num_class & labels.
# Computes the masked loss twice (built-in op vs. hand-written cross entropy)
# plus the masked classification error.
####

tf.reset_default_graph()

tf.set_random_seed(10)
np.random.seed(10)

# Batch size
B = 4
# Max number of time steps in a batch
T = 8
RNN_DIM = 128
NUM_CLASS = 10

# Actual length of each example in the batch
example_len = [1, 2, 3, 8]


# The classes of the examples at each step (Classes 0 ~ 9 used)
y = np.random.randint(0, NUM_CLASS, [B, T])

# RNN outputs (faked, simulating the outputs returned by tf.nn.dynamic_rnn(...))
rnn_outputs = tf.convert_to_tensor(np.random.randn(B, T, RNN_DIM), dtype=tf.float32)

# Output layer weights
W = tf.get_variable(name="W", initializer=tf.random_normal_initializer(), shape=[RNN_DIM, NUM_CLASS])

# Calculate logits and probs
# Reshape so we can calculate them all at once
rnn_outputs_flat = tf.reshape(rnn_outputs, [-1, RNN_DIM])
logits_flat = tf.matmul(rnn_outputs_flat, W)
probs_flat = tf.nn.softmax(logits_flat)
probs = tf.reshape(probs_flat, [-1, T, NUM_CLASS])

# Calculate the losses, version 1: built-in sparse softmax cross entropy
y_flat = tf.reshape(y, [-1])
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_flat, labels=y_flat)
# Mask the losses via sequence length: 1.0 for steps < length, 0.0 afterwards
mask = tf.sequence_mask(example_len, T, dtype=tf.float32)
mask = tf.reshape(mask, [-1])
masked_losses = mask * losses


# Bring back to [B, T] shape
masked_losses = tf.reshape(masked_losses, tf.shape(y))
# Calculate mean loss: per-example sum over real steps / real length,
# then averaged over the batch
example_len_f = tf.constant(example_len, dtype=tf.float32)
mean_loss_by_example = tf.reduce_sum(masked_losses, axis=1) / example_len_f
mean_loss = tf.reduce_mean(mean_loss_by_example)


# Calculate the losses, version 2: hand-written cross entropy.
# log_softmax on the logits is used instead of log(softmax(...)) so that a
# probability underflowing to 0 cannot produce 0 * -inf = NaN.
log_probs_flat = tf.nn.log_softmax(logits_flat)
cross_entropy = tf.one_hot(y_flat, NUM_CLASS) * log_probs_flat
cross_entropy = -tf.reduce_sum(cross_entropy, axis=1)
masked_ce = mask * cross_entropy
masked_ce = tf.reshape(masked_ce, tf.shape(y))
# Must reduce masked_ce here (not masked_losses), otherwise "version 2" is
# just version 1 again and the comparison below is meaningless.
mean_ce_by_example = tf.reduce_sum(masked_ce, axis=1) / example_len_f
mean_ce = tf.reduce_mean(mean_ce_by_example)


# Calculate error
# argmax yields int64; cast to the label dtype so not_equal also works where
# numpy's default integer (and therefore y_flat) is int32.
predictions_flat = tf.cast(tf.argmax(probs_flat, 1), y_flat.dtype)
errors = tf.not_equal(y_flat, predictions_flat)
errors = tf.cast(errors, tf.float32)
masked_errors = mask * errors
masked_errors = tf.reshape(masked_errors, tf.shape(y))
mean_error_by_example = tf.reduce_sum(masked_errors, axis=1) / example_len_f
mean_error = tf.reduce_mean(mean_error_by_example)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Fetch into *_val names so the graph tensors are not overwritten by
    # their numpy values (keeps the cell re-runnable and the tensors usable).
    mean_loss_val, masked_errors_val, mean_ce_val = sess.run([mean_loss, masked_errors, mean_ce])
    print("version 1 loss: %f ; version 2 loss: %f" %(mean_loss_val, mean_ce_val))
    print(masked_errors_val)

    
# result = tf.contrib.learn.run_n(
#     {
#         "y_flat" : y_flat,
#         "mask" : mask,
#         "masked_losses": masked_losses,
#         "mean_loss_by_example": mean_loss_by_example,
#         "mean_loss": mean_loss,
#         "probs": probs,
#         "predicts": tf.argmax(probs,2),
#         "errors": errors,
#         "masked_errors": masked_errors,
#         "mean_error_by_example": mean_error_by_example,
#         "mean_error": mean_error,
#         "masked_ce": masked_ce,
#         "mean_ce_by_example": mean_ce_by_example,        
#         "mean_ce": mean_ce
#     },
#     n=1,
#     feed_dict=None)

# print(result[0]["y_flat"])
# print(result[0]["mask"])
# print(result[0]["masked_losses"])
# print(result[0]["mean_loss_by_example"])
# print(result[0]["mean_loss"])
# print ("--------")
# print(result[0]["probs"].shape)
# print(y)
# print(result[0]["predicts"])
# print(result[0]["errors"])
# print(result[0]["masked_errors"])
# print(result[0]["mean_error_by_example"])
# print(result[0]["mean_error"])
# print ("--------")
# print(result[0]["masked_ce"])
# print(result[0]["mean_ce_by_example"])
# print(result[0]["mean_ce"])


version 1 loss: 16.384155 ; version 2 loss: 16.384155
[[ 1.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  1.  0.  0.  0.  0.  0.  0.]
 [ 1.  1.  1.  0.  0.  0.  0.  0.]
 [ 0.  1.  1.  1.  1.  1.  1.  1.]]


In [None]:
# Notes:
# (1) tf.nn.rnn was moved to tf.contrib.rnn.static_rnn(...). Note that:
#     <1> its "inputs" is a length-T list of tensors, each of shape [batch_size, input_size]
#     <2> it returns a pair (outputs, state) where
#         -- outputs is a length-T list of tensors, each of shape [batch_size, hidden_units]
#         -- state is the final state
# (2) tf.nn.dynamic_rnn(...):
#     <1> its "inputs" is a tensor of shape [batch_size, max_time_steps, input_features]
#     <2> it returns a pair (outputs, state) where
#         -- outputs is a tensor of shape [batch_size, max_time_steps, hidden_units]
#         -- state is the final state, of shape [batch_size, hidden_units]
#

# tensor in [batch_size, max_time_steps, input_features]
def unstack_sequence(tensor):
    """Break one sequence tensor into a list holding one tensor per frame.

    The first two axes are swapped (perm [1, 0, 2]) and the result is then
    unstacked along its leading axis, so each returned element corresponds to
    one index of the input's second axis.
    """
    time_major = tf.transpose(tensor, perm=[1, 0, 2])
    frames = tf.unstack(time_major)
    return frames

def stack_sequence(sequence):
    """Merge a list of per-frame tensors back into one sequence tensor.

    The frames are stacked along a new leading axis and the first two axes are
    then swapped (perm [1, 0, 2]), so the frame index ends up on axis 1.
    """
    time_major = tf.stack(sequence)
    batch_major = tf.transpose(time_major, perm=[1, 0, 2])
    return batch_major

# (2) Assume equal-length inputs rather than variable-length inputs:
#     To retrieve the output of the last time step (a batch unit):
#
#     output, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
#     <i> earlier way
#     output = tf.transpose(output, [1, 0, 2])
#     last = tf.gather(output, int(output.get_shape()[0]) - 1)
#     <ii> current way
#     output = tf.transpose(output, [1, 0, 2])
#     last = output[int(output.get_shape()[0]) - 1]


# (3) Calculate actual timestep length of examples of a batch (examples may have different lengths)
#     Note: It won't work if zero vectors could be actual input features
# tensor in shape of [batch_size, max_time_steps, input_features]
def length(tensor):
    """Count, for each example, the time steps that hold any non-zero feature.

    A step counts as used when the largest absolute value along axis 2 is
    non-zero; the used steps are summed along axis 1 and returned as int32.
    Steps whose features are all exactly zero are treated as padding.
    """
    # Largest absolute feature value per step: 0 only for all-zero steps.
    step_peak = tf.reduce_max(tf.abs(tensor), axis=2)
    # sign() collapses that into a 0/1 "step is used" indicator.
    step_used = tf.sign(step_peak)
    step_count = tf.reduce_sum(step_used, axis=1)
    return tf.cast(step_count, tf.int32)


# (4) Get the last relevant output of variable time-step length examples in a batch
# <1> Old way:
def last_relevant(outputs, seq_lengths):
    """
    Pick, for every example, the output at its last real time step.

    :param outputs: [batch_size x max_seq_length x hidden_size] tensor of dynamic_rnn outputs
    :param seq_lengths: [batch_size] tensor of sequence lengths
    :return: [batch_size x hidden_size] tensor of last outputs
    """
    # tf.unpack was renamed to tf.unstack in TensorFlow 1.0; the rest of this
    # notebook already uses the TF >= 1.0 names.
    batch_size, max_seq_length, hidden_size = tf.unstack(tf.shape(outputs))
    # Flatten batch and time into a single row axis so one gather is enough.
    flat_outputs = tf.reshape(outputs, [-1, hidden_size])
    # Row of example b at step t in the flattened tensor is b * max_seq_length + t;
    # the last real step is t = seq_length - 1.
    index = tf.range(0, batch_size) * max_seq_length + (seq_lengths - 1)
    return tf.gather(flat_outputs, index)

# <2> New way:
def _last_relevant(outputs, actual_lengths):
    """
    Select each example's output at its final real time step using gather_nd.

    :param outputs: [batch_size x max_seq_length x hidden_size] tensor of dynamic_rnn outputs
    :param actual_lengths: [batch_size] tensor of sequence actual lengths
    :return: [batch_size x hidden_size] tensor of last outputs
    """
    example_ids = tf.range(tf.shape(outputs)[0])
    last_steps = actual_lengths - 1
    # One (example, step) coordinate pair per row of the batch.
    coords = tf.stack([example_ids, last_steps], axis=1)
    return tf.gather_nd(outputs, coords)



In [None]:
# References:
# (1) http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/
# (2) https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
# (3) https://danijar.com/variable-sequence-lengths-in-tensorflow/
# (4) https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py)
# (5) https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/dynamic_rnn.py
# (6) https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767
