In [1]:
import numpy as np
import sys
import tensorflow as tf

import rnn_cells

In [2]:
def _build_recurrent_cell(hidden_dim, dropout_keep_prob):
    return rnn_cells.LayerNormLSTMCell(
        hidden_dim, 
        use_recurrent_dropout=True,
        dropout_keep_prob=dropout_keep_prob
    )

def compute_n_batches(n_samples, batch_size):
    n_batches = n_samples // batch_size
    if n_samples % batch_size != 0:
        n_batches += 1
    return n_batches

In [3]:
class Network(object):
    pass

class RNN(Network):
    
    def __init__(
            self,
            name,
            input_dim,
            hidden_dim,
            max_len,
            output_dim,
            batch_size=100,
            dropout_keep_prob=1.,
            learning_rate=0.001,
            grad_clip=1.):
        self.name = name
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.max_len = max_len
        self.output_dim = output_dim
        self.batch_size = batch_size
        self.dropout_keep_prob = dropout_keep_prob
        self.learning_rate = learning_rate
        self.grad_clip = grad_clip
        
        self.build_model()
        
    def build_model(self):
        with tf.variable_scope(self.name):
            self.build_placeholders()
            self.build_network()
            self.build_loss()
            self.build_train_op()
            self.build_summaries()
    
    def build_placeholders(self):
        self.inputs = tf.placeholder(tf.float32, (self.batch_size, self.max_len, self.input_dim), 'inputs')
        self.targets = tf.placeholder(tf.int32, (self.batch_size, self.max_len), 'targets')
        self.lengths = tf.placeholder(tf.int32, (self.batch_size,), 'lengths')
        self.sequence_mask = tf.sequence_mask(self.lengths, maxlen=self.max_len, dtype=tf.float32)
        self.dropout_keep_prop_ph = tf.placeholder_with_default(self.dropout_keep_prob, (), 'dropout_keep_prob')
        
    def build_network(self):
        self.cell_fw = _build_recurrent_cell(self.hidden_dim, self.dropout_keep_prop_ph)
        self.cell_bw = _build_recurrent_cell(self.hidden_dim, self.dropout_keep_prop_ph)
        
        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            self.cell_fw,
            self.cell_bw,
            inputs=self.inputs,
            sequence_length=self.lengths,
            dtype=tf.float32,
            time_major=False
        )
        
        outputs = tf.concat(outputs, axis=1)
        outputs = tf.reshape(outputs, (self.batch_size * self.max_len, -1))
        scores = tf.contrib.layers.fully_connected(
            outputs,
            self.output_dim,
            activation_fn=None
        )
        self.scores = tf.reshape(scores, (self.batch_size, self.max_len, self.output_dim))
        self.probs = tf.nn.softmax(self.scores)
        
    def build_loss(self):
        
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.targets, logits=self.scores)
        loss = tf.reduce_sum(self.sequence_mask * losses, axis=1) / tf.cast(self.lengths, tf.float32)
        self.loss = tf.reduce_mean(loss)
        
    def build_train_op(self):
        self.var_list = tf.trainable_variables()
        
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        grads_vars = optimizer.compute_gradients(self.loss, self.var_list)
        clipped_grads_vars = [(tf.clip_by_value(g, -self.grad_clip, self.grad_clip), v) for (g,v) in grads_vars]
        self.train_op = optimizer.apply_gradients(clipped_grads_vars)
        
    def build_summaries(self):
        pass
        
    def train(self, data, n_epochs=100):
        sess = tf.get_default_session()
        
        n_samples = len(data['train_x'])
        n_batches = compute_n_batches(n_samples, self.batch_size)
        
        for epoch in range(n_epochs):
            total_loss = 0
            for bidx in range(n_batches):
                s = bidx * self.batch_size
                e = s + self.batch_size
                
                feed_dict = {
                    self.inputs:data['train_x'][s:e],
                    self.targets:data['train_y'][s:e],
                    self.lengths:data['train_lengths'][s:e]
                }
                outputs_list = [self.loss, self.train_op]
                loss, _ = sess.run(outputs_list, feed_dict=feed_dict)
                total_loss += loss
                sys.stdout.write('\repoch: {} / {} batch: {} / {} loss: {}'.format(
                    epoch+1, n_epochs, bidx+1, n_batches, total_loss / (self.batch_size * (bidx+1))))
    

In [4]:
tf.reset_default_graph()
input_dim = 4
hidden_dim = 16
max_len = 8
output_dim = 2
batch_size = 100
model = RNN(
    'test', 
    input_dim, 
    hidden_dim, 
    max_len, 
    output_dim, 
    batch_size=batch_size
)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [10]:
n_samples = batch_size
x = np.random.randn(n_samples, max_len, input_dim)
y = np.sum(x, axis=(2))
y[y>0] = 1
y[y<=0] = 0
y = y.astype(int)
data = dict(
    train_x=x, 
    train_y=y, 
    train_lengths=np.ones(n_samples, dtype=int) * max_len
)

In [17]:
model.train(data)

epoch: 100 / 100 batch: 1 / 1 loss: 5.4935552179813384e-05