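# Character-Level Penn Treebank Demo 
Lightweight demo of our DilatedRNN on character-level language modeling with the Penn Treebank corpus: the model reads a window of characters and predicts the character that follows it.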

In [1]:
import sys
sys.path.append("./models")
import numpy as np
import tensorflow as tf
from classification_models import drnn_classification

In [2]:
# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
# Pre-processed character-level Penn Treebank from the Zoneout paper:
# https://github.com/teganmaharaj/wtfcptb/raw/master/char_level_penntree.npz
data_path = "./char_level_penntree.npz"
n_steps = 100      # number of characters in each input window
input_dims = 64    # size of the character embedding vectors
n_classes = 50     # vocabulary size (number of distinct characters)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
cell_type = "GRU"
assert cell_type in ("RNN", "LSTM", "GRU")

# One entry per layer: its dilation rate and its hidden size.
dilations = [1, 2, 4, 8, 16, 32, 64]
hidden_structs = [256] * len(dilations)
assert len(dilations) == len(hidden_structs)

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
batch_size = 64
test_batch_size = 128          # batch size used for the validation pass
learning_rate = 1.0e-3
training_iters = 30000 * batch_size   # total number of training samples to draw

display_step = 10   # print the minibatch loss every `display_step` updates
testing_step = 10   # run a validation pass every `testing_step` updates

In [7]:
class PTB_data(object):
    """
    Mini-batch sampler for next-symbol prediction on a symbol sequence.

    A sample is a window of ``n_steps`` consecutive symbols (the input) paired
    with the single symbol that immediately follows the window (the target).

    Args:
        data_dict: mapping that provides the splits 'train' and 'valid'. Each
            split is expected to be a 1-D sequence of integer symbol ids (it
            is indexed along one axis and stored into int32 batches).
            A lazily-loading mapping such as the object returned by
            ``np.load`` on an ``.npz`` file is supported: every split is read
            from the mapping only once and then kept in memory.
    """
    def __init__(self, data_dict):
        self.data_dict = data_dict
        # split name -> ndarray. Filled on first use so that a lazy mapping is
        # not asked to re-read the same array for every sample of every batch.
        # Note: rebinding a key of `data_dict` after the split was first used
        # is therefore not picked up.
        self._split_cache = {}

    def _get_split(self, name):
        """
        Return split `name` as an ndarray, reading it from `data_dict` once.
        """
        if name not in self._split_cache:
            self._split_cache[name] = np.asarray(self.data_dict[name])
        return self._split_cache[name]

    def random_train_batch(self, batch_size, n_steps):
        """
        Randomly sample a batch of sequences of length n_steps from training set.

        Window start positions are drawn uniformly, with replacement, from all
        positions that leave room for the window plus its target symbol.

        Args:
            batch_size: number of samples to draw.
            n_steps: length of each input window.

        Returns:
            (batch_x, batch_y): int32 arrays of shape (batch_size, n_steps)
            and (batch_size,).

        Raises:
            ValueError: if the training split has fewer than n_steps + 1
                symbols, i.e. not even one (window, target) pair fits.
        """
        train = self._get_split('train')
        # Last valid start is len(train) - n_steps - 1 (its target is the final
        # symbol), so there are len(train) - n_steps valid start positions.
        n_windows = len(train) - n_steps
        if n_windows <= 0:
            raise ValueError(
                "training split has %d symbols; at least %d are required for n_steps=%d"
                % (len(train), n_steps + 1, n_steps))

        # An integer first argument samples from arange(n_windows) without
        # materializing a Python list of every possible offset.
        offsets = np.random.choice(n_windows, batch_size)

        # Row i of the index matrix is offsets[i], offsets[i]+1, ..., so fancy
        # indexing gathers all windows at once.
        batch_x = train[offsets[:, None] + np.arange(n_steps)].astype(np.int32)
        batch_y = train[offsets + n_steps].astype(np.int32)
        return batch_x, batch_y

    def get_validation_batches(self, batch_size, n_steps):
        """
        A generator for all validation data in mini batches.

        Windows start at consecutive positions 0, 1, 2, ... of the validation
        split. Only full batches are produced; trailing windows that do not
        fill a whole batch are dropped. Every yielded pair consists of freshly
        allocated arrays, so batches may safely be collected in a list.

        Args:
            batch_size: number of samples per batch.
            n_steps: length of each input window.

        Yields:
            (batch_x, batch_y): int32 arrays of shape (batch_size, n_steps)
            and (batch_size,).
        """
        valid = self._get_split('valid')
        # Floor division keeps this an int under both Python 2 and Python 3.
        # A split that is too short gives a non-positive count -> no batches.
        n_batches = (len(valid) - n_steps) // batch_size
        in_batch = np.arange(batch_size)
        in_window = np.arange(n_steps)

        for batch_id in range(n_batches):
            offsets = batch_id * batch_size + in_batch
            batch_x = valid[offsets[:, None] + in_window].astype(np.int32)
            batch_y = valid[offsets + n_steps].astype(np.int32)
            yield batch_x, batch_y

# Open the dataset archive at `data_path` and wrap it in the batch sampler.
# NOTE(review): for an .npz file np.load returns a lazy archive object whose
# arrays are read from disk when they are indexed, not at this point.
ptb_data = PTB_data(np.load(data_path))

In [4]:
# build computation graph
# Discard whatever the default graph currently holds and start from an empty one.
tf.reset_default_graph()
# x: integer symbol ids, shape (batch, n_steps); y: target symbol id per sample, shape (batch,).
x = tf.placeholder(tf.int32, [None, n_steps])
y = tf.placeholder(tf.int32, [None,])

# Trainable lookup table with one input_dims-sized row per symbol. No initializer
# is passed, so tf.get_variable's default initializer is used.
char_embeddings = tf.get_variable("char_embeddings", [n_classes, input_dims])
# Replace every id in x by its embedding row -> (batch, n_steps, input_dims).
x_emb = tf.nn.embedding_lookup(char_embeddings, x)

# Non-trainable counter; the optimizer below increments it on every update.
global_step = tf.Variable(0, name='global_step', trainable=False)

# build prediction graph
print "==> Building a dRNN with %s cells" %cell_type
# NOTE(review): pred is consumed as logits below, so it is presumably of shape
# (batch, n_classes) - confirm against classification_models.drnn_classification.
pred = drnn_classification(x_emb, hidden_structs, dilations, n_steps, n_classes, input_dims, cell_type)

# build loss and optimizer
# Mean softmax cross-entropy over the batch; labels are class indices, not one-hot.
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=y))
# The cross-entropy is measured in nats; dividing by ln(2) converts it to bits
# per character (BPC).
bpc_cost = cost / np.log(2.0)
# RMSProp (second positional argument 0.9 is the decay) minimizing the BPC loss.
optimizer = tf.train.RMSPropOptimizer(learning_rate, 0.9).minimize(bpc_cost, global_step=global_step)


==> Building a dRNN with GRU cells
Instructions for updating:
Use the retry module or similar alternatives.
Building layer: multi_dRNN_dilation_1, input length: 100, dilation rate: 1, input dim: 64.
=====> Input length for sub-RNN: 100
Building layer: multi_dRNN_dilation_2, input length: 100, dilation rate: 2, input dim: 256.
=====> Input length for sub-RNN: 50
Building layer: multi_dRNN_dilation_4, input length: 100, dilation rate: 4, input dim: 256.
=====> Input length for sub-RNN: 25
Building layer: multi_dRNN_dilation_8, input length: 100, dilation rate: 8, input dim: 256.
=====> 4 time points need to be padded. 
=====> Input length for sub-RNN: 13
Building layer: multi_dRNN_dilation_16, input length: 100, dilation rate: 16, input dim: 256.
=====> 12 time points need to be padded. 
=====> Input length for sub-RNN: 7
Building layer: multi_dRNN_dilation_32, input length: 100, dilation rate: 32, input dim: 256.
=====> 28 time points need to be padded. 
=====> Input length for sub-RNN:

In [5]:
# Add the op that assigns every global variable its initial value, then open
# a session on the default graph and execute that op once.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [None]:
# Training loop: one RMSProp update per iteration on a random training batch,
# with periodic logging and a periodic pass over the validation set.
step = 0
train_results = []       # (global_step value, minibatch BPC) for every update
validation_results = []  # (global_step value, mean validation BPC) for every validation pass
test_results = []        # not filled by this cell

while step * batch_size < training_iters:
    batch_x, batch_y = ptb_data.random_train_batch(batch_size, n_steps)

    feed_dict = {
        x : batch_x,
        y : batch_y
    }
    # Fetching `optimizer` is what applies the update; the loss and the step
    # counter are fetched in the same run call.
    bpc_cost_, step_, _ = sess.run([bpc_cost, global_step, optimizer], feed_dict=feed_dict)
    train_results.append((step_, bpc_cost_))

    if (step + 1) % display_step == 0:
        # Single parenthesised expression: prints the same text under
        # Python 2 and is also valid Python 3.
        print("Iter " + str(step + 1) + ", Minibatch Loss: " + "{:.6f}".format(bpc_cost_))

    if (step + 1) % testing_step == 0:

        # validation performance
        # Validation uses its own variable names so the training batch, loss
        # and step recorded above are not overwritten.
        batch_bpcs = []
        for val_x, val_y in ptb_data.get_validation_batches(test_batch_size, n_steps):
            val_feed = {
                x : val_x,
                y : val_y
            }
            # Only the loss is fetched: no optimizer op, so no weights change.
            batch_bpcs.append(sess.run(bpc_cost, feed_dict=val_feed))

        if batch_bpcs:
            # All validation batches have the same size, so the mean of the
            # per-batch means equals the mean over all evaluated samples.
            validation_bpc = np.mean(batch_bpcs)
            validation_results.append((step_, validation_bpc))
            print("========> Validation BPC: " + "{:.6f}".format(validation_bpc) + " over %d batches" % len(batch_bpcs))
        else:
            # Averaging an empty list would produce nan; report it instead.
            print("========> Validation skipped: validation set yields no full batch of size %d" % test_batch_size)
    step += 1

Iter 10, Minibatch Loss: 5.847838
