# Bidirectional RNN and GRU Cells for Digit recognizer

## Setup

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.utils import to_categorical

## Data

In [2]:
# Read the labelled training set and the unlabelled test set from the
# current working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

## One hot encoding

In [3]:
# Split the training frame into the pixel matrix X and a label column vector y,
# then expand the integer labels into one-hot rows.
label_column = 'label'
X = train.drop(columns = label_column).values
y = train[label_column].values.reshape(-1, 1)
y_oh = to_categorical(y)
print(y.shape, y_oh.shape, sep = "\n")

(42000, 1)
(42000, 10)


## Validation data

In [4]:
# Hold out 20% of the labelled rows for validation; the fixed random_state
# keeps the split identical between runs.
X_train, X_vali, y_train, y_vali = train_test_split(
    X, y_oh,
    test_size = 0.2,
    random_state = 42,
)
print(*(part.shape for part in (X_train, X_vali, y_train, y_vali)))

(33600, 784) (8400, 784) (33600, 10) (8400, 10)


## Bidirectional RNN with GRU cells

In [5]:
# Start from a fresh default graph so that re-running the graph-building cells
# below does not pile new ops on top of a graph left over from an earlier run.
tf.reset_default_graph()

In [6]:
def next_batch(num, data, labels):
    """Draw a random mini-batch of aligned samples and labels.

    Rows are sampled without replacement, and the same shuffled row indices
    are applied to ``data`` and ``labels`` so every sample keeps its label.

    Parameters
    ----------
    num : int
        Number of rows to draw. If it exceeds ``len(data)``, all rows are
        returned (in shuffled order).
    data : array-like
        Samples, indexed along the first axis.
    labels : array-like
        Labels, indexed along the first axis; must be as long as ``data``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(batch_data, batch_labels)``.

    Raises
    ------
    ValueError
        If ``data`` and ``labels`` do not have the same length.
    """
    if len(data) != len(labels):
        raise ValueError(
            "data and labels must have the same length, got {} and {}".format(
                len(data), len(labels)))

    # Shuffle every row index, then keep the first `num`.
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]

    # Index-array lookup gathers all selected rows in a single NumPy call
    # instead of building Python lists row by row.
    return np.asarray(data)[idx], np.asarray(labels)[idx]

In [7]:
# parameters
time_steps = 784
# batch_size = 128
batch_size = 280
num_classes = 10
# hidden_layer_size = 32
hidden_layer_size = 16

In [8]:
# Graph inputs. The batch dimension is left as None so the same graph accepts
# any number of rows per feed (training batches, a validation slice, or a
# final partial test batch) instead of exactly `batch_size` rows.
#   _inputs: [batch, time_steps, 1]  - one pixel value per time step
#   _labels: [batch, num_classes]    - one-hot targets
_inputs = tf.placeholder(tf.float32, shape = [None, time_steps, 1])
_labels = tf.placeholder(tf.float32, shape = [None, num_classes])

In [9]:
# Fixed validation mini-batch: the first `batch_size` held-out rows, with the
# pixels reshaped to the [batch, time_steps, 1] layout the input placeholder uses.
validation_label = y_vali[:batch_size]
validation_data = np.reshape(X_vali[:batch_size], (-1, time_steps, 1))

In [10]:
# Sanity-check the validation batch shapes (data first, then labels).
print(validation_data.shape, validation_label.shape, sep = "\n")

(280, 784, 1)
(280, 10)


In [11]:
# Build the bidirectional recurrent part of the graph: one GRU cell per
# direction, each with `hidden_layer_size` units, unrolled over `_inputs`.
with tf.name_scope("biGRU"):
    with tf.variable_scope('forward'):
        gru_fw_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
        # NOTE(review): no keep-probability arguments are passed, so this
        # wrapper presumably falls back to its defaults (believed to mean
        # "keep everything", i.e. no dropout) - confirm whether dropout was intended.
        gru_fw_cell = tf.contrib.rnn.DropoutWrapper(gru_fw_cell)

    with tf.variable_scope('backward'):
        gru_bw_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
        # NOTE(review): same as the forward cell - wrapper used with default arguments.
        gru_bw_cell = tf.contrib.rnn.DropoutWrapper(gru_bw_cell)

    # `outputs` is not referenced again in the visible cells; only `states`
    # feeds the classifier.
    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell,
                                                      cell_bw = gru_bw_cell,
                                                      inputs = _inputs,
                                                      dtype = tf.float32,
                                                      scope = "BiGRU")

# Join the per-direction states along axis 1 into a single tensor; the read-out
# layer's weight matrix has 2 * hidden_layer_size rows to match this width.
states = tf.concat(values = states, axis = 1)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.


In [12]:
# Linear read-out layer mapping the concatenated GRU states
# (2 * hidden_layer_size wide) to one logit per class.
weights = {'linear_layer': tf.Variable(tf.truncated_normal([2 * hidden_layer_size, num_classes], mean = 0, stddev = 0.01))}
biases = {'linear_layer': tf.Variable(tf.truncated_normal([num_classes], mean = 0, stddev = 0.01))}

final_output = tf.matmul(states, weights["linear_layer"]) + biases["linear_layer"]

# Per-example softmax cross-entropy (despite the variable name, this holds the
# loss values, not softmax probabilities). The `_v2` op replaces the deprecated
# `softmax_cross_entropy_with_logits`; it can also back-propagate into
# `labels`, but `_labels` is a placeholder, so the training gradients are the same.
softmax = tf.nn.softmax_cross_entropy_with_logits_v2(labels = _labels, logits = final_output)
cross_entropy = tf.reduce_mean(softmax)

# RMSProp with learning rate 0.001 and decay 0.9.
train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy)

# Accuracy in percent: share of rows whose arg-max logit matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(_labels, 1), tf.argmax(final_output, 1))
accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32))) * 100

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [13]:
# Open the session (kept open for the prediction cell further down) and
# initialise all graph variables.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# The validation mini-batch never changes, so its feed dict is built once.
validation_feed = {_inputs: validation_data, _labels: validation_label}

for i in range(10000):
    # Sample a fresh random mini-batch and take one optimisation step on it.
    batch_x, batch_y = next_batch(batch_size, X_train, y_train)
    batch_x = batch_x.reshape((batch_size, time_steps, 1))
    train_feed = {_inputs: batch_x, _labels: batch_y}
    sess.run(train_step, feed_dict = train_feed)

    # Only report every 1000th iteration.
    if i % 1000 != 0:
        continue

    # Metrics on the batch just trained on, then on the fixed validation batch.
    acc, loss = sess.run([accuracy, cross_entropy], feed_dict = train_feed)
    val_acc = sess.run(accuracy, feed_dict = validation_feed)
    report = "".join(["Iter = " + str(i),
                      " Loss = {:.6f}".format(loss),
                      " Accuracy = {:.5f}".format(acc),
                      " Validation accuracy = {:.5f}".format(val_acc)])
    print(report)

Iter = 0 Loss = 2.302186 Accuracy = 8.57143 Validation accuracy = 12.14286
Iter = 1000 Loss = 1.398895 Accuracy = 40.35714 Validation accuracy = 44.28571
Iter = 2000 Loss = 1.118548 Accuracy = 59.64286 Validation accuracy = 61.78572
Iter = 3000 Loss = 0.926202 Accuracy = 65.35714 Validation accuracy = 66.42857
Iter = 4000 Loss = 0.935283 Accuracy = 67.14285 Validation accuracy = 68.92857
Iter = 5000 Loss = 0.745229 Accuracy = 76.07143 Validation accuracy = 74.28571
Iter = 6000 Loss = 0.683156 Accuracy = 76.42857 Validation accuracy = 76.42857
Iter = 7000 Loss = 0.777725 Accuracy = 74.64285 Validation accuracy = 71.78571
Iter = 8000 Loss = 0.656688 Accuracy = 78.57143 Validation accuracy = 81.07143
Iter = 9000 Loss = 0.580691 Accuracy = 82.14286 Validation accuracy = 79.28572


Iter = 0 Loss = 2.301327 Accuracy = 11.71875 Validation accuracy = 7.03125
Iter = 1000 Loss = 1.559995 Accuracy = 39.06250 Validation accuracy = 47.65625
Iter = 2000 Loss = 1.138049 Accuracy = 53.90625 Validation accuracy = 67.96875
Iter = 3000 Loss = 0.874593 Accuracy = 71.09375 Validation accuracy = 75.78125
Iter = 4000 Loss = 0.569118 Accuracy = 80.46875 Validation accuracy = 78.12500
Iter = 5000 Loss = 0.903366 Accuracy = 66.40625 Validation accuracy = 71.87500
Iter = 6000 Loss = 0.594484 Accuracy = 78.12500 Validation accuracy = 86.71875
Iter = 7000 Loss = 0.431124 Accuracy = 83.59375 Validation accuracy = 86.71875
Iter = 8000 Loss = 0.495741 Accuracy = 84.37500 Validation accuracy = 85.93750
Iter = 9000 Loss = 0.318530 Accuracy = 91.40625 Validation accuracy = 86.71875

## Test data prediction

In [18]:
# Give the test pixels the [rows, time_steps, 1] layout the input placeholder
# uses, and show the shape before and after.
X_test = np.reshape(test.values, (-1, time_steps, 1))
print(test.shape, X_test.shape, sep = "\n")

(28000, 784)
(28000, 784, 1)


In [19]:
# Build the prediction op once. Creating it inside the loop would add a new
# argmax node to the graph on every iteration.
predict_op = tf.argmax(final_output, 1)

y_pred_list = []

# Walk over the test rows in chunks of `batch_size`. Stepping by row offset
# (rather than floor-dividing the row count) means trailing rows are not
# silently dropped when the row count is not a multiple of `batch_size`.
for start in range(0, X_test.shape[0], batch_size):
    X_test_iter = X_test[start:start + batch_size]
    n_valid = X_test_iter.shape[0]

    if n_valid < batch_size:
        # Zero-pad a short final chunk up to a full batch so the feed always
        # has `batch_size` rows; the padded predictions are discarded below.
        padding = np.zeros((batch_size - n_valid,) + X_test_iter.shape[1:],
                           dtype = X_test_iter.dtype)
        X_test_iter = np.concatenate([X_test_iter, padding], axis = 0)

    y_pred = sess.run(predict_op, feed_dict = {_inputs: X_test_iter})

    y_pred_list.append(y_pred[:n_valid])

# Flatten the per-batch arrays into one flat list of predicted labels.
y_pred_list = [item for sublist in y_pred_list for item in sublist]

## Submission

In [20]:
# Submission ids are 1-based row numbers; check there is one prediction per id.
test_id = np.arange(1, len(X_test) + 1)
print(test_id.shape, len(y_pred_list), sep = "\n")

(28000,)
28000


In [21]:
# Assemble the two-column submission table: image id and predicted label.
submission_columns = {'ImageId': test_id, 'Label': y_pred_list}
sub = pd.DataFrame(submission_columns)
sub.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,2


In [22]:
# Write the submission CSV. The original author's output folder is kept when it
# exists; on any other machine the file goes to the current working directory
# instead of failing on a missing absolute path.
submission_dir = Path('C:\\Users\\yukic\\Documents\\kaggle\\digit_recognizer')
if not submission_dir.is_dir():
    submission_dir = Path.cwd()
sub.to_csv(str(submission_dir / 'submission_190910.csv'), index = False)