# Recurrent neural network for Digit Recognizer

## Setup

In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from tensorflow.keras.utils import to_categorical

## Data

In [2]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

In [3]:
train.shape

(42000, 785)

In [4]:
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
np.sqrt(784)

28.0

## One hot encoding

In [7]:
y = train['label'].values.reshape((-1, 1))
X = train.drop('label', axis = 1).values
print(y.shape, X.shape)

(42000, 1) (42000, 784)


In [8]:
y_oh = to_categorical(y)
y_oh.shape

(42000, 10)

In [9]:
y_oh

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

## Validation data

In [10]:
X_train, X_vali, y_train, y_vali = train_test_split(X, y_oh, test_size = 0.2, random_state = 42)

In [11]:
print(X_train.shape, X_vali.shape, y_train.shape, y_vali.shape)

(33600, 784) (8400, 784) (33600, 10) (8400, 10)


## RNN

In [12]:
tf.reset_default_graph()

In [13]:
# parameters
element_size = 28
time_steps = 28
num_classes = 10
# batch_size = 128
batch_size = 280
# hidden_layer_size = 128
hidden_layer_size = 256

In [14]:
x = tf.placeholder(tf.float32, shape = [None, time_steps, element_size], name = 'inputs')
y = tf.placeholder(tf.float32, shape = [None, num_classes], name = 'labels')

In [15]:
validation_data = X_vali[:batch_size].reshape((-1, time_steps, element_size))
validation_label = y_vali[:batch_size]

In [16]:
def next_batch(num, data, labels):
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)

In [17]:
Wx = tf.Variable(tf.zeros([element_size, hidden_layer_size]))
Wh = tf.Variable(tf.zeros([hidden_layer_size, hidden_layer_size]))
b_rnn = tf.Variable(tf.zeros([hidden_layer_size]))

def rnn_step(previous_hidden_state, x):
    current_hidden_state = tf.tanh(tf.matmul(previous_hidden_state, Wh) + tf.matmul(x, Wx) + b_rnn)
    return current_hidden_state    

processed_input = tf.transpose(x, perm = [1, 0, 2])
initial_hidden = tf.zeros([batch_size, hidden_layer_size])

all_hidden_states = tf.scan(rnn_step, processed_input, initializer = initial_hidden)

Wl = tf.Variable(tf.truncated_normal([hidden_layer_size, num_classes], mean = 0, stddev = 0.01))
bl = tf.Variable(tf.truncated_normal([num_classes], mean = 0, stddev = 0.01))

def get_linear_layer(hidden_state):
    return tf.matmul(hidden_state, Wl) + bl

all_outputs = tf.map_fn(get_linear_layer, all_hidden_states)
output = all_outputs[-1]
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = output, labels = y))
train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(output, 1))
accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32))) * 100

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [18]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

for i in range(10000):
    
    batch_x, batch_y = next_batch(batch_size, X_train, y_train)
    batch_x = batch_x.reshape((batch_size, time_steps, element_size))
    sess.run(train_step, feed_dict = {x: batch_x, y: batch_y})
    
    if i % 1000 == 0:
        acc, loss = sess.run([accuracy, cross_entropy], feed_dict = {x: batch_x, y: batch_y})
        val_acc = sess.run(accuracy, feed_dict = {x: validation_data, y: validation_label})
        print("Iteration = " + str(i) + " Loss = {:.6f}".format(loss) + " Accuracy = {:.5f}".format(acc) +
              " Validation accuracy = {:.5f}".format(val_acc))

Iteration = 0 Loss = 2.302687 Accuracy = 9.28571 Validation accuracy = 7.50000
Iteration = 1000 Loss = 1.026082 Accuracy = 59.28571 Validation accuracy = 60.35714
Iteration = 2000 Loss = 1.001983 Accuracy = 60.00000 Validation accuracy = 57.85714
Iteration = 3000 Loss = 0.744073 Accuracy = 71.42857 Validation accuracy = 71.42857
Iteration = 4000 Loss = 0.627137 Accuracy = 77.14286 Validation accuracy = 71.78571
Iteration = 5000 Loss = 0.518292 Accuracy = 82.14286 Validation accuracy = 74.64285
Iteration = 6000 Loss = 0.622324 Accuracy = 77.14286 Validation accuracy = 74.64285
Iteration = 7000 Loss = 0.499300 Accuracy = 82.50000 Validation accuracy = 77.85714
Iteration = 8000 Loss = 0.445070 Accuracy = 82.50000 Validation accuracy = 78.92857
Iteration = 9000 Loss = 0.397097 Accuracy = 85.35714 Validation accuracy = 80.35714


## Test data prediction

In [19]:
X_test = test.values.reshape((-1, time_steps, element_size))
print(test.shape)
print(X_test.shape)

(28000, 784)
(28000, 28, 28)


In [38]:
y_pred_list = []

for i in range(X_test.shape[0]//batch_size):
    X_test_iter = X_test[(i*batch_size):((i+1)*batch_size)]
    y_pred = sess.run(tf.argmax(output, 1), feed_dict = {x: X_test_iter})
    
    y_pred_list.append(y_pred)
    
y_pred_list = [item for sublist in y_pred_list for item in sublist]

## Submission

In [40]:
test_id = np.arange(1, X_test.shape[0] + 1, 1)
test_id

array([    1,     2,     3, ..., 27998, 27999, 28000])

In [44]:
print(test_id.shape)
print(len(y_pred_list))

(28000,)
28000


In [43]:
sub = pd.DataFrame(data = {'ImageId': test_id,
                           'Label': y_pred_list})
sub.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,2
4,5,2


In [45]:
sub.to_csv('C:\\Users\\yukic\\Documents\\kaggle\\digit_recognizer\\submission_190907.csv', index = False)