# Digit Recognizer using Double Layer CNN
* https://www.kaggle.com/c/digit-recognizer

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

## prepareing data
Download data from https://www.kaggle.com/c/digit-recognizer/data

In [2]:
n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

validation_size = 2000

In [3]:
train = pd.read_csv('../input/digit-recognizer/train.csv')
test  = pd.read_csv('../input/digit-recognizer/test.csv')

In [4]:
print(train.shape)
print(test.shape)

(42000, 785)
(28000, 784)


split train data to labels and pixels.

In [5]:
features = (train.ix[:,1:].values).astype('float32')
labels = pd.get_dummies(train.ix[:,0]).astype('float32')

In [6]:
print(features.shape)
print(labels.shape)

(42000, 784)
(42000, 10)


In [7]:
# split data into training & validation
valid_features = features[:validation_size]
valid_labels = labels[:validation_size]

train_features = features[validation_size:]
train_labels = labels[validation_size:]

In [8]:
print(train_features.shape)
print(train_labels.shape)
print(valid_features.shape)
print(valid_labels.shape)

(40000, 784)
(40000, 10)
(2000, 784)
(2000, 10)


In [9]:
test_features = (test.values).astype('float32')

In [10]:
print(test_features.shape)

(28000, 784)


## Make a TensorFlow Graph

In [11]:
# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])

Make a first convolultion layer.

In [12]:
num_filters1 = 32

x_image = tf.reshape(features, [-1,28,28,1])

W_conv1 = tf.Variable(tf.truncated_normal([5,5,1,num_filters1], stddev=0.1))
h_conv1 = tf.nn.conv2d(x_image, W_conv1, strides=[1,1,1,1], padding="SAME")
b_conv1 = tf.Variable(tf.constant(0.1, shape=[num_filters1]))
h_conv1_cutoff = tf.nn.relu(h_conv1 + b_conv1)
h_pool1 = tf.nn.max_pool(h_conv1_cutoff, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")

Make a second convolution layer.

In [13]:
num_filters2 = 64

W_conv2 = tf.Variable(tf.truncated_normal([5,5,num_filters1,num_filters2], stddev=0.1))
h_conv2 = tf.nn.conv2d(h_pool1, W_conv2, strides=[1,1,1,1], padding="SAME")
b_conv2 = tf.Variable(tf.constant(0.1, shape=[num_filters2]))
h_conv2_cutoff = tf.nn.relu(h_conv2 + b_conv2)
h_pool2 = tf.nn.max_pool(h_conv2_cutoff, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")

Make fully-connected graph

In [14]:
num_units1 = 7*7*num_filters2
num_units2 = 1024

h_pool_flat = tf.reshape(h_pool2, [-1,num_units1])

# Weights & bias
w1 = tf.Variable(tf.random_normal([num_units1, num_units2]))
b1 = tf.Variable(tf.random_normal([num_units2]))

# Hidden Layer - a(xW1 + b1)
z1 = tf.add(tf.matmul(h_pool_flat, w1), b1)
h1 = tf.nn.relu(z1)

keep_prob = tf.placeholder(tf.float32)
h1_drop = tf.nn.dropout(h1, keep_prob)

# Weights & bias
w0 = tf.Variable(tf.random_normal([num_units2, n_classes]))
b0 = tf.Variable(tf.random_normal([n_classes]))

# Logits - h1W0 + b0
logits = tf.add(tf.matmul(h1_drop, w0), b0)

# Define loss and optimizer
learning_rate = tf.placeholder(tf.float32)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Calculate accuracy
predict = tf.argmax(logits, 1)
correct_prediction = tf.equal(predict, tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

## Training

Define helper functions.

In [20]:
def print_epoch_stats(epoch_i, sess, last_features, last_labels, prob):
    """
    Print cost and validation accuracy of an epoch
    """
    current_cost = sess.run(
        cost,
        feed_dict={features: last_features, labels: last_labels, keep_prob: prob})
    valid_accuracy = sess.run(
        accuracy,
        feed_dict={features: valid_features, labels: valid_labels, keep_prob: prob})
    print('Epoch: {:<4} - Cost: {:<8.3} Valid Accuracy: {:<5.3}'.format(
        epoch_i,
        current_cost,
        valid_accuracy))

In [16]:
import math
def batches(batch_size, features, labels):
    """
    Create batches of features and labels
    :param batch_size: The batch size
    :param features: List of features
    :param labels: List of labels
    :return: Batches of (Features, Labels)
    """
    assert len(features) == len(labels)
    outout_batches = []
    
    sample_size = len(features)
    for start_i in range(0, sample_size, batch_size):
        end_i = start_i + batch_size
        batch = [features[start_i:end_i], labels[start_i:end_i]]
        outout_batches.append(batch)
        
    return outout_batches

HyperParameters

In [17]:
batch_size = 50
epochs = 10
learn_rate = 0.0001
prob = 0.5

In [18]:
train_batches = batches(batch_size, train_features, train_labels)

In [None]:
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch_i in range(epochs):

        # Loop over all batches
        for batch_features, batch_labels in train_batches:
            train_feed_dict = {
                features: batch_features,
                labels: batch_labels,
                learning_rate: learn_rate,
                keep_prob: prob}
            sess.run(optimizer, feed_dict=train_feed_dict)

        # Print cost and validation accuracy of an epoch
        print_epoch_stats(epoch_i, sess, batch_features, batch_labels, prob)

    predictions = sess.run(
                        predict, 
                        feed_dict={features: test_features,
                                   keep_prob: prob})

Epoch: 0    - Cost: 3.82e+03 Valid Accuracy: 0.624
Epoch: 1    - Cost: 1.44e+03 Valid Accuracy: 0.711
Epoch: 2    - Cost: 1.31e+03 Valid Accuracy: 0.738
Epoch: 3    - Cost: 6.15e+02 Valid Accuracy: 0.787
Epoch: 4    - Cost: 3.91e+02 Valid Accuracy: 0.812
Epoch: 5    - Cost: 3.98e+02 Valid Accuracy: 0.816
Epoch: 6    - Cost: 69.6     Valid Accuracy: 0.811
Epoch: 7    - Cost: 2.23e+02 Valid Accuracy: 0.84 
Epoch: 8    - Cost: 2.2e+02  Valid Accuracy: 0.842
Epoch: 9    - Cost: 66.2     Valid Accuracy: 0.867


## Write to file

In [None]:
submissions = pd.DataFrame({"ImageId": list(range(1, len(predictions)+1)),
                             "Label": predictions})
submissions.to_csv("output.csv", index=False, header=True)