In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
import tensorflow as tf

  from ._conv import register_converters as _register_converters


This notebook implements a [conv-relu-pool]xN -> [affine]xM -> [softmax or SVM] architecture and includes a script that prints the accuracy of each model on the training and test data.

# Preprocess the Data

In [2]:
# Read the handwritten-data CSV into a pandas DataFrame.
data = pd.read_csv("./data/handwritten_data_785.csv", encoding="utf8")

In [3]:
# Seed NumPy's global RNG so the in-place shuffle below is repeatable.
np.random.seed(0)

# NOTE: `data` is rebound from the loaded DataFrame to its underlying ndarray,
# so this cell cannot be re-run without reloading `data` first.
data = data.values
np.random.shuffle(data)  # reorders the rows in place

# Column 0 is taken as the label; every remaining column is a feature.
y = data[:, 0]
X = data[:, 1:]

In [4]:
# Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
)

# Data Dimensions

In [5]:
# Image geometry: every sample is a square 28x28 image.
img_size = 28
img_shape = (img_size, img_size)
img_size_flat = img_size ** 2

# Single-channel images and 26 target classes.
num_channels = 1
num_classes = 26

# Shapes of the two splits before they are reshaped into images.
print(X_train.shape)
print(X_test.shape)

# Reshape each flat row into (num_images, height, width, channels).
X_train = X_train.reshape(len(X_train), img_size, img_size, num_channels)
X_test = X_test.reshape(len(X_test), img_size, img_size, num_channels)

(297629, 784)
(74408, 784)


In [6]:
# Per-pixel mean over the training images, cast to int64 (fractional part dropped).
mean_image = X_train.mean(axis=0).astype(np.int64)

# Center both splits with the *training* mean, then divide by 255.
# NOTE: this overwrites X_train / X_test, so re-running the cell rescales them again.
X_train = (X_train - mean_image) / 255
X_test = (X_test - mean_image) / 255

# Helper Functions

In [11]:
def create_biases(length):
    '''
    Creates a bias variable with `length` entries, each initialized to 0.01.
    '''
    initial_value = tf.constant(0.01, shape=[length])
    return tf.Variable(initial_value)
'''
One problem is that the distribution of the outputs from a randomly initialized 
neuron has a variance that grows with the number of inputs. It turns out that we can normalize the variance 
of each neuron’s output to 1 by scaling its weight vector by the square root of its fan-in (i.e. its number of inputs).
That is, the recommended heuristic is to initialize each neuron’s weight vector as: w = np.random.randn(n) / sqrt(n), 
where n is the number of its inputs. This ensures that all neurons in the network initially have approximately the 
same output distribution and empirically improves the rate of convergence.
'''
# With uniform=False, xavier_initializer samples from a normal distribution
# with standard deviation sqrt(2 / (fan_in + fan_out)).
def create_weights(shape):
    '''
    Creates a weight variable of the given shape, initialized with the
    Xavier initializer in its normal (non-uniform) variant.
    '''
    xavier_init = tf.contrib.layers.xavier_initializer(uniform=False)
    initial_value = xavier_init(shape)
    return tf.Variable(initial_value)

def conv_layer(input, input_channels, filter_size, num_filters, x_stride=1, y_stride=1):
    '''
    Builds one conv -> relu -> 2x2 max-pool stage with newly created
    weights and biases.

    input (4d Tensor), with dimensions in this order:
        -Image number.
        -Y-axis of each image.
        -X-axis of each image.
        -Channels of each image.
    input_channels: number of channels in the input
    filter_size: side length of the square filter window (ex: 5 gives a 5x5 filter)
    num_filters: number of filters to use (number of output channels)
    x_stride: amount to move filter over on x axis
    y_stride: amount to move filter over on y axis
        ex: x_stride=2, y_stride=2 would mean that the filter
        is moved 2 pixels across the x- and y-axis of the image

    returns: (layer, weights)
        layer: output tensor after convolution, bias, relu and max-pooling
        weights: the filter-weights variable created for this layer
    '''
    # Filter bank layout expected by tf.nn.conv2d:
    # [filter_height, filter_width, in_channels, out_channels]
    weights_shape = [filter_size, filter_size, input_channels, num_filters]
    weights = create_weights(weights_shape)

    # One bias per filter / output channel.
    biases = create_biases(num_filters)

    # The input is [image, Y, X, channels], so the strides list must follow the
    # same order: [1, y_stride, x_stride, 1]. (The previous version passed
    # x_stride in the Y slot and y_stride in the X slot.)
    layer = tf.nn.conv2d(
        input,
        weights,
        strides=[1, y_stride, x_stride, 1],
        padding='VALID',
    ) + biases

    # TODO: maybe split pooling (and a possible batch-norm step) out of this function.
    layer = tf.nn.relu(layer)

    # 2x2 max-pooling with stride 2 in both spatial dimensions.
    layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    return layer, weights

def flatten(layer):
    '''
    Collapses every non-batch dimension of `layer` into one so the result
    can be fed to a fully connected layer.

    layer is assumed to be shaped
        [num_images, img_height, img_width, num_channels]

    returns: (layer_flat, num_features)
        layer_flat: tensor shaped [num_images, num_features]
        num_features: img_height * img_width * num_channels
    '''
    # Static shape of everything except the leading image dimension.
    feature_dims = layer.get_shape()[1:4]
    num_features = feature_dims.num_elements()

    # -1 lets TensorFlow work out the first dimension so that the
    # total number of elements stays the same.
    flattened = tf.reshape(layer, [-1, num_features])

    return flattened, num_features

def new_fc_layer(input, num_inputs, num_outputs):
    '''
    Fully connected (affine) layer: input @ weights + biases.

    input: 2d tensor whose second dimension is num_inputs
    num_inputs: number of input features
    num_outputs: number of output units

    Creates a new (num_inputs, num_outputs) weight variable and a new
    num_outputs bias variable. No activation is applied.
    '''
    # Weights are created before biases, as in the rest of this file.
    weights = create_weights((num_inputs, num_outputs))
    biases = create_biases(num_outputs)

    projected = tf.matmul(input, weights)
    return projected + biases
    

# Test: One layer of each ([conv-relu-pool] -> [affine]-> [softmax or SVM])

Remember, deeper networks generally perform better, at the cost of needing more data and making learning more complex.
Minibatch size is usually set to a few hundred.
You should initially use fewer filters, then gradually increase the number while monitoring how the error rate varies.
Very small filter sizes will capture very fine details of the image. On the other hand, a bigger filter size
will leave out minute details in the image.
https://www.quora.com/How-can-I-decide-the-kernel-size-output-maps-and-layers-of-CNN

In [12]:
# Start from an empty default graph so re-running this cell does not add duplicate ops.
tf.reset_default_graph()

# Inputs: a batch of images and their per-class label vectors (presumably one-hot);
# the batch size is left unspecified.
# NOTE: this rebinds `X`, which earlier held the NumPy feature matrix.
X = tf.placeholder(tf.float32, shape=[None, img_size, img_size, num_channels])
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)

# Network: one conv-relu-pool stage -> flatten -> one fully connected layer.
conv_layer1, weights_conv_layer1 = conv_layer(
    X,
    num_channels,
    filter_size=7,
    num_filters=16,
)
conv_layer_flat, num_features = flatten(conv_layer1)
fc_layer = new_fc_layer(conv_layer_flat, num_features, num_classes)

# Softmax over the fully connected outputs; the predicted class is the argmax.
y_pred = tf.nn.softmax(fc_layer)
y_pred_cls = tf.argmax(y_pred, axis=1)