In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [45]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import InputLayer, Input
from tensorflow.python.keras.layers import Reshape, MaxPooling2D
from tensorflow.python.keras.layers import Conv2D, Dense, Flatten
from keras.utils import to_categorical

Goal: implement the architecture [conv-relu-pool]xN -> [affine]xM -> [softmax or SVM] and write a script that prints each model's accuracy on the training and test data.

# Preprocess the Data

In [2]:
# Read the raw CSV into a DataFrame; the path is relative to the notebook's
# working directory.
data = pd.read_csv(
    "./data/handwritten_data_785.csv",
    encoding="utf8",
)

In [3]:
# Seed NumPy's global RNG so the in-place row shuffle below is repeatable.
np.random.seed(0)

# Swap the DataFrame for its underlying array, then shuffle the rows in place.
# NOTE: `data` changes type here, so this cell cannot be re-run without
# re-running the loading cell first.
data = data.values
np.random.shuffle(data)

# Column 0 is taken as the target; every remaining column is a feature.
y = data[:, 0]
X = data[:, 1:]

In [4]:
# Hold out 20% of the rows for testing; the remaining 80% is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
)

# Data Dimensions

In [30]:
# Show the shapes of both splits as they are when this cell starts.
print(X_train.shape, X_test.shape, sep="\n")

# Images are square: 28 pixels per side.
img_size = 28
img_shape = (img_size, img_size)
img_size_flat = img_size * img_size

# Single-channel images, 26 target classes.
num_channels = 1
num_classes = 26

# (height, width, depth) tuple used when reshaping arrays for Keras.
img_shape_full = img_shape + (num_channels,)

# Reshape each split to (num_images, height, width, depth); -1 keeps the
# number of images unchanged.
X_train = X_train.reshape((-1,) + img_shape_full)
X_test = X_test.reshape((-1,) + img_shape_full)

(297629, 28, 28, 1)
(74408, 28, 28, 1)


In [31]:
# Centre every pixel on its training-set mean, then divide by 255
# (assumes 8-bit pixel intensities -- TODO confirm against the CSV).
# The mean is kept as a float: truncating it to int64 would leave a residual
# bias of up to one intensity level per pixel in the "centred" data.
# The training-set mean is also applied to the test set so that both splits
# go through exactly the same transformation.
# NOTE: this cell overwrites X_train / X_test, so running it twice normalises
# the already-normalised arrays.
mean_image = np.mean(X_train, axis=0)
X_train = (X_train - mean_image) / 255
X_test = (X_test - mean_image) / 255

In [33]:
# One-hot encode the integer class labels.
# num_classes is passed explicitly: without it to_categorical sizes the output
# from the largest label present in the array, so a split that happens to miss
# the highest class would get fewer columns than the network's output layer.
train_Y_one_hot = to_categorical(y_train, num_classes=num_classes)
test_Y_one_hot = to_categorical(y_test, num_classes=num_classes)

# Helper Functions

In [11]:
def create_biases(length):
    '''
    Create a 1-D TensorFlow variable with `length` entries, each starting at 0.01.
    '''
    initial_value = tf.constant(0.01, shape=[length])
    return tf.Variable(initial_value)
'''
One problem is that the distribution of the outputs from a randomly initialized 
neuron has a variance that grows with the number of inputs. It turns out that we can normalize the variance 
of each neuron’s output to 1 by scaling its weight vector by the square root of its fan-in (i.e. its number of inputs).
That is, the recommended heuristic is to initialize each neuron’s weight vector as: w = np.random.randn(n) / sqrt(n), 
where n is the number of its inputs. This ensures that all neurons in the network initially have approximately the 
same output distribution and empirically improves the rate of convergence.
'''
# xavier_initializer with uniform=False samples from a normal distribution with stddev sqrt(2 / (fan_in + fan_out))
def create_weights(shape):
    '''
    Create a TensorFlow variable of the given shape, initialised with the
    normal (non-uniform) variant of the Xavier initializer.
    '''
    xavier_normal = tf.contrib.layers.xavier_initializer(uniform=False)
    initial_value = xavier_normal(shape)
    return tf.Variable(initial_value)

def conv_layer(input, input_channels, filter_size, num_filters, x_stride=1, y_stride=1):
    '''
    Build one conv -> ReLU -> 2x2 max-pool stage with freshly created parameters.

    input (4d Tensor):
        -Image number.
        -Y-axis of each image.
        -X-axis of each image.
        -Channels of each image.
    input_channels: number of channels in the input
    filter_size: side length of the square filter window (ex: 5 means 5x5)
    num_filters: number of filters to use
    x_stride: amount to move filter over on x axis (image width)
    y_stride: amount to move filter over on y axis (image height)
        ex: x_stride=2, y_stride=2 would mean that the filter
        is moved 2 pixels across the x- and y-axis of the image

    Returns a tuple (layer, weights): the pooled output tensor and the
    filter-weight variable created for this layer.
    '''
    # Filter bank layout expected by tf.nn.conv2d:
    # [filter_height, filter_width, in_channels, out_channels].
    weights_shape = [filter_size, filter_size, input_channels, num_filters]
    weights = create_weights(weights_shape)

    # One bias per output filter.
    biases = create_biases(num_filters)

    # The input is laid out as [batch, y, x, channels], so the stride list must
    # follow the same order: the y (height) stride comes before the x (width)
    # stride. 'VALID' means no zero padding is added around the image.
    conv_strides = [1, y_stride, x_stride, 1]
    layer = tf.nn.conv2d(input, weights, strides=conv_strides, padding='VALID') + biases

    # ** maybe split pooling and batch_norm??
    layer = tf.nn.relu(layer)

    # Non-overlapping 2x2 max pooling.
    layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    return layer, weights

def flatten(layer):
    '''
    Collapse all non-batch dimensions of `layer` into one axis so the result
    can be fed to a fully connected layer.

    Returns a tuple (layer_flat, num_features).
    '''
    # Static shape of the incoming tensor, assumed to be
    # [num_images, img_height, img_width, num_channels].
    static_shape = layer.get_shape()

    # Features per image = img_height * img_width * num_channels.
    num_features = static_shape[1:4].num_elements()

    # A -1 entry asks TensorFlow to work out that dimension itself so the
    # total number of elements is preserved; here it stands for num_images.
    flat_dims = [-1, num_features]
    layer_flat = tf.reshape(layer, flat_dims)

    # layer_flat now has shape [num_images, num_features].
    return layer_flat, num_features

def new_fc_layer(input, num_inputs, num_outputs):
    '''
    Fully connected (affine) layer: input @ weights + biases.

    Creates a (num_inputs, num_outputs) weight variable and a bias variable
    of length num_outputs. No activation function is applied.
    '''
    fc_weights = create_weights((num_inputs, num_outputs))
    fc_biases = create_biases(num_outputs)

    projected = tf.matmul(input, fc_weights)
    return projected + fc_biases
    

# Test: One layer of each ([conv-relu-pool] -> [affine]-> [softmax or SVM])

Remember: deeper networks generally perform better, at the cost of needing more data and being harder to train.
The minibatch size is usually set to a few hundred.
Start with a small number of filters, then gradually increase it while monitoring how the error rate changes.
Very small filter sizes capture very fine details of the image, whereas larger filter sizes
leave out minute details.
https://www.quora.com/How-can-I-decide-the-kernel-size-output-maps-and-layers-of-CNN

In [13]:
# Start from an empty default graph.
tf.reset_default_graph()

# Placeholders for a batch of images and the matching label rows.
# NOTE: `X` previously named the feature array from the preprocessing cells;
# from here on it refers to this placeholder.
X = tf.placeholder(tf.float32, shape=[None, img_size, img_size, num_channels])
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)

# Network: one conv-relu-pool stage -> flatten -> one affine layer.
conv_layer1, weights_conv_layer1 = conv_layer(
    X, num_channels, filter_size=7, num_filters=16)
conv_layer_flat, num_features = flatten(conv_layer1)
fc_layer = new_fc_layer(conv_layer_flat, num_features, num_classes)

# Class probabilities and the index of the most likely class.
y_pred = tf.nn.softmax(fc_layer)
y_pred_cls = tf.argmax(y_pred, axis=1)

# Loss: softmax cross-entropy, the usual choice for multiclass problems such
# as MNIST. It is computed from the raw affine outputs (logits), not from
# y_pred, and averaged over the batch.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true,
                                                           logits=fc_layer)
cost = tf.reduce_mean(cross_entropy)

# Adam optimizer with a learning rate of 5e-4; train_step is the op that
# applies one update.
optimizer = tf.train.AdamOptimizer(learning_rate=5e-4)
train_step = optimizer.minimize(cost)

# Performance measure: fraction of the batch whose predicted class equals
# the true class.
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Session Run

In [14]:
# Number of iterations completed so far; optimize() reads and advances this.
total_iterations = 0

# Mini-batch size used for training.
train_batch_size = 200

# Open a session and give every variable in the graph its initial value.
session = tf.Session()
session.run(tf.global_variables_initializer())

def optimize(num_iterations):
    """Run `num_iterations` training steps on random mini-batches of the training set.

    Relies on notebook globals: `session`, the placeholders `X` and `y_true`,
    the `train_step` and `accuracy` tensors, the arrays `X_train` and
    `train_Y_one_hot`, and `train_batch_size`. The global counter
    `total_iterations` is advanced by `num_iterations`, so repeated calls
    continue the iteration numbering where the previous call stopped.

    Prints the accuracy on the current mini-batch whenever the iteration
    index is a multiple of 100, and the elapsed wall-clock time at the end.
    """
    # Imported locally because the notebook's import cells do not provide them.
    import time
    from datetime import timedelta

    # Ensure we update the global variable rather than a local copy.
    global total_iterations

    # Start-time used for printing time-usage below.
    start_time = time.time()

    num_train = X_train.shape[0]

    for i in range(total_iterations,
                   total_iterations + num_iterations):

        # Draw a random mini-batch (sampling with replacement) of training
        # images together with their one-hot labels.
        batch_idx = np.random.randint(0, num_train, size=train_batch_size)
        x_batch = X_train[batch_idx]
        y_true_batch = train_Y_one_hot[batch_idx]

        # Map the batch onto the graph's placeholders.
        feed_dict_train = {X: x_batch,
                           y_true: y_true_batch}

        # Run one optimisation step. The op to fetch is `train_step` (the
        # result of optimizer.minimize), not the optimizer object itself --
        # and the name `optimizer` is rebound to a Keras optimizer later on.
        session.run(train_step, feed_dict=feed_dict_train)

        # Print status every 100 iterations.
        if i % 100 == 0:
            # Accuracy on the mini-batch that was just trained on.
            acc = session.run(accuracy, feed_dict=feed_dict_train)

            msg = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"
            print(msg.format(i + 1, acc))

    # Update the total number of iterations performed.
    total_iterations += num_iterations

    # Report elapsed time, rounded to whole seconds.
    end_time = time.time()
    time_dif = end_time - start_time
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
    
def run_model(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """Evaluate (and optionally train) a model over `Xd` / `yd` in mini-batches.

    Relies on the notebook-level placeholder `X`, the label placeholder
    `y_true` and the tensor `y_true_cls` created in the graph-construction cell.

    session: tf.Session used to run the graph.
    predict: tensor of per-class scores; the argmax over axis 1 is the prediction.
    loss_val: scalar loss tensor fetched for every batch. It is multiplied by
        the batch size when aggregating, i.e. it is treated as a per-batch mean.
    Xd: array of inputs, indexed along its first axis.
    yd: array of labels fed to `y_true`, so rows must match that placeholder
        (one row of length num_classes per example).
    epochs: number of passes over the data.
    batch_size: number of examples per mini-batch (the last batch may be smaller).
    print_every: when training, print batch statistics every this many iterations.
    training: optional training op; when given it is run on every batch in
        place of the accuracy fetch.
    plot_losses: when True, plot the recorded per-batch losses after each epoch.

    Returns (total_loss, total_correct) for the last epoch: the example-weighted
    mean loss and the fraction of correct predictions. Both are None when
    `epochs` is 0.
    """
    # Imported locally because the notebook's import cells do not provide it.
    import math

    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict, 1), y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # shuffle indicies
    num_examples = Xd.shape[0]
    train_indicies = np.arange(num_examples)
    np.random.shuffle(train_indicies)

    training_now = training is not None

    # Tensors fetched for every batch. The fetched loss is the `loss_val`
    # argument. When a training op is supplied it takes the last slot, whose
    # fetched value is discarded below.
    variables = [loss_val, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training

    # Defined up front so the return statement is valid even when epochs == 0.
    total_loss = None
    total_correct = None

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(num_examples / batch_size))):
            # generate indicies for the batch
            start_idx = (i * batch_size) % num_examples
            idx = train_indicies[start_idx:start_idx + batch_size]

            # Feed the graph's own placeholders. The graph built in this
            # notebook has no training/inference switch, so nothing else is fed.
            feed_dict = {X: Xd[idx],
                         y_true: yd[idx]}
            # get batch size
            actual_batch_size = len(idx)

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables, feed_dict=feed_dict)

            # aggregate performance stats
            losses.append(loss * actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt, loss, np.sum(corr) / actual_batch_size))
            iter_cnt += 1
        total_correct = correct / num_examples
        total_loss = np.sum(losses) / num_examples
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss, total_correct, e + 1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e + 1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss, total_correct

# Keras

In [50]:
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.layers import LeakyReLU

# Keras Adam optimizer with a learning rate of 0.001.
# NOTE: this rebinds `optimizer`, which until now referred to the
# tf.train.AdamOptimizer created in the graph-construction cell.
optimizer = Adam(lr=0.001)

In [60]:
# Keras version of the same architecture: one conv -> LeakyReLU -> max-pool
# stage, flattened into a softmax output layer.
model = Sequential([
    # The convolution itself is linear; the non-linearity is the separate
    # LeakyReLU layer that follows.
    Conv2D(filters=16, kernel_size=7, strides=1, padding='same',
           activation='linear', name='layer_conv1',
           input_shape=img_shape_full),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=2, strides=2, padding='same'),
    Flatten(),
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot label encoding.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# One pass over the training set in batches of 128.
model.fit(x=X_train,
          y=train_Y_one_hot,
          batch_size=128,
          epochs=1,
          verbose=1)

Epoch 1/1


<tensorflow.python.keras._impl.keras.callbacks.History at 0x12180bba8>