In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import InputLayer, Input
from tensorflow.python.keras.layers import Reshape, MaxPooling2D
from tensorflow.python.keras.layers import Conv2D, Dense, Flatten
from keras.utils import to_categorical

Using TensorFlow backend.


Will implement [conv-relu-pool]xN -> [affine]xM -> [softmax or SVM] and write script to print accuracy of models on test and train data

# Preprocess the Data

In [4]:
data = pd.read_csv("./data/handwritten_data_785.csv", encoding = "utf8")

In [5]:
np.random.seed(0)
data = data.values
np.random.shuffle(data)
X, y = data[:,1:], data[:,0]

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print("Train: {} {}".format(X_train.shape, y_train.shape))
print("Valid: {} {}".format(X_valid.shape, y_valid.shape))
print("Test: {} {}".format(X_test.shape, y_test.shape))

Train: (297629, 784) (297629,)
Valid: (59526, 784) (59526,)
Test: (74408, 784) (74408,)


In [14]:
mean_image = np.mean(X_train, axis=0).astype(np.int64)
X_train = (X_train - mean_image)/255
X_test = (X_test - mean_image)/255

In [15]:
train_Y_one_hot = to_categorical(y_train)
test_Y_one_hot = to_categorical(y_test)

# Data Dimensions

In [29]:
print(X_train.shape)
print(X_test.shape)

#images are 28x28
img_size = 28
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)

# Tuple with height, width and depth used to reshape arrays.
# This is used for reshaping in Keras.
img_shape_full = (img_size, img_size, 1)

num_channels = 1
num_classes = 26

X_train = X_train.reshape(X_train.shape[0], img_size, img_size, 1)
X_test = X_test.reshape(X_test.shape[0], img_size, img_size, 1)

(297629, 784)
(74408, 784)


# Test: One layer of each ([conv-relu-pool] -> [affine]-> [softmax or SVM])

Remember, deeper networks is always better, at the cost of more data and increased complexity of learning.
Minibatch size is usually set of few hundreds. 
You should initially use fewer filters and gradually increase and monitor the error rate to see how it is varying.
Very small filter sizes will capture very fine details of the image. On the other hand having a bigger filter size 
will leave out minute details in the image.
https://www.quora.com/How-can-I-decide-the-kernel-size-output-maps-and-layers-of-CNN

# Keras Model

In [19]:
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.layers import LeakyReLU
from tensorflow.python.keras import initializers
from keras.wrappers.scikit_learn import KerasClassifier

optimizer = Adam(lr=1e-3)

In [31]:
'''
    Function to create model, required for KerasClassifier
    
    We pass this function name to the KerasClassifier class by the build_fn argument. 
    We also pass in additional arguments of epochs and batch_size. 
    These are automatically bundled up and passed on to the fit() function which is called 
    internally by the KerasClassifier class.
    
    https://machinelearningmastery.com/use-keras-deep-learning-models-scikit-learn-python/
'''
def create_model():
    model = Sequential()
    '''
    initializer: he_normal
    A more recent paper on this topic, Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification by He et al., 
    derives an initialization specifically for ReLU neurons, reaching the conclusion that the variance of neurons 
    in the network should be 2.0/n. This gives the initialization w = np.random.randn(n) * sqrt(2.0/n), 
    and is the current recommendation for use in practice in the specific case of neural networks with ReLU neurons.
    '''
    #convolutional layer
    model.add(Conv2D(kernel_size=7, strides=1, filters=16, padding='same',
                     activation='linear', name='layer_conv1', input_shape=img_shape_full))
    #reLU activation
    model.add(LeakyReLU(alpha=0.1))
    #Pooling Layer
    model.add(MaxPooling2D(pool_size=2, strides=2, padding='same'))

    model.add(Flatten())
    #Fully-Connected Layer with 128 outputs Adding this in improved scores
    model.add(Dense(128, activation='linear'))
    model.add(LeakyReLU(alpha=0.1))
    #Fully-Connected Layer with Softmax
    model.add(Dense(num_classes, activation='softmax'))

    '''
    For multiclass classification problems like MNIST, cross entropy is typically used as the loss metric
    '''
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
model = KerasClassifier(build_fn=create_model, epochs=3, batch_size=150, verbose=1)

# Cross-Validation

In [32]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

In [33]:
'''
    Must use non-one-hot data:
        
    keras.utils.to_categorical produces a one-hot encoded class vector, i
    .e. the multilabel-indicator mentioned in the error message. 
    StratifiedKFold is not designed to work with such input;
    from:
    https://stackoverflow.com/questions/48508036/sklearn-stratifiedkfold-valueerror-supported-target-types-are-binary-mul
'''
kfold = StratifiedKFold(n_splits=5, shuffle=False)
results = cross_val_score(model, X_train, y_train, cv=kfold)
model_accuracy = results.mean()

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [34]:
print("Model Cross-Validation Accuracy = ", model_accuracy)

Model Cross-Validation Accuracy =  0.9817255074409299


<strong>without he_normal weights initialization and 0.01 biases initialization:</strong>

Train on 297629 samples, validate on 74408 samples

Epoch 1/1
loss: 0.1502 - acc: 0.9591 

val_loss: 0.0927 - val_acc: 0.9762

<strong>using he_normal weight initialization and 0.01 biases initialization actually decreased accuracy by 0.0014</strong>

Train on 297629 samples, validate on 74408 samples

Epoch 1/1

loss: 0.1835 - acc: 0.9494 - 

val_loss: 0.0963 - val_acc: 0.9748

In [None]:
test_eval = model.evaluate(test_X, test_Y_one_hot, verbose=0)


In [None]:
accuracy = model_train.history['acc']
val_accuracy = model_train.history['val_acc']
loss = model_train.history['loss']
val_loss = model_train.history['val_loss']
epochs = range(len(accuracy))
plt.plot(epochs, accuracy, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()