In [2]:
# imports
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# use channels-first image tensors ([pixels][width][height]) for every layer built below;
# set_image_data_format('channels_first') is the Keras 2 spelling of the legacy
# set_image_dim_ordering('th') call
K.set_image_data_format('channels_first')

In [4]:
# clear GPU Memory: reset the Keras backend session before any model is built
# NOTE(review): how much GPU memory is actually handed back depends on the backend - confirm if it matters
K.clear_session()

In [5]:
# fix NumPy's global random state so NumPy-driven randomness repeats between runs
# (this call seeds NumPy only)
np.random.seed(seed=7)

In [7]:
# load the MNIST data and reshape each image into the structure the CNN expects for 2D convolutions
# each image is reshaped to [pixels][width][height], where "pixels" is the number of colour channels
# for RGB images, pixels = 3: the image is split into one channel each for its r, g and b components
# for grayscale images such as MNIST, pixels = 1

In [6]:
# load data: mnist.load_data() returns a train pair and a test pair, unpacked here
# into x_* and y_* (used below as the images and their class labels respectively)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [7]:
# insert an explicit single-channel axis so each split becomes [samples][pixels][width][height],
# and store the result as float32 ready for normalisation
X_train = x_train.reshape((len(x_train), 1, 28, 28)).astype(np.float32)
X_test = x_test.reshape((len(x_test), 1, 28, 28)).astype(np.float32)

In [8]:
# rescale pixel intensities from the 0-255 range down to 0-1
X_train = np.divide(X_train, 255)
X_test = np.divide(X_test, 255)

In [9]:
# one-hot encode the class labels of both splits (one row per sample, one column per class)
Y_train, Y_test = map(np_utils.to_categorical, (y_train, y_test))

In [10]:
# the width of the one-hot label matrix is the number of classes
num_classes = Y_test.shape[-1]

In [11]:
# sanity-check the tensor shapes: image tensors on the first line, one-hot labels on the second
print(*(images.shape for images in (X_train, X_test)))
print(*(labels.shape for labels in (Y_train, Y_test)))

(60000, 1, 28, 28) (10000, 1, 28, 28)
(60000, 10) (10000, 10)


In [12]:
# define CNN architecture
# create a simple CNN network
def base_CNN2D(input_shape=(1, 28, 28), n_classes=None):
    """Build and compile the baseline CNN.

    Layers, in order:
      1. Conv2D       - 32 feature maps of size 5x5 with a rectifier (relu) activation;
                        expects images of structure pixels,width,height
      2. MaxPooling2D - pool size of 2x2
      3. Dropout      - rate 0.2, to reduce overfitting
      4. Flatten      - turns the feature maps into a vector for the fully connected layers
      5. Dense        - fully connected hidden layer with 128 neurons and relu activation
      6. Dense        - output layer with one neuron per class and softmax activation,
                        predicting the probability of each class

    Args:
        input_shape: channels-first shape of a single image, (pixels, width, height).
            Defaults to (1, 28, 28), the shape used by this notebook.
        n_classes: number of neurons in the output layer. When None, the
            notebook-level ``num_classes`` is used.

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the adam optimizer and the accuracy metric.
    """
    if n_classes is None:
        # fall back to the class count derived from the one-hot labels
        n_classes = num_classes

    # Spell the layout out on the layers that take it, so the (pixels, width, height)
    # input is read correctly even if the global image format was never switched.
    layout = 'channels_first'

    # init Sequential model
    model = Sequential()

    # Input layer - Layer 1
    model.add(
        Conv2D(32, (5, 5), input_shape=input_shape, activation='relu', data_format=layout)
    )
    # MaxPooling layer - Layer 2
    model.add(
        MaxPooling2D(pool_size=(2, 2), data_format=layout)
    )
    # Dropout layer - Layer 3
    model.add(
        Dropout(0.2)
    )
    # Flatten layer - Layer 4
    model.add(
        Flatten()
    )
    # Dense layer - Layer 5 - Hidden layer
    model.add(
        Dense(128, activation='relu')
    )
    # Dense layer - Layer 6 - Output layer
    model.add(
        Dense(n_classes, activation='softmax')
    )

    # compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
    

In [None]:
# drop the names of the raw arrays; only the reshaped X_* / one-hot Y_* versions are used from here on
del x_train, x_test, y_train, y_test

In [20]:
# fit the baseline CNN for 10 epochs using mini-batches of 200 images;
# note that the test split doubles as the validation data here
model = base_CNN2D()
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=200,
    epochs=10,
    verbose=2,
    validation_data=(X_test, Y_test),
)
# keep the evaluation scores on the test split; index 1 is read as the accuracy
scores = model.evaluate(x=X_test, y=Y_test, verbose=0)
print('CNN Error {}'.format(100 - scores[1] * 100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 36s - loss: 0.2256 - acc: 0.9357 - val_loss: 0.0778 - val_acc: 0.9755
Epoch 2/10
 - 1s - loss: 0.0711 - acc: 0.9789 - val_loss: 0.0450 - val_acc: 0.9851
Epoch 3/10
 - 1s - loss: 0.0509 - acc: 0.9846 - val_loss: 0.0439 - val_acc: 0.9853
Epoch 4/10
 - 1s - loss: 0.0391 - acc: 0.9879 - val_loss: 0.0402 - val_acc: 0.9879
Epoch 5/10
 - 1s - loss: 0.0326 - acc: 0.9898 - val_loss: 0.0348 - val_acc: 0.9881
Epoch 6/10
 - 1s - loss: 0.0266 - acc: 0.9917 - val_loss: 0.0330 - val_acc: 0.9898
Epoch 7/10
 - 1s - loss: 0.0217 - acc: 0.9933 - val_loss: 0.0359 - val_acc: 0.9886
Epoch 8/10
 - 1s - loss: 0.0192 - acc: 0.9939 - val_loss: 0.0338 - val_acc: 0.9880
Epoch 9/10
 - 1s - loss: 0.0154 - acc: 0.9951 - val_loss: 0.0322 - val_acc: 0.9892
Epoch 10/10
 - 1s - loss: 0.0140 - acc: 0.9958 - val_loss: 0.0345 - val_acc: 0.9891
CNN Error 1.0900000000000034


In [21]:
# create larger CNN architecture with close to state-of-the-art results for this dataset
# extending the baseline CNN architecture with additional Convolutional, MaxPooling and fully connected layers
def state_CNN2D(input_shape=(1, 28, 28), n_classes=None):
    """Build and compile the larger CNN.

    Layers, in order:
      1. Conv2D       - 30 feature maps of 5x5, relu activation
      2. MaxPooling2D - 2x2 size
      3. Conv2D       - 15 feature maps of 3x3, relu activation
      4. MaxPooling2D - 2x2 size
      5. Dropout      - 20%
      6. Flatten
      7. Dense        - hidden layer with 128 neurons and relu activation
      8. Dense        - hidden layer with 50 neurons and relu activation
      9. Dense        - output layer with one neuron per class and softmax activation

    Args:
        input_shape: channels-first shape of a single image, (pixels, width, height).
            Defaults to (1, 28, 28), the shape used by this notebook.
        n_classes: number of neurons in the output layer. When None, the
            notebook-level ``num_classes`` is used.

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the adam optimizer and the accuracy metric.
    """
    if n_classes is None:
        # fall back to the class count derived from the one-hot labels
        n_classes = num_classes

    # Spell the layout out on the layers that take it, so the (pixels, width, height)
    # input is read correctly even if the global image format was never switched.
    layout = 'channels_first'

    # init Sequential model
    model = Sequential()

    # Input layer - Layer 1
    model.add(
        Conv2D(30, (5, 5), input_shape=input_shape, activation='relu', data_format=layout)
    )
    # MaxPooling layer - Layer 2
    model.add(
        MaxPooling2D(pool_size=(2, 2), data_format=layout)
    )
    # Convolution layer - Layer 3
    model.add(
        Conv2D(15, (3, 3), activation='relu', data_format=layout)
    )
    # MaxPooling layer - Layer 4
    model.add(
        MaxPooling2D(pool_size=(2, 2), data_format=layout)
    )
    # Dropout layer - Layer 5
    model.add(
        Dropout(0.2)
    )
    # Flatten layer - Layer 6
    model.add(
        Flatten()
    )
    # Dense layer - Layer 7 - Hidden layer
    model.add(
        Dense(128, activation='relu')
    )
    # Dense layer - Layer 8 - Hidden layer
    model.add(
        Dense(50, activation='relu')
    )
    # Dense layer - Layer 9 - Output layer
    model.add(
        Dense(n_classes, activation='softmax')
    )

    # compile model with adam gradient descent for optimization
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [22]:
# unbind the baseline model's name before the larger network is built
del model

In [30]:
# build and run larger CNN 
model = state_CNN2D()
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=10, batch_size=200, verbose=2)
# store model evaluation scores
scores = model.evaluate(X_test, Y_test, verbose=0)
print('CNN Error {:.2f}%'.format(100-scores[1]*100))
print('CNN Accuracy {:.2f}'.format(100 - (100-scores[1]*100)))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 2s - loss: 0.4057 - acc: 0.8708 - val_loss: 0.0950 - val_acc: 0.9710
Epoch 2/10
 - 1s - loss: 0.1000 - acc: 0.9694 - val_loss: 0.0515 - val_acc: 0.9832
Epoch 3/10
 - 1s - loss: 0.0699 - acc: 0.9786 - val_loss: 0.0425 - val_acc: 0.9867
Epoch 4/10
 - 1s - loss: 0.0566 - acc: 0.9820 - val_loss: 0.0334 - val_acc: 0.9890
Epoch 5/10
 - 1s - loss: 0.0476 - acc: 0.9851 - val_loss: 0.0349 - val_acc: 0.9890
Epoch 6/10
 - 1s - loss: 0.0422 - acc: 0.9861 - val_loss: 0.0315 - val_acc: 0.9895
Epoch 7/10
 - 1s - loss: 0.0367 - acc: 0.9888 - val_loss: 0.0266 - val_acc: 0.9918
Epoch 8/10
 - 1s - loss: 0.0338 - acc: 0.9889 - val_loss: 0.0244 - val_acc: 0.9920
Epoch 9/10
 - 1s - loss: 0.0309 - acc: 0.9899 - val_loss: 0.0286 - val_acc: 0.9904
Epoch 10/10
 - 1s - loss: 0.0287 - acc: 0.9907 - val_loss: 0.0254 - val_acc: 0.9919
CNN Error 0.81%
CNN Accuracy 99.19
