# MNIST Digits Classification
This notebook trains a Convolutional Neural Network model to classify the MNIST dataset that consists of 28x28 pixel images of handwritten digits with their labels. It uses the Keras library with Tensorflow backend.

## Load Data

In [17]:
from keras.datasets import mnist
IMAGE_SIZE = 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

## Preprocess Data
The input images are normalized and the output classes are one-hot encoded. Images are reshaped to the model input dimensions.

In [18]:
import keras

def preprocess(x_train, y_train, x_test, y_test):
    # Normalize to between 0 and 1
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255
    print('x_train max', x_train.max())
    print('x_train min', x_train.min())

    NUM_CLASSES = 10
    print('y_train min', y_train.min())
    print('y_train max', y_train.max())

    # one-hot encode labels
    y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)
    y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)

    # Reshape to model input
    x_train = x_train.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
    x_test = x_test.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
    print('x_train shape', x_train.shape)
    print('x_test shape', x_train.shape)
    
    return x_train, y_train, x_test, y_test

x_train, y_train, x_test, y_test = preprocess(x_train, y_train, x_test, y_test)

x_train max 1.0
x_train min 0.0
y_train min 0
y_train max 9
x_train shape (60000, 28, 28, 1)
x_test shape (60000, 28, 28, 1)


## Model Architecture


In [40]:
from keras.models import Model
from keras.layers import concatenate, Conv2D, Dense, Flatten, Input, MaxPooling2D

IMAGE_SIZE = 28
FILTER_SHAPE = (3, 3)


def create_model():
    inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 1))

    conv1 = Conv2D(32, FILTER_SHAPE, activation='relu', padding='same')(inputs)
    pool1 = MaxPooling2D()(conv1)

    conv2_1 = Conv2D(64, FILTER_SHAPE, activation='relu', padding='same')(pool1)
    pool2_1 = MaxPooling2D()(conv2_1)
    conv2_2 = Conv2D(64, FILTER_SHAPE, activation='relu', padding='same')(pool1)
    pool2_2 = MaxPooling2D()(conv2_2)

    conv3_1 = Conv2D(256, FILTER_SHAPE, activation='relu', padding='same')(pool2_1)
    conv3_2 = Conv2D(256, FILTER_SHAPE, activation='relu', padding='same')(pool2_2)
    conv3 = concatenate([conv3_1, conv3_2])
    pool3 = MaxPooling2D()(conv3)

    flattened_conv3 = Flatten()(pool3)
    fc1 = Dense(1000, activation='relu')(flattened_conv3)
    fc2 = Dense(500, activation='relu')(fc1)
    outputs = Dense(10, activation='softmax')(fc2)

    model = Model(inputs, outputs)
    model.summary()
    return model

model = create_model()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_31 (Conv2D)              (None, 28, 28, 32)   320         input_7[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_24 (MaxPooling2D) (None, 14, 14, 32)   0           conv2d_31[0][0]                  
__________________________________________________________________________________________________
conv2d_32 (Conv2D)              (None, 14, 14, 64)   18496       max_pooling2d_24[0][0]           
__________________________________________________________________________________________________
conv2d_33 

In [None]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=8,
          epochs=100,
          shuffle=True,
          verbose=1,
          validation_split=0.1
         )

model.save('model.h5')

Train on 54000 samples, validate on 6000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100

# Results
Gets to 99.25% test accuracy after 12 epochs


In [None]:
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])