In [1]:
#Kernel from: https://www.kaggle.com/gabrielmv/mnist-digit-recognizer-99/output
import numpy as np 
import pandas as pd 

from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, BatchNormalization, Activation, Dropout, DepthwiseConv2D
from keras.layers.pooling import GlobalAveragePooling2D, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools
from keras.datasets import mnist
from keras.utils import np_utils

Using TensorFlow backend.


In [6]:
import os
# List the contents of ./input to confirm the expected data files are present.
print(os.listdir("./input"))

['sample_submission.csv', 'test.csv', 'train.csv']


In [5]:
# Load the Keras MNIST dataset and merge its train and test splits into one
# training set; the Kaggle CSV files are loaded separately in later cells.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = np.concatenate((X_train, X_test), axis = 0)
y_train = np.concatenate((y_train, y_test), axis = 0)
# Append a single-channel axis: (N, 28, 28) -> (N, 28, 28, 1).
X_train = X_train.reshape((-1, 28, 28, 1))
print(X_train.shape, y_train.shape)

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
(70000, 28, 28, 1) (70000,)


In [7]:
# Load the Kaggle training CSV to serve as the validation set: column 0 is
# used as the label, all remaining columns as pixel values.
# NOTE(review): these Kaggle digits are presumably drawn from MNIST, which is
# used in full for training -- validation scores may be optimistic; confirm.
train = pd.read_csv('./input/train.csv').values
y_val = train[:, 0].astype('int32')
X_val = train[:, 1:].astype('float32').reshape(-1, 28, 28, 1)
print(X_val.shape, y_val.shape)

(42000, 28, 28, 1) (42000,)


In [8]:
# Load the Kaggle test CSV; every column is treated as pixel data (no label
# column is read) and the rows are reshaped to (N, 28, 28, 1) images.
X_test = (
    pd.read_csv('./input/test.csv')
    .values
    .astype('float32')
    .reshape(-1, 28, 28, 1)
)

In [9]:
# Cast all three image sets to float32 and divide by 255.
# Caution: this cell rebinds its own inputs, so running it a second time
# divides the already-scaled data by 255 again.
X_train, X_val, X_test = (
    images.astype('float32') / 255
    for images in (X_train, X_val, X_test)
)

In [10]:
# One-hot encode the integer digit labels into 10-class vectors, matching the
# 10-unit softmax output and categorical cross-entropy loss of the model.
y_train = to_categorical(y_train, num_classes = 10)
y_val = to_categorical(y_val, num_classes = 10)

In [12]:
# Seed NumPy's global RNG so NumPy-driven randomness is repeatable across runs.
# Previously the seed value was only stored in a variable and never applied.
# NOTE(review): the Keras/TensorFlow backend keeps its own RNG, which is not
# seeded here -- confirm whether weight initialisation must be reproducible too.
random_seed = 2
np.random.seed(random_seed)

In [13]:
def create_model():
    """Build and compile the convolutional digit classifier.

    The network takes 28x28 single-channel images and consists of three
    convolutional stages (3, 2 and 1 'same'-padded ReLU convolutions
    respectively), each followed by 2x2 max pooling, then a flatten, a
    256-unit ReLU dense layer and a 10-unit softmax output.

    Returns:
        A Keras ``Sequential`` model compiled with the 'adam' optimizer,
        'categorical_crossentropy' loss and the 'accuracy' metric.
    """

    def relu_conv(filters, kernel_size, **extra):
        # Every convolution in this network shares padding and activation.
        return Conv2D(filters = filters, kernel_size = kernel_size,
                      padding = 'same', activation = 'relu', **extra)

    stack = [
        # Stage 1
        relu_conv(64, 3, input_shape = (28,28,1)),
        relu_conv(64, 3),
        relu_conv(128, 3),
        MaxPooling2D(pool_size = 2),
        # Stage 2
        relu_conv(128, 3),
        relu_conv(192, 3),
        MaxPooling2D(pool_size = 2),
        # Stage 3 (larger 5x5 kernel; 'same'-padded pooling)
        relu_conv(192, 5),
        MaxPooling2D(pool_size = 2, padding = 'same'),
        # Classifier head
        Flatten(),
        Dense(256, activation = 'relu'),
        Dense(10, activation = 'softmax'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(optimizer = 'adam',
                  loss = 'categorical_crossentropy',
                  metrics = ['accuracy'])

    return model

In [14]:
# Instantiate the compiled network and print its layer-by-layer summary.
model = create_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 64)        640       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 64)        36928     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 28, 128)       73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 14, 14, 128)       147584    
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 14, 14, 192)       221376    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 192)         0         
__________

In [15]:
# Callbacks used during training.

# Multiply the learning rate by 0.3 when 'val_acc' has not improved for
# 3 epochs, with a floor of 1e-5.
reduce_learning_rate = ReduceLROnPlateau(
    monitor = 'val_acc',
    patience = 3,
    verbose = 1,
    factor = 0.3,
    min_lr = 0.00001,
)

# Write the model to 'mnist_weights.h5' only when 'val_acc' reaches a new maximum.
checkpoint = ModelCheckpoint(
    'mnist_weights.h5',
    monitor = 'val_acc',
    verbose = 1,
    save_best_only = True,
    mode = 'max',
)

# Stop after 10 epochs without a 'val_loss' improvement and restore the
# best weights seen so far.
early_stopping = EarlyStopping(
    monitor = 'val_loss',
    min_delta = 1e-10,
    patience = 10,
    verbose = 1,
    restore_best_weights = True,
)

callbacks = [
    reduce_learning_rate,
    checkpoint,
    early_stopping,
]

In [16]:
#Finally we train the model

In [17]:
# Train on the merged MNIST set, validating against the Kaggle training CSV.
# NOTE(review): with epochs = 1 the patience-based callbacks (patience 3 and 10)
# cannot trigger -- confirm whether a longer run was intended.
history = model.fit(
    x = X_train,
    y = y_train,
    batch_size = 100,
    epochs = 1,
    validation_data = (X_val, y_val),
    callbacks = callbacks,
    verbose = 1,
    shuffle = True,
)

Train on 70000 samples, validate on 42000 samples
Epoch 1/1
14000/70000 [=====>........................] - ETA: 13:20 - loss: 0.4666 - acc: 0.8514

KeyboardInterrupt: 