## Architecture 1:
### conv(15*  7*   7)->conv(12*   5*    5)->Maxpooling(size=(2,2))->conv(15*   7*    7)->Maxpooling(size=(2,2))->flatten->dense->dropout->softmax
### conv(a*   b*   c) --->denotes a =no_of_kernels and b =no_of_rows in a kernel and c=no_of_columns in kernel..

In [1]:
# Credits: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py


from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(15, kernel_size=(7, 7),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(12, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(15, kernel_size=(7, 7),
                 activation='relu',
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
print(model.summary())

Using TensorFlow backend.


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 22, 22, 15)        750       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 18, 18, 12)        4512      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 9, 9, 12)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 3, 3, 15)          8835      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 1, 1, 15)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 15)                0         
_________________________________________________________________
den

In [2]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import time
# https://gist.github.com/greydanus/f6eee59eaf1d90fcb3b534a25362cea4
# https://stackoverflow.com/a/14434334
# this function is used to update the plots for each epoch and error
def plt_dynamic(x, vy, ty, ax, colors=['b']):
    ax.plot(x, vy, 'b', label="Validation Loss")
    ax.plot(x, ty, 'r', label="Train Loss")
    plt.legend()
    plt.grid()
    fig.canvas.draw()

In [3]:
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

plot_array=model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 60000 samples, validate on 10000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.03627164460248023
Test accuracy: 0.9894


In [4]:
nb_epoch=12
score = model.evaluate(x_test, y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])
print(plot_array.history.keys())
fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')
x = list(range(1,nb_epoch+1))
vy = plot_array.history['val_loss']
ty = plot_array.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.03627164460248023
Test accuracy: 0.9894
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])


<IPython.core.display.Javascript object>

### Architecture 2:  with batchNormalization and dropouts..

In [5]:
from  keras.layers.normalization import BatchNormalization
from keras.layers import Dropout
model1 = Sequential()
model1.add(Conv2D(15, kernel_size=(7, 7),padding='valid',
                 activation='relu',
                 input_shape=input_shape))
model1.add(Conv2D(12, (3, 3), padding='valid',activation='relu'))
model1.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2),padding='valid'))
model1.add(BatchNormalization())
model1.add(Dropout(0.5))
model1.add(Conv2D(18, (5, 5), padding='same',activation='relu'))
model1.add(Conv2D(25, (7, 7), padding='same',activation='relu'))
model1.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2),padding='same'))
model1.add(BatchNormalization())
model1.add(Dropout(0.5))
model1.add(Conv2D(32, (5, 5), padding='same',activation='relu'))
model1.add(Conv2D(12, (7, 7), padding='same',activation='relu'))
model1.add(Conv2D(12, (9, 9), padding='same',activation='relu'))
model1.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2),padding='valid'))
model1.add(BatchNormalization())
model1.add(Dropout(0.5))
model1.add(Flatten())
model1.add(Dense(128, activation='relu',kernel_initializer=keras.initializers.he_normal(seed=None)))
model1.add(BatchNormalization())
model1.add(Dropout(0.5))
model1.add(Dense(64, activation='relu',kernel_initializer=keras.initializers.he_normal(seed=None)))
model1.add(BatchNormalization())
model1.add(Dropout(0.5))
model1.add(Dense(num_classes, activation='softmax',kernel_initializer=keras.initializers.he_normal(seed=None)))
print(model1.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 22, 22, 15)        750       
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 20, 20, 12)        1632      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 10, 10, 12)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 10, 10, 12)        48        
_________________________________________________________________
dropout_2 (Dropout)          (None, 10, 10, 12)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 10, 10, 18)        5418      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 10, 10, 25)        22075     
__________

In [6]:
model1.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

plot_array=model1.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=30,
          verbose=1,
          validation_data=(x_test, y_test))
score = model1.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 60000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss: 0.03540907850921212
Test accuracy: 0.9916


In [8]:
nb_epoch=30
score = model1.evaluate(x_test, y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])
print(plot_array.history.keys())
fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')
x = list(range(1,nb_epoch+1))
vy = plot_array.history['val_loss']
ty = plot_array.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.03540907850921212
Test accuracy: 0.9916
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])


<IPython.core.display.Javascript object>

## Model 2 is nothing but model1+ data augmentation

In [9]:
nb_epoch=30
from  keras.layers.normalization import BatchNormalization
from keras.layers import Dropout
model2 = Sequential()
model2.add(Conv2D(15, kernel_size=(7, 7),padding='valid',
                 activation='relu',
                 input_shape=input_shape))
model2.add(Conv2D(12, (3, 3), padding='valid',activation='relu'))
model2.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2),padding='valid'))
model2.add(BatchNormalization())
model2.add(Dropout(0.5))
model2.add(Conv2D(18, (5, 5), padding='same',activation='relu'))
model2.add(Conv2D(25, (7, 7), padding='same',activation='relu'))
model2.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2),padding='same'))
model2.add(BatchNormalization())
model2.add(Dropout(0.5))
model2.add(Conv2D(32, (5, 5), padding='same',activation='relu'))
model2.add(Conv2D(12, (7, 7), padding='same',activation='relu'))
model2.add(Conv2D(12, (9, 9), padding='same',activation='relu'))
model2.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2),padding='valid'))
model2.add(BatchNormalization())
model2.add(Dropout(0.5))
model2.add(Flatten())
model2.add(Dense(128, activation='relu',kernel_initializer=keras.initializers.he_normal(seed=None)))
model2.add(BatchNormalization())
model2.add(Dropout(0.5))
model2.add(Dense(64, activation='relu',kernel_initializer=keras.initializers.he_normal(seed=None)))
model2.add(BatchNormalization())
model2.add(Dropout(0.5))
model2.add(Dense(num_classes, activation='softmax',kernel_initializer=keras.initializers.he_normal(seed=None)))
print(model2.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 22, 22, 15)        750       
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 20, 20, 12)        1632      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 10, 10, 12)        0         
_________________________________________________________________
batch_normalization_6 (Batch (None, 10, 10, 12)        48        
_________________________________________________________________
dropout_7 (Dropout)          (None, 10, 10, 12)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 10, 10, 18)        5418      
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 10, 10, 25)        22075     
__________

In [10]:
from keras.preprocessing.image import ImageDataGenerator
train_gen =ImageDataGenerator(rotation_range=40,width_shift_range=0.2,height_shift_range=0.2,shear_range=0.2,zoom_range=0.2,horizontal_flip=True,fill_mode='nearest')
train_gen.fit(x_train)
model2.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])
plot_array=model2.fit_generator(train_gen.flow(x_train,y_train,
                    batch_size=batch_size),
                    nb_epoch=nb_epoch,
                    validation_data=(x_test,y_test))

  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [11]:
nb_epoch=30
score = model2.evaluate(x_test, y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])
print(plot_array.history.keys())
fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')
x = list(range(1,nb_epoch+1))
vy = plot_array.history['val_loss']
ty = plot_array.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.1078857864547521
Test accuracy: 0.9684
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])


<IPython.core.display.Javascript object>