In [1]:
#we import all the libraries needed to build  a CNN model
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

Using TensorFlow backend.


In [2]:
# Training hyper-parameters, in order: mini-batch size, number of output
# classes (digits 0-9) and number of training epochs
batch_size, num_classes, epochs = 128, 10, 10

In [3]:
# MNIST images are square: 28 rows by 28 columns
img_rows = img_cols = 28

# Fetch MNIST, already partitioned into (images, labels) pairs for the
# training split and the test split
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
# Add the single channel axis where the Keras backend expects it, and remember
# the resulting per-image shape for the first layer of each model.
if K.image_data_format() == 'channels_first':
    # channel axis comes before the spatial axes
    input_shape = (1, img_rows, img_cols)
else:
    # channel axis comes after the spatial axes
    input_shape = (img_rows, img_cols, 1)

# Keep the sample axis as it is and lay out every image as `input_shape`
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

In [5]:
# Cast the pixel arrays of both splits to 32-bit floats
x_train, x_test = (pixels.astype('float32') for pixels in (x_train, x_test))

In [6]:
# Scale pixel intensities from [0, 255] down to [0, 1].
# The in-place division is guarded on the current value range: without the
# guard, re-running this cell would divide already-normalised data by 255 again
# and silently feed near-zero inputs to every model below.
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

# Report the array layout and the number of samples in each split
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [7]:
# One-hot encode the integer class labels into binary class matrices with
# `num_classes` columns.
# Only 1-D label vectors are converted: re-running this cell therefore leaves
# labels that are already one-hot untouched instead of encoding them a second
# time (which would change their shape and break training).
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)

# 3 LAYERED CNN

In [9]:
# Three-convolution CNN, assembled from an ordered list of layers
model = Sequential([
    # conv 1: 32 filters of 3x3 with ReLU; also declares the per-image input shape
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    # conv 2: 64 filters of 3x3 with ReLU
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    # 2x2 max pooling followed by dropout (rate 0.25)
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # conv 3: 32 filters of 3x3 with ReLU, then another 2x2 max pooling
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # flatten the feature maps into one vector per sample
    Flatten(),
    # fully connected hidden layer with ReLU, regularised by dropout (rate 0.5)
    Dense(128, activation='relu'),
    Dropout(0.5),
    # softmax output: one probability per class
    Dense(num_classes, activation='softmax'),
])

# print the layer-by-layer summary (output shapes and parameter counts)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 32)        18464     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 800)               0         
__________

In [10]:
# Compile the model: Adadelta optimiser, categorical cross-entropy loss,
# and accuracy reported as the metric
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [11]:
# Train for `epochs` passes over the training split with per-epoch progress
# output. The test split is passed as validation data, so the per-epoch
# validation metrics are computed on it.
model.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f5774988cf8>

In [12]:
# Score the trained model on the test split; `score` holds the loss followed
# by the accuracy metric requested at compile time
score = model.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.024620758173108152
Test accuracy: 0.9925


# 5 LAYERED CNN with ADAM optimiser, ReLU convolutions AND a sigmoid hidden dense layer

In [19]:
# Five-convolution CNN, assembled from an ordered list of layers
model_2 = Sequential([
    # conv 1: 32 filters of 3x3 with ReLU; also declares the per-image input shape
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    # conv 2: 128 filters of 3x3 with ReLU
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    # 2x2 max pooling followed by dropout (rate 0.25)
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # conv 3: 64 filters of 3x3 with ReLU, followed by dropout (rate 0.25)
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Dropout(0.25),
    # conv 4: 64 filters of 3x3 with ReLU
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    # conv 5: 32 filters of 3x3 with ReLU, then 2x2 max pooling
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # flatten the feature maps into one vector per sample
    Flatten(),
    # fully connected hidden layer with sigmoid activation, then dropout (rate 0.5)
    Dense(128, activation='sigmoid'),
    Dropout(0.5),
    # softmax output: one probability per class
    Dense(num_classes, activation='softmax'),
])

# print the layer-by-layer summary (output shapes and parameter counts)
model_2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_33 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 24, 24, 128)       36992     
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 12, 12, 128)       0         
_________________________________________________________________
dropout_16 (Dropout)         (None, 12, 12, 128)       0         
_________________________________________________________________
conv2d_35 (Conv2D)           (None, 10, 10, 64)        73792     
_________________________________________________________________
dropout_17 (Dropout)         (None, 10, 10, 64)        0         
_________________________________________________________________
conv2d_36 (Conv2D)           (None, 8, 8, 64)          36928     
__________

In [20]:
# Compile the model: Adam optimiser (selected by its string name),
# categorical cross-entropy loss, and accuracy reported as the metric

model_2.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [21]:
# Train for `epochs` passes over the training split with per-epoch progress
# output. The test split is passed as validation data, so the per-epoch
# validation metrics are computed on it.
model_2.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f56962b47f0>

In [28]:
# Score the trained model on the test split; `score` holds the loss followed
# by the accuracy metric requested at compile time
score = model_2.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.023042755649157334
Test accuracy: 0.9927


# 7 LAYERED CNN with SGD optimiser, ReLU convolutions AND a tanh hidden dense layer

In [25]:
# Seven-convolution CNN, assembled from an ordered list of layers
model_3 = Sequential([
    # conv 1: 32 filters of 3x3 with ReLU; also declares the per-image input shape
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    # conv 2: 128 filters of 3x3 with ReLU
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    # 2x2 max pooling followed by dropout (rate 0.25)
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # conv 3: 64 filters of 3x3 with ReLU, followed by dropout (rate 0.25)
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Dropout(0.25),
    # conv 4: 64 filters of 3x3 with ReLU
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    # conv 5: 128 filters of 3x3 with ReLU
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    # conv 6: 64 filters of 3x3 with ReLU
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    # conv 7: 32 filters of 3x3 with ReLU, then 2x2 max pooling
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # flatten the feature maps into one vector per sample
    Flatten(),
    # fully connected hidden layer with tanh activation, then dropout (rate 0.5)
    Dense(128, activation='tanh'),
    Dropout(0.5),
    # softmax output: one probability per class
    Dense(num_classes, activation='softmax'),
])

# print the layer-by-layer summary (output shapes and parameter counts)
model_3.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_58 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_59 (Conv2D)           (None, 24, 24, 128)       36992     
_________________________________________________________________
max_pooling2d_29 (MaxPooling (None, 12, 12, 128)       0         
_________________________________________________________________
dropout_27 (Dropout)         (None, 12, 12, 128)       0         
_________________________________________________________________
conv2d_60 (Conv2D)           (None, 10, 10, 64)        73792     
_________________________________________________________________
dropout_28 (Dropout)         (None, 10, 10, 64)        0         
_________________________________________________________________
conv2d_61 (Conv2D)           (None, 8, 8, 64)          36928     
__________

In [33]:
# Compile the model: SGD optimiser (selected by its string name),
# categorical cross-entropy loss, and accuracy reported as the metric

model_3.compile(
    optimizer='sgd',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [34]:
# Train for `epochs` passes over the training split with per-epoch progress
# output. The test split is passed as validation data, so the per-epoch
# validation metrics are computed on it.
model_3.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f562169cc50>

In [36]:
# Score the trained model on the test split; `score` holds the loss followed
# by the accuracy metric requested at compile time
score = model_3.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.015821283910993952
Test accuracy: 0.995


CONCLUSIONS:
    
    All three models reached very high test accuracy (0.9925, 0.9927 and 0.995), even with different architectures, optimisers
    and activation functions. Convergence was fast: all models converged within the first epoch itself.
    However, the time taken to train is significantly higher for the larger architectures (increased number of trainable parameters).