- Import the necessary libraries
- Add the dataset - MNIST
- Build the CNN model without padding
  - 28 x 28 x 1 image with 32 3 x 3 kernels
- Flatten the image array
- Add MLP over the flattened array
- Get the model summary
- Build the same network a second time with padding = 'same' (one layer of zero padding around the input for a 3 x 3 kernel).
  - Each Conv layer's output keeps the same 28 x 28 spatial size as its input
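- Adding strides = 1 - no difference (it is the default); strides = 2 - huge difference (each Conv layer roughly halves the feature-map size, so far fewer values reach the Dense layer)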
- Adding a max pooling layer
  - Added pooling layer after each Conv layer with default options - strides = 2 and kernel size = 2 x 2
- Compile the model
- Fit the model



In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras
from keras import Sequential
from keras.datasets import mnist
from keras.layers import Dense, Flatten, Conv2D, BatchNormalization, MaxPooling2D
# from keras.layers import * (when we know all the functions in keras.layers)

In [2]:
# Load the MNIST train and test splits; each split is an (images, labels) pair.
(x_train, y_train), (x_test,y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Scale pixel values by 1/255 (MNIST images are 8-bit, so this maps them
# into the [0, 1] range).
x_train = x_train / 255.0
x_test = x_test / 255.0

# Add an explicit trailing channel axis so the arrays are (N, 28, 28, 1),
# matching the input_shape = (28, 28, 1) that every model below declares,
# instead of relying on Keras to adjust the rank of the input implicitly.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# One-hot encode the labels. num_classes is fixed at 10 (digits 0-9) so both
# splits always get 10 columns, matching the Dense(10) softmax output, rather
# than a width inferred from the largest label that happens to be present.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)

In [4]:
# Shape of one MNIST sample in channels-last layout: (height, width, channels).
# Same value as the input_shape given to the first Conv2D layer of each model.
input_shape = (28, 28, 1)

In [5]:
# Baseline CNN: three 3 x 3 convolutions WITHOUT padding, followed by an MLP.
# input_shape is the (28, 28, 1) tuple defined in the earlier cell, reused
# here instead of repeating the literal.
model = Sequential([
    # CNN part.
    # padding='valid' means no padding, so every 3 x 3 convolution trims one
    # pixel from each border: 28 -> 26 -> 24 -> 22.
    # 32 is the number of 3 x 3 kernels (filters) in each layer.
    Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu',
           input_shape=input_shape),
    Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu'),
    Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu'),
    # MLP part: the 22 x 22 x 32 feature maps are flattened into one vector
    # and fed to an ordinary dense network with a 10-way softmax output.
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 32)        9248      
                                                                 
 conv2d_2 (Conv2D)           (None, 22, 22, 32)        9248      
                                                                 
 flatten (Flatten)           (None, 15488)             0         
                                                                 
 dense (Dense)               (None, 128)               1982592   
                                                                 
 dense_1 (Dense)             (None, 10)                1290      
                                                                 
Total params: 2002698 (7.64 MB)
Trainable params: 2002698 (7.64 MB)

In [6]:
# Same network as the baseline, but with padding='same'.
# input_shape is the (28, 28, 1) tuple defined in the earlier cell, reused
# here instead of repeating the literal.
model2 = Sequential([
    # CNN part.
    # padding='same' zero-pads the input (one pixel per border for a 3 x 3
    # kernel at stride 1) so every Conv output stays 28 x 28.
    # 32 is the number of 3 x 3 kernels (filters) in each layer.
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu',
           input_shape=input_shape),
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
    # MLP part: the 28 x 28 x 32 feature maps are flattened into one vector
    # and fed to an ordinary dense network with a 10-way softmax output.
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 conv2d_4 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 conv2d_5 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 flatten_1 (Flatten)         (None, 25088)             0         
                                                                 
 dense_2 (Dense)             (None, 128)               3211392   
                                                                 
 dense_3 (Dense)             (None, 10)                1290      
                                                                 
Total params: 3231498 (12.33 MB)
Trainable params: 3231498 (12.33 MB)

In [7]:
# padding='same' network with BatchNormalization after every Conv layer and
# after the hidden Dense layer.
# input_shape is the (28, 28, 1) tuple defined in the earlier cell, reused
# here instead of repeating the literal.
model3 = Sequential([
    # CNN part.
    # padding='same' zero-pads the input (one pixel per border for a 3 x 3
    # kernel at stride 1) so every Conv output stays 28 x 28.
    # 32 is the number of 3 x 3 kernels (filters) in each layer.
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu',
           input_shape=input_shape),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    # MLP part: flatten the feature maps and feed them to a dense network.
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])
model3.summary()

# The summary now reports non-trainable parameters because of the
# BatchNormalization layers: each one keeps a moving mean and a moving
# variance per channel, which are updated during training but not learned by
# gradient descent.

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 batch_normalization (Batch  (None, 28, 28, 32)        128       
 Normalization)                                                  
                                                                 
 conv2d_7 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 batch_normalization_1 (Bat  (None, 28, 28, 32)        128       
 chNormalization)                                                
                                                                 
 conv2d_8 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 batch_normalization_2 (Bat  (None, 28, 28, 32)       

In [8]:
# Same as the BatchNormalization model above, with strides=1 written out.
# strides=1 is the Conv2D default, so output shapes and parameter counts are
# identical to the previous model.
# input_shape is the (28, 28, 1) tuple defined in the earlier cell, reused
# here instead of repeating the literal.
model4 = Sequential([
    # CNN part.
    # padding='same' zero-pads the input (one pixel per border for a 3 x 3
    # kernel at stride 1) so every Conv output stays 28 x 28.
    # 32 is the number of 3 x 3 kernels (filters) in each layer.
    Conv2D(32, kernel_size=(3, 3), strides=1, padding='same',
           activation='relu', input_shape=input_shape),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), strides=1, padding='same',
           activation='relu'),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), strides=1, padding='same',
           activation='relu'),
    BatchNormalization(),
    # MLP part: flatten the feature maps and feed them to a dense network.
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])
model4.summary()

# The summary reports non-trainable parameters because of the
# BatchNormalization layers (their moving mean and variance are not learned
# by gradient descent).

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 batch_normalization_4 (Bat  (None, 28, 28, 32)        128       
 chNormalization)                                                
                                                                 
 conv2d_10 (Conv2D)          (None, 28, 28, 32)        9248      
                                                                 
 batch_normalization_5 (Bat  (None, 28, 28, 32)        128       
 chNormalization)                                                
                                                                 
 conv2d_11 (Conv2D)          (None, 28, 28, 32)        9248      
                                                                 
 batch_normalization_6 (Bat  (None, 28, 28, 32)       

In [9]:
# padding='same' network with strides=2 in every Conv layer.
# input_shape is the (28, 28, 1) tuple defined in the earlier cell, reused
# here instead of repeating the literal.
model5 = Sequential([
    # CNN part.
    # With padding='same' the output size is ceil(input / stride), so a
    # stride of 2 halves the feature maps at every layer: 28 -> 14 -> 7 -> 4.
    # 32 is the number of 3 x 3 kernels (filters) in each layer.
    # The Conv parameter counts do not change with the stride; the huge
    # difference is in the feature-map size, and therefore in the number of
    # values reaching Flatten and the first Dense layer (4 * 4 * 32 = 512
    # instead of 28 * 28 * 32 = 25088).
    Conv2D(32, kernel_size=(3, 3), strides=2, padding='same',
           activation='relu', input_shape=input_shape),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), strides=2, padding='same',
           activation='relu'),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), strides=2, padding='same',
           activation='relu'),
    BatchNormalization(),
    # MLP part: flatten the feature maps and feed them to a dense network.
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])
model5.summary()

# The summary reports non-trainable parameters because of the
# BatchNormalization layers (their moving mean and variance are not learned
# by gradient descent).

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 14, 14, 32)        320       
                                                                 
 batch_normalization_8 (Bat  (None, 14, 14, 32)        128       
 chNormalization)                                                
                                                                 
 conv2d_13 (Conv2D)          (None, 7, 7, 32)          9248      
                                                                 
 batch_normalization_9 (Bat  (None, 7, 7, 32)          128       
 chNormalization)                                                
                                                                 
 conv2d_14 (Conv2D)          (None, 4, 4, 32)          9248      
                                                                 
 batch_normalization_10 (Ba  (None, 4, 4, 32)         

In [10]:
# strides=2 network WITHOUT padding (padding='valid').
# input_shape is the (28, 28, 1) tuple defined in the earlier cell, reused
# here instead of repeating the literal.
model6 = Sequential([
    # CNN part.
    # padding='valid' means no padding: the output size is
    # floor((input - 3) / 2) + 1, giving 28 -> 13 -> 6 -> 2.
    # 32 is the number of 3 x 3 kernels (filters) in each layer.
    # Compared with the strides=2 / padding='same' model (28 -> 14 -> 7 -> 4)
    # the maps shrink only slightly faster: at stride 2 the stride dominates
    # the output size and the padding contributes at most a pixel or two per
    # layer.
    Conv2D(32, kernel_size=(3, 3), strides=2, padding='valid',
           activation='relu', input_shape=input_shape),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), strides=2, padding='valid',
           activation='relu'),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), strides=2, padding='valid',
           activation='relu'),
    BatchNormalization(),
    # MLP part: flatten the 2 x 2 x 32 feature maps and feed them to a dense
    # network.
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])
model6.summary()

# The summary reports non-trainable parameters because of the
# BatchNormalization layers (their moving mean and variance are not learned
# by gradient descent).

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_15 (Conv2D)          (None, 13, 13, 32)        320       
                                                                 
 batch_normalization_12 (Ba  (None, 13, 13, 32)        128       
 tchNormalization)                                               
                                                                 
 conv2d_16 (Conv2D)          (None, 6, 6, 32)          9248      
                                                                 
 batch_normalization_13 (Ba  (None, 6, 6, 32)          128       
 tchNormalization)                                               
                                                                 
 conv2d_17 (Conv2D)          (None, 2, 2, 32)          9248      
                                                                 
 batch_normalization_14 (Ba  (None, 2, 2, 32)         

## Max pooling layer

In [11]:
# padding='same', strides=1 network with a max pooling layer after each Conv
# layer (no BatchNormalization in this one).
# input_shape is the (28, 28, 1) tuple defined in the earlier cell, reused
# here instead of repeating the literal.
model7 = Sequential([
    # CNN part.
    # Each Conv layer keeps the spatial size (padding='same', strides=1);
    # each 2 x 2 pooling layer with stride 2 then halves it, rounding down:
    # 28 -> 14 -> 7 -> 3.
    # 32 is the number of 3 x 3 kernels (filters) in each layer.
    # pool_size=(2, 2) with strides=2 are also the MaxPooling2D defaults;
    # they are spelled out here for clarity.
    Conv2D(32, kernel_size=(3, 3), strides=1, padding='same',
           activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Conv2D(32, kernel_size=(3, 3), strides=1, padding='same',
           activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Conv2D(32, kernel_size=(3, 3), strides=1, padding='same',
           activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    # MLP part: flatten the 3 x 3 x 32 feature maps and feed them to a dense
    # network.
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])
model7.summary()

# Conv2D parameter calculation formula:
#   kernel_width x kernel_height x # of input channels x # of filters
#   + # of filters (one bias per filter)
# e.g. 3 x 3 x 1 x 32 + 32 = 320 and 3 x 3 x 32 x 32 + 32 = 9248.
# Pooling layers have no parameters.

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_18 (Conv2D)          (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 32)        0         
 D)                                                              
                                                                 
 conv2d_19 (Conv2D)          (None, 14, 14, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7, 7, 32)          0         
 g2D)                                                            
                                                                 
 conv2d_20 (Conv2D)          (None, 7, 7, 32)          9248      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 3, 3, 32)         

In [12]:
# Configure training for the max pooling model: Adam optimizer, categorical
# cross-entropy loss (the labels were one-hot encoded with to_categorical),
# and accuracy reported as the metric.
model7.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [13]:
# Train the max pooling model for 10 epochs in mini-batches of 64 samples.
# NOTE: the test split is passed as validation data, so the per-epoch
# validation metrics are measured on the same data as any final test score.
history = model7.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
