In [1]:
# Library imports -- the model below is built with the Keras functional API

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Conv2D, BatchNormalization, MaxPooling2D, Flatten, Dropout, Input, AveragePooling2D
from keras.utils import to_categorical
from keras.utils import np_utils

import matplotlib.pyplot as plt


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 


Using TensorFlow backend.


In [2]:
#Dataset Load
# MNIST ships with Keras; load_data() returns a (train, test) pair of (images, labels) tuples.

from keras.datasets import mnist

train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

print(x_train.shape)  # (examples, height, width) -- no channel axis yet
print(y_train.shape)  # (examples,) -- class labels, not yet one-hot encoded

(60000, 28, 28)
(60000,)


In [3]:
#Data Preparation
# NOTE: this cell rebinds x_train/x_test/y_train/y_test in place, so run it exactly
# once after the dataset-load cell. A second run would divide the pixels by 255
# again and one-hot encode labels that are already one-hot.

# Image geometry is read from the loaded array; depth is the number of channels.
img_H = x_train.shape[1]
img_W = x_train.shape[2]
depth = 1

input_shape = (img_H, img_W, depth)

# Cast to float32 first so the normalization below produces fractional values.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Keras convolution layers expect (examples, height, width, depth), so a
# trailing channel axis is added: (60000, 28, 28) -> (60000, 28, 28, 1).
x_train = x_train.reshape(x_train.shape[0], img_H, img_W, depth)
x_test = x_test.reshape(x_test.shape[0], img_H, img_W, depth)

# Image normalization: the maximum pixel value 255 becomes 1.
x_train = x_train / 255
x_test = x_test / 255

print(x_train.shape)

# One-hot encoding.
# The class count is derived once from the training labels and passed explicitly,
# so y_train and y_test are guaranteed to have the same number of columns even if
# one split happens not to contain the highest label. `to_categorical` is the
# name imported directly from keras.utils; the `np_utils` alias is a legacy path
# that newer Keras releases no longer provide.
num_classes = int(y_train.max()) + 1
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

print(y_train.shape) #10 classes

(60000, 28, 28, 1)
(60000, 10)


In [4]:
#Extra information
# Derived sizes, printed as a quick sanity check on the prepared data.

no_classes = y_train.shape[1]  # one column per class in the one-hot label matrix
no_pixels = img_H * img_W * depth  # number of values in a single image

print(no_classes, no_pixels, sep="\n")

10
784


In [5]:
#Model Architecture
# LeNet-style network built with the Keras functional API.
# The original LeNet used tanh activations and "valid" (no) padding and was
# designed for 32x32 inputs; the images here are 28x28, so the feature maps are
# smaller and the third convolution uses a 4x4 kernel to reach a 1x1 map.
# Output size of a valid convolution/pooling: (f - k) / s + 1
# (f = feature map size, k = kernel size, s = stride).

i = Input(shape=input_shape) # (height, width, depth) from the data-preparation cell

#Convolutional Layers
# Stage 1: 28x28x1 --> 24x24x6 --> 12x12x6
x = Conv2D(filters=6, kernel_size=(5,5), padding="valid")(i)
# (If batch normalization were used, it would go here: after the convolution and before the activation.)
x = Activation("tanh")(x)
# Pooling uses "valid" padding like the rest of the network. For these even-sized
# maps it yields exactly the same output as "same" would (no padding is needed).
x = AveragePooling2D(pool_size=(2,2), padding="valid")(x) # strides default to pool_size, i.e. 2

# Stage 2: 12x12x6 --> 8x8x16 --> 4x4x16
x = Conv2D(filters=16, kernel_size=(5,5), padding="valid")(x)
x = Activation("tanh")(x)
x = AveragePooling2D(pool_size=(2,2), padding="valid")(x)

# Stage 3: 4x4x16 --> 1x1x120
# The kernel size equals the feature map size, so the result is 1x1; no pooling follows.
x = Conv2D(filters=120, kernel_size=(4,4), padding="valid")(x)
x = Activation("tanh")(x)

#Flattening
x = Flatten()(x) #1x1x120 = 120 features, dim = (-1,120)

#Fully Connected Layers
x = Dense(units=84)(x) #84 neurons
x = Activation('tanh')(x)
# (If dropout were used, it would go here: after the activation.)
x = Dense(units=no_classes)(x) #one output per class
x = Activation('softmax')(x) #softmax is added explicitly so the model outputs class probabilities


#Build the model
# A functional Model is defined by its input and output tensors.
model = Model(inputs=i, outputs=x)

#Model Architecture Visualization

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 6)         156       
_________________________________________________________________
activation_1 (Activation)    (None, 24, 24, 6)         0         
_________________________________________________________________
average_pooling2d_1 (Average (None, 12, 12, 6)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 8, 8, 16)          2416      
_________________________________________________________________
activation_2 (Activation)    (None, 8, 8, 16)          0         
_________________________________________________________________
average_pooling2d_2 (Average (None, 4, 4, 16)          0         
__________

In [6]:
#Loss Function, Optimizer and metrics
# Categorical cross-entropy pairs with the one-hot encoded labels; accuracy is
# reported alongside the loss during training.

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [7]:
#Training

# The test split is passed as validation data here. Keras could instead hold out
# part of the training data itself via the validation_split option (e.g. 0.2).
# `r` keeps the History object returned by fit().
r = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)  # verbose is left at its default of 1

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
#Model saving
# Write the trained model to an HDF5 file in the current working directory.

model_path = "Keras_LeNet5_mnist.h5"
model.save(model_path)
print("Model Saved")

Model Saved
