In [6]:
import tensorflow.keras as keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# TensorFlow 2.x ships Keras as tensorflow.keras, so everything is imported from there rather than from the standalone keras package

In [7]:
# Fetch MNIST through Keras. load_data() hands back one (features, labels)
# pair for the training split and one for the test split.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [9]:
# The Conv2D input layer is declared with input_shape=(28, 28, 1), so every
# 28x28 image needs an explicit trailing channel axis. The arrays become
# 4-D: (samples, 28, 28, 1). Pixels are cast to float32 at the same time so
# that they can be scaled to fractional values afterwards.
x_train = x_train.reshape(len(x_train), 28, 28, 1).astype('float32')
x_test = x_test.reshape(len(x_test), 28, 28, 1).astype('float32')



In [17]:
# Min-max normalization: rescale pixel values using the bounds of the
# training set. The same training-set bounds are applied to the test set so
# both splits go through an identical transformation.
min_limit = x_train.min()
max_limit = x_train.max()
value_range = max_limit - min_limit

x_train = (x_train - min_limit) / value_range
x_test = (x_test - min_limit) / value_range


In [24]:
# Number of target classes used for the one-hot encoding below.
num_classes = 10

# One-hot encode the labels so they match the categorical_crossentropy loss
# and the softmax output of the model.
# The ndim guard keeps this cell idempotent: if the cell is re-run, the labels
# are already 2-D (samples, num_classes), and passing them through
# to_categorical a second time would silently add another axis and corrupt
# the targets.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)

In [26]:
# Echo the encoded test labels as a sanity check: after to_categorical each
# row is one label and each column is one class.
y_test

array([[0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [28]:
# CNN classifier for 28x28 single-channel images: two convolution + max-pool
# stages followed by a dense classification head.
model = Sequential([
    # Stage 1: 32 filters of size 3x3 with ReLU, then 2x2 max pooling.
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 64 filters of size 3x3 with ReLU, then 2x2 max pooling.
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Collapse the feature maps into one flat vector per image.
    Flatten(),

    # Fully connected part: a single hidden layer with 512 nodes.
    Dense(512, activation='relu'),

    # Output layer: 10 units with softmax.
    Dense(10, activation='softmax'),
])

In [29]:
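# The convolutional layers and the hidden fully connected layer use a non-linear activation (ReLU).
# The final fully connected layer uses softmax instead.
# Softmax normalizes the 10 output values to lie between 0 and 1 (and to sum to 1).
# The index of the largest output value is taken as the predicted class.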

In [30]:
# Author's note: the standard dataset consists of 60K instances, and it is
# hard to process all of them several times on a personal computer, so the
# author prefers training on randomly selected instances. Skip this step if
# you have the time or strong hardware and want to work on all instances.
#
# NOTE(review): as written, the generator wraps the whole training set and
# serves it in batches of 320; it does not drop any instances. Any random
# selection would come from how the batches are drawn - confirm against the
# ImageDataGenerator.flow documentation.

gen = ImageDataGenerator()
train_generator = gen.flow(x=x_train, y=y_train, batch_size=320)

In [42]:
# Probe the generator's structure at three levels: number of batches, number
# of items in the first batch, and number of images in the first batch.
# train_generator - 188 batches - ( 320 images, 320 labels)
# NOTE: a notebook cell echoes only its last expression, so the first two
# len() results are computed and discarded; only the final value is shown.
len(train_generator)
len(train_generator[0])
len(train_generator[0][0])

320

In [44]:
# Training configuration: Adam optimizer, categorical cross-entropy loss
# (the labels are one-hot encoded) and accuracy as the reported metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [50]:
# Option A (not used): train from the generator.
#   model.fit(train_generator, epochs=10, validation_data=(x_test, y_test))
# Message recorded while trying the generator route:
#   The `batch_size` argument must not be specified for the given input type.
# NOTE(review): presumably raised when batch_size was passed together with
# the generator - confirm.

# Option B (used): train directly on the in-memory arrays, 250 samples per
# batch for 10 epochs, validating against the test split after each epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=250,
    epochs=10,
    validation_data=(x_test, y_test),
)


Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x22bde193688>

In [52]:
# File name used for a previous save, kept for reference:
# model.save('98mnist_10epc_320batch.h5')

# Write the trained model to an .h5 file in the current working directory.
model.save(filepath='998mnist_10epc_250batch.h5')