In [1]:
import numpy as np
import tensorflow.keras
import pandas as pd
import h5py

  from ._conv import register_converters as _register_converters


In [2]:
# The dataset is stored as an HDF5 file, so it is opened read-only with h5py.
# The handle is kept open because the following cells fetch datasets from it.
f = h5py.File('SVHN_single_grey1.h5', mode='r')
list(f)

['X_test', 'X_train', 'X_val', 'y_test', 'y_train', 'y_val']

In [3]:
# Fetch the train and test images/labels from the open HDF5 file.
x_train, y_train = f.get('X_train'), f.get('y_train')
x_test, y_test = f.get('X_test'), f.get('y_test')

In [4]:
# Report the size of each split and the shape of a single image.
for label, value in (
    ('Number of samples in Training dataset: ', x_train.shape[0]),
    ('Number of samples in Test dataset: ', x_test.shape[0]),
    ('Dimensions of the image:', x_train[0].shape),
):
    print(label, value)

Number of samples in Training dataset:  42000
Number of samples in Test dataset:  18000
Dimensions of the image: (32, 32)


### KNN Classification

In [5]:
# Flatten every training image into one row of pixel features for KNN.
# The whole split is read from the HDF5 dataset in a single call and then
# cast, scaled and reshaped with vectorised NumPy operations, instead of
# issuing one read per image inside a Python loop.
x_train_flat = x_train[:].astype('float32') / 255  # pixel values divided by 255
# -1 infers the per-image feature count (32 * 32 = 1024 for this data).
x_train_flat = x_train_flat.reshape(x_train_flat.shape[0], -1)

x_train_df = pd.DataFrame(x_train_flat)

In [6]:
from sklearn.neighbors import KNeighborsClassifier

# K-nearest-neighbours baseline using the 5 nearest training images,
# fitted on the flattened pixel features.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X=x_train_df, y=y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [7]:
# Prepare the test data exactly like the training data: one flattened row of
# pixel features per image.
# The whole split is read from the HDF5 dataset in a single call and then
# cast, scaled and reshaped with vectorised NumPy operations, instead of
# issuing one read per image inside a Python loop.
x_test_flat = x_test[:].astype('float32') / 255  # pixel values divided by 255
# -1 infers the per-image feature count (32 * 32 = 1024 for this data).
x_test_flat = x_test_flat.reshape(x_test_flat.shape[0], -1)

x_test_df = pd.DataFrame(x_test_flat)

In [8]:
# Predict only the first 1000 test rows
# (presumably to keep KNN prediction time manageable -- TODO confirm).
y_pred = classifier.predict(X=x_test_df.iloc[:1000])

In [9]:
from sklearn.metrics import classification_report, confusion_matrix

# Ground-truth labels for the same first 1000 test samples that were predicted;
# read once and shared by both reports.
y_true_subset = y_test[:1000]
print(confusion_matrix(y_true_subset, y_pred))
print(classification_report(y_true_subset, y_pred))

[[77  8  1  2  2  2  8  1  7  9]
 [ 3 72  4  5  4  1  2  2  0  0]
 [10 18 41  5  1  1  2  4  4  6]
 [ 9 10 10 36  2  6  3  1  3  5]
 [11 14  3  2 58  1  2  2  2  1]
 [11 12  3 15  3 41  5  1  6  8]
 [13  7  2  2 13 10 38  0  7  2]
 [ 6 16  7  1  1  1  0 71  1  3]
 [22  9  4  3  3  6  8  2 39  4]
 [20  8  5  8  5  4  7  4 10 40]]
              precision    recall  f1-score   support

           0       0.42      0.66      0.52       117
           1       0.41      0.77      0.54        93
           2       0.51      0.45      0.48        92
           3       0.46      0.42      0.44        85
           4       0.63      0.60      0.62        96
           5       0.56      0.39      0.46       105
           6       0.51      0.40      0.45        94
           7       0.81      0.66      0.73       107
           8       0.49      0.39      0.44       100
           9       0.51      0.36      0.42       111

   micro avg       0.51      0.51      0.51      1000
   macro avg       

#### With KNN, prediction accuracy was between 50% and 60% (micro-average 0.51 on the first 1000 test samples)

### Deep Neural Network Classifier

In [10]:
# Read both image splits fully into in-memory NumPy arrays.
# `dataset[()]` replaces the `.value` attribute, which was deprecated in
# h5py 2.x and removed in h5py 3.0 (where it raises AttributeError).
x_train_neural = x_train[()]
x_test_neural = x_test[()]

In [11]:
# Cast the images to float32 and divide the pixel values by 255.
# NOTE: this cell rescales whatever x_*_neural currently holds, so running it
# twice divides by 255 twice; re-run the loading cell above before re-running it.
x_train_neural = x_train_neural.astype('float32') / 255
x_test_neural = x_test_neural.astype('float32') / 255

In [12]:
# Add a trailing channel axis of size 1 so each image becomes (32, 32, 1),
# which is the input_shape given to the first convolution layer.
train_count = x_train_neural.shape[0]
test_count = x_test_neural.shape[0]
x_train_neural = x_train_neural.reshape((train_count, 32, 32, 1)).astype('float32')
x_test_neural = x_test_neural.reshape((test_count, 32, 32, 1)).astype('float32')

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Reshape

#### Implementing a Deep Neural Network Classifier with
1. RELU Activations
2. Batch Normalisation
3. Cost Functions
4. Optimizers

In [30]:
# Define model: two 3x3 convolutions -> dense layer -> batch norm -> softmax.
model = Sequential()

# 1st Conv Layer
model.add(Conv2D(32, (3, 3), input_shape=(32, 32, 1)))
model.add(Activation('relu'))

# 2nd Conv Layer
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))

# Fully Connected Layer
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))

# Batch Normalisation
model.add(tensorflow.keras.layers.BatchNormalization())

# Prediction Layer: 10 outputs turned into class probabilities by softmax
model.add(Dense(10, kernel_initializer='he_normal', use_bias=True))
model.add(Activation('softmax'))

# Loss and Optimizer
# The metric is requested as 'acc' so that it is logged as 'acc' / 'val_acc'
# on both TF 1.x and TF 2.x. With 'accuracy', TF 2.x logs 'val_accuracy' and
# the 'val_acc' monitor below would never be found, disabling early stopping.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])

# Early stopping: end training once validation accuracy has not improved
# for 5 consecutive epochs.
early_stopping = tensorflow.keras.callbacks.EarlyStopping(monitor='val_acc', patience=5, verbose=1, mode='auto')
callback_list = [early_stopping]

# Train the model
# Labels are read with `dataset[()]`; the `.value` attribute was removed in h5py 3.0.
# NOTE(review): validation (and therefore early stopping) uses the test split,
# so the test metrics reported later are not from fully unseen data --
# consider a separate held-out validation split.
model.fit(x_train_neural, y_train[()], batch_size=32, epochs=10,
          validation_data=(x_test_neural, y_test[()]), callbacks=callback_list)

Train on 42000 samples, validate on 18000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x20e8f5e9ef0>

In [31]:
# Softmax output of the network for every test image (one row per image).
y_pred_nn = model.predict(x=x_test_neural)

In [32]:
# Take the index of the largest output per row as the predicted class,
# then preview the first five predictions.
y_pred_lst = y_pred_nn.argmax(axis=1)
y_pred_df = pd.DataFrame(y_pred_lst)
y_pred_df.head()

Unnamed: 0,0
0,1
1,7
2,2
3,9
4,0


In [33]:
# Evaluate the network's predictions against the labels of the full test split:
# first the confusion matrix, then the per-class report.
for report in (confusion_matrix, classification_report):
    print(report(y_test, y_pred_df))

[[1691   31    9   10    8    5   15    7   18   20]
 [  36 1633    9   34   42   10   11   30   19    4]
 [  20   26 1592   36   29    6    5   33   22   34]
 [  21   22   13 1466   24   73   27   12   44   17]
 [  14   50   18   24 1622   17   21    6   17   23]
 [   8   15    5   95   13 1504   73    7   30   18]
 [  62   18    4   34   28   50 1540    4   83    9]
 [  34   91   33   35   12   10    8 1560   13   12]
 [  30   34   17   47   14   21   64    9 1539   37]
 [  61   23   26   41   22   32   12   10   63 1514]]
              precision    recall  f1-score   support

           0       0.86      0.93      0.89      1814
           1       0.84      0.89      0.87      1828
           2       0.92      0.88      0.90      1803
           3       0.80      0.85      0.83      1719
           4       0.89      0.90      0.89      1812
           5       0.87      0.85      0.86      1768
           6       0.87      0.84      0.85      1832
           7       0.93      0.86   

### Summary

##### KNN Accuracy in ML: 50 - 60%
##### Accuracy in DL: 85%

##### Deep learning models perform well on image datasets

The deep learning model's accuracy is about 30 percentage points higher than that of traditional ML (KNN) in this case.
EDA (exploratory data analysis) is not required in DL, so it requires comparatively less effort.
In DL, we get to see the loss and accuracy while running each epoch.