In [1]:
import tensorflow
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
import cv2 
import numpy as np
from PIL import Image as im
import seaborn as sns
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.metrics import confusion_matrix,classification_report

In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
len(x_train)

60000

In [4]:
v = []
h = []
n_images = 20
for i in range(n_images):
    for j in range(n_images):
        if j == 0:
            h.append(np.invert(x_train[n_images*i+j]))
        else:
            h[i] = np.concatenate((h[i], np.invert(x_train[n_images*i+j])), axis=1)
    if i == 0:
        v = h[0]
    else:
        v = np.concatenate((v, h[i]), axis=0)

cv2.imshow('VERTICAL', v) 
  
cv2.waitKey(0) 
cv2.destroyAllWindows() 

In [5]:
np.shape(x_train)

(60000, 28, 28)

In [6]:
x_train=x_train.reshape(-1,28*28).astype("float32")/255.0
x_test=x_test.reshape(-1,28*28).astype("float32")/255.0

# Using Machine Learning Models

### Naive Bayes Classifier

In [7]:
model_m = MultinomialNB()
model_m.fit(x_train, y_train)

In [8]:
pred_m = model_m.predict(x_test)

In [9]:
print(f"The accuracy score is {accuracy_score(y_test,pred_m)*100}%")

The accuracy score is 83.57%


In [10]:
print(classification_report(y_test, pred_m))

              precision    recall  f1-score   support

           0       0.92      0.93      0.93       980
           1       0.91      0.93      0.92      1135
           2       0.90      0.83      0.86      1032
           3       0.80      0.84      0.82      1010
           4       0.84      0.74      0.79       982
           5       0.87      0.66      0.75       892
           6       0.89      0.90      0.89       958
           7       0.93      0.84      0.88      1028
           8       0.66      0.80      0.72       974
           9       0.71      0.85      0.77      1009

    accuracy                           0.84     10000
   macro avg       0.84      0.83      0.83     10000
weighted avg       0.84      0.84      0.84     10000



In [11]:
print(confusion_matrix(y_test,pred_m))
#sns.heatmap(confusion_matrix(y_test,pred_m),cmap='viridis',annot=True);   

[[ 912    0    2    6    1    8   14    1   36    0]
 [   0 1061    5    9    0    1    6    0   52    1]
 [  15   11  858   24   10    3   32   12   66    1]
 [   4   11   34  852    1   19    7   14   41   27]
 [   2    2    6    0  731    0   25    1   38  177]
 [  23   12    6  108   18  586   17    6   79   37]
 [  18   13   17    1    8   24  859    0   18    0]
 [   1   21   11    5   18    0    1  861   41   69]
 [   6   26   13   55   14   27    8    9  776   40]
 [   6    8    3   11   65    9    1   18   27  861]]


### K-Nearest Neighbors Classifier

In [12]:
model_k = KNeighborsClassifier()
model_k.fit(x_train,y_train)

In [13]:
pred_k = model_k.predict(x_test)

In [14]:
print(f"The accuracy score is {accuracy_score(y_test,pred_k)*100}%")

The accuracy score is 96.88%


In [15]:
print(classification_report(y_test, pred_k))

              precision    recall  f1-score   support

           0       0.96      0.99      0.98       980
           1       0.95      1.00      0.98      1135
           2       0.98      0.96      0.97      1032
           3       0.96      0.97      0.97      1010
           4       0.98      0.96      0.97       982
           5       0.97      0.97      0.97       892
           6       0.98      0.99      0.98       958
           7       0.96      0.96      0.96      1028
           8       0.99      0.94      0.96       974
           9       0.96      0.95      0.95      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000



In [89]:
print(confusion_matrix(y_test,pred_k))
# sns.heatmap(confusion_matrix(y_test,pred_k),cmap='viridis',annot=True);   

[[ 974    1    1    0    0    1    2    1    0    0]
 [   0 1133    2    0    0    0    0    0    0    0]
 [  11    8  991    2    1    0    1   15    3    0]
 [   0    3    3  976    1   13    1    6    3    4]
 [   3    7    0    0  944    0    4    2    1   21]
 [   5    0    0   12    2  862    4    1    2    4]
 [   5    3    0    0    3    2  945    0    0    0]
 [   0   22    4    0    3    0    0  988    0   11]
 [   8    3    5   13    6   12    5    5  913    4]
 [   5    7    3    9    7    3    1   10    2  962]]


### Logistic Regression

In [17]:
model_lr = LogisticRegression(max_iter=1000, solver='lbfgs')

In [18]:
model_lr.fit(x_train, y_train)

In [19]:
pred_lr = model_lr.predict(x_test)

In [20]:
print(f"The accuracy score is {accuracy_score(y_test,pred_lr)*100}%")

The accuracy score is 92.60000000000001%


### Random Forest Classifier 

In [21]:
model_rf = RandomForestClassifier()

In [22]:
model_rf.fit(x_train, y_train)

In [23]:
pred_rf = model_rf.predict(x_test)

In [24]:
print(f"The accuracy score is {accuracy_score(y_test, pred_rf)*100}%")

The accuracy score is 97.0%


In [25]:
print(classification_report(y_test, pred_rf))

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.96      0.97      0.96      1032
           3       0.96      0.96      0.96      1010
           4       0.97      0.97      0.97       982
           5       0.97      0.97      0.97       892
           6       0.98      0.98      0.98       958
           7       0.97      0.96      0.97      1028
           8       0.96      0.96      0.96       974
           9       0.96      0.95      0.95      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000



In [26]:
print(confusion_matrix(y_test,pred_rf))

[[ 970    0    1    0    0    2    2    1    4    0]
 [   0 1123    3    3    0    2    2    0    1    1]
 [   5    0  998    5    2    0    3    9   10    0]
 [   0    0   11  971    0    8    0    9    9    2]
 [   1    0    2    0  955    0    4    0    2   18]
 [   3    0    0    9    2  863    5    2    5    3]
 [   6    3    0    0    5    4  938    0    2    0]
 [   1    3   22    1    1    0    0  990    3    7]
 [   4    0    4    7    3    5    3    3  937    8]
 [   6    5    3   12   16    2    1    5    4  955]]


### Support Vector Machine

In [27]:
from sklearn.svm import SVC
model_svc = SVC()

In [28]:
model_svc.fit(x_train, y_train)

In [29]:
pred_svc = model_svc.predict(x_test)

In [30]:
print(f"The accuracy score is {accuracy_score(y_test, pred_svc)*100}%")

The accuracy score is 97.92%


### Gradient Boost Classifier

In [31]:
from sklearn.ensemble import GradientBoostingClassifier
model_gbc = GradientBoostingClassifier()

In [32]:
model_gbc.fit(x_train, y_train)

In [33]:
pred_gbc = model_gbc.predict(x_test)

In [34]:
print(f"The accuracy score is {accuracy_score(y_test, pred_gbc)*100}%")

The accuracy score is 94.59%


### XGBoost Classifier

In [35]:
import xgboost as xgb

In [36]:
model_xgb = xgb.XGBClassifier()

In [37]:
model_xgb.fit(x_train, y_train)

In [38]:
pred_xgb = model_xgb.predict(x_test)

In [39]:
print(f"The accuracy score is {accuracy_score(y_test, pred_xgb)*100}%")

The accuracy score is 97.95%


In [40]:
print(classification_report(y_test, pred_xgb))

              precision    recall  f1-score   support

           0       0.98      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.98      0.98      1032
           3       0.98      0.98      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.98      0.98       892
           6       0.98      0.98      0.98       958
           7       0.98      0.97      0.98      1028
           8       0.98      0.98      0.98       974
           9       0.97      0.97      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



In [41]:
print(confusion_matrix(y_test,pred_xgb))
# sns.heatmap(confusion_matrix(y_test,pred_k),cmap='viridis',annot=True);  

[[ 970    1    0    0    0    2    3    1    2    1]
 [   1 1125    3    2    0    1    2    1    0    0]
 [   5    0 1008    7    2    0    0    6    3    1]
 [   1    0    3  990    0    3    0    6    4    3]
 [   0    0    2    1  958    0    4    0    2   15]
 [   2    1    0    5    0  872    4    3    3    2]
 [   5    3    1    0    2    4  940    0    3    0]
 [   1    1   13    1    2    0    0 1000    1    9]
 [   3    1    2    3    3    4    2    1  951    4]
 [   5    4    1    5    6    0    0    4    3  981]]


# Using Neural Networks 

In [57]:
from tensorflow.keras.callbacks import EarlyStopping
model_nn = keras.Sequential(
    [
        keras.Input(shape=(784,)),# Notice the comma after 784 to specify a tuple with a single element
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

In [58]:
print(model_nn.summary())

None


In [59]:
model_nn.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),#Try other optimizers like  Adagrade , Gradient decent with momentum ,RMSprop for diff accu
    metrics=["accuracy"],
)

In [60]:
# Define early stopping
early_stopping = EarlyStopping(
    monitor='loss',
    patience=5,
    restore_best_weights=True
)

model_nn.fit(x_train,y_train,batch_size=32,epochs=10,verbose=2,callbacks=[early_stopping])

Epoch 1/10
1875/1875 - 9s - 5ms/step - accuracy: 0.9401 - loss: 0.1956
Epoch 2/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9733 - loss: 0.0881
Epoch 3/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9811 - loss: 0.0606
Epoch 4/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9843 - loss: 0.0485
Epoch 5/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9879 - loss: 0.0382
Epoch 6/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9895 - loss: 0.0324
Epoch 7/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9915 - loss: 0.0284
Epoch 8/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9917 - loss: 0.0267
Epoch 9/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9921 - loss: 0.0255
Epoch 10/10
1875/1875 - 8s - 4ms/step - accuracy: 0.9941 - loss: 0.0189


<keras.src.callbacks.history.History at 0x21e071d9640>

In [61]:
model_nn.evaluate(x_test,y_test,batch_size=32,verbose=2)

313/313 - 1s - 2ms/step - accuracy: 0.9807 - loss: 0.0820


[0.08202967792749405, 0.9807000160217285]

In [62]:
pred_nn_arr = model_nn.predict(x_test)
pred_nn = np.argmax(pred_nn_arr,axis = 1) 

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [63]:
print(f"The accuracy score is {accuracy_score(y_test, pred_nn)*100}%")

The accuracy score is 98.07000000000001%


In [64]:
print(classification_report(y_test, pred_nn))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.99      0.98      0.98      1032
           3       0.97      0.99      0.98      1010
           4       0.99      0.97      0.98       982
           5       0.99      0.97      0.98       892
           6       0.97      0.99      0.98       958
           7       0.97      0.98      0.98      1028
           8       0.98      0.98      0.98       974
           9       0.97      0.98      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



In [65]:
print(confusion_matrix(y_test,pred_nn))
# sns.heatmap(confusion_matrix(y_test,pred_k),cmap='viridis',annot=True);  

[[ 966    1    1    2    0    0    7    1    1    1]
 [   0 1124    2    3    0    1    2    1    2    0]
 [   0    0 1011    7    1    0    1    8    4    0]
 [   0    0    1  996    0    2    0    5    3    3]
 [   0    0    3    0  953    0    5    2    2   17]
 [   3    0    0   12    0  864    7    2    1    3]
 [   2    2    0    1    2    4  945    0    2    0]
 [   0    3    6    0    0    0    0 1011    3    5]
 [   0    2    1    6    2    4    4    2  950    3]
 [   0    2    0    4    5    0    0    9    2  987]]


# Using Convolutional Neural Networks

In [75]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [77]:
model_cnn = keras.Sequential([
    layers.Rescaling(1./255, input_shape=(28, 28, 1)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(10)
])

In [79]:
print(model_cnn.summary())

None


In [80]:
model_cnn.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),#Try other optimizers like  Adagrade , Gradient decent with momentum ,RMSprop for diff accu
    metrics=["accuracy"],
)

In [81]:
# Define early stopping
early_stopping = EarlyStopping(
    monitor='loss',
    patience=5,
    restore_best_weights=True
)

model_cnn.fit(X_train,Y_train,batch_size=32,epochs=10,verbose=2,callbacks=[early_stopping])

Epoch 1/10
1875/1875 - 15s - 8ms/step - accuracy: 0.9566 - loss: 0.1385
Epoch 2/10
1875/1875 - 10s - 5ms/step - accuracy: 0.9856 - loss: 0.0474
Epoch 3/10
1875/1875 - 9s - 5ms/step - accuracy: 0.9894 - loss: 0.0350
Epoch 4/10
1875/1875 - 9s - 5ms/step - accuracy: 0.9915 - loss: 0.0274
Epoch 5/10
1875/1875 - 9s - 5ms/step - accuracy: 0.9924 - loss: 0.0256
Epoch 6/10
1875/1875 - 9s - 5ms/step - accuracy: 0.9937 - loss: 0.0194
Epoch 7/10
1875/1875 - 10s - 5ms/step - accuracy: 0.9943 - loss: 0.0171
Epoch 8/10
1875/1875 - 10s - 5ms/step - accuracy: 0.9955 - loss: 0.0142
Epoch 9/10
1875/1875 - 10s - 5ms/step - accuracy: 0.9959 - loss: 0.0128
Epoch 10/10
1875/1875 - 9s - 5ms/step - accuracy: 0.9959 - loss: 0.0129


<keras.src.callbacks.history.History at 0x21e07150a10>

In [82]:
model_cnn.evaluate(X_test, Y_test, batch_size=32, verbose=2)

313/313 - 1s - 3ms/step - accuracy: 0.9893 - loss: 0.0433


[0.04326127842068672, 0.989300012588501]

In [85]:
pred_cnn_arr = model_cnn.predict(X_test)
pred_cnn = np.argmax(pred_cnn_arr, axis = 1) 

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [86]:
print(f"The accuracy score is {accuracy_score(y_test, pred_cnn)*100}%")

The accuracy score is 98.92999999999999%


In [87]:
print(classification_report(y_test, pred_cnn))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99       980
           1       1.00      0.99      1.00      1135
           2       0.98      0.99      0.99      1032
           3       0.98      1.00      0.99      1010
           4       0.99      0.99      0.99       982
           5       0.99      0.99      0.99       892
           6       0.97      0.99      0.98       958
           7       1.00      0.98      0.99      1028
           8       1.00      0.98      0.99       974
           9       0.98      0.99      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000



In [88]:
print(confusion_matrix(y_test,pred_cnn))
# sns.heatmap(confusion_matrix(y_test,pred_k),cmap='viridis',annot=True);  

[[ 964    0    0    1    1    0   13    0    0    1]
 [   1 1129    1    2    0    0    2    0    0    0]
 [   0    0 1026    1    0    0    3    2    0    0]
 [   0    0    1 1007    0    2    0    0    0    0]
 [   0    0    1    0  973    0    3    1    1    3]
 [   0    0    0    6    0  880    1    0    0    5]
 [   1    3    0    0    0    2  952    0    0    0]
 [   0    1   13    4    0    0    0 1003    0    7]
 [   2    0    2    2    1    2    3    0  959    3]
 [   0    0    0    2    4    3    0    0    0 1000]]
