# Handwritten Digit Classification

## Import Libraries

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

## Import Data

In [4]:
from sklearn.datasets import load_digits

In [5]:
# Load scikit-learn's bundled digits dataset. The returned object is a dataset
# container (not a pandas DataFrame, despite the name `df`); later cells read
# its `.images` and `.target` attributes.
df = load_digits()

## Data Preprocessing

In [6]:
# Dimensions of the raw image stack: (number of images, rows, columns).
np.shape(df.images)

(1797, 8, 8)

In [7]:
# Show the first image as its full 2-D grid of pixel intensities.
df.images[0, :, :]

array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.],
       [ 0.,  0., 13., 15., 10., 15.,  5.,  0.],
       [ 0.,  3., 15.,  2.,  0., 11.,  8.,  0.],
       [ 0.,  4., 12.,  0.,  0.,  8.,  8.,  0.],
       [ 0.,  5.,  8.,  0.,  0.,  9.,  8.,  0.],
       [ 0.,  4., 11.,  0.,  1., 12.,  7.,  0.],
       [ 0.,  2., 14.,  5., 10., 12.,  0.,  0.],
       [ 0.,  0.,  6., 13., 10.,  0.,  0.,  0.]])

In [8]:
# Per-image dimensions: drop the leading sample axis from the stack's shape.
df.images.shape[1:]

(8, 8)

In [9]:
# Flatten every 8x8 image into a single 64-value row so each sample becomes
# one feature vector: (n_samples, 8, 8) -> (n_samples, 64).
n_samples = df.images.shape[0]
data = df.images.reshape(n_samples, -1)

In [10]:
# First flattened sample: the same pixels as the first image, laid out as one row.
data[0, :]

array([ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.,  0.,  0., 13., 15., 10.,
       15.,  5.,  0.,  0.,  3., 15.,  2.,  0., 11.,  8.,  0.,  0.,  4.,
       12.,  0.,  0.,  8.,  8.,  0.,  0.,  5.,  8.,  0.,  0.,  9.,  8.,
        0.,  0.,  4., 11.,  0.,  1., 12.,  7.,  0.,  0.,  2., 14.,  5.,
       10., 12.,  0.,  0.,  0.,  0.,  6., 13., 10.,  0.,  0.,  0.])

In [11]:
# Length of a single flattened sample (the feature count per row).
data.shape[1:]

(64,)

In [12]:
# Overall feature-matrix dimensions: (samples, features).
np.shape(data)

(1797, 64)

## Scaling Data

In [13]:
# Smallest pixel intensity before scaling.
np.min(data)

0.0

In [14]:
# Largest pixel intensity before scaling; this is the divisor used to normalise.
np.max(data)

16.0

In [15]:
# Scale pixel intensities into [0, 1].
# The scaled matrix is rebuilt from the raw images instead of from `data`
# itself, so re-running this cell cannot divide by 16 a second time.
MAX_PIXEL_VALUE = 16.0  # largest intensity reported by data.max() before scaling
data = df.images.reshape((len(df.images), -1)) / MAX_PIXEL_VALUE

In [16]:
# Sanity check after scaling: the minimum should still be 0.
np.min(data)

0.0

In [17]:
# Sanity check after scaling: the maximum should now be 1.
np.max(data)

1.0

In [18]:
# First sample again, now expressed as fractions of the maximum intensity.
data[0, :]

array([0.    , 0.    , 0.3125, 0.8125, 0.5625, 0.0625, 0.    , 0.    ,
       0.    , 0.    , 0.8125, 0.9375, 0.625 , 0.9375, 0.3125, 0.    ,
       0.    , 0.1875, 0.9375, 0.125 , 0.    , 0.6875, 0.5   , 0.    ,
       0.    , 0.25  , 0.75  , 0.    , 0.    , 0.5   , 0.5   , 0.    ,
       0.    , 0.3125, 0.5   , 0.    , 0.    , 0.5625, 0.5   , 0.    ,
       0.    , 0.25  , 0.6875, 0.    , 0.0625, 0.75  , 0.4375, 0.    ,
       0.    , 0.125 , 0.875 , 0.3125, 0.625 , 0.75  , 0.    , 0.    ,
       0.    , 0.    , 0.375 , 0.8125, 0.625 , 0.    , 0.    , 0.    ])

## Train Test Split Data

In [19]:
from sklearn.model_selection import train_test_split

In [20]:
# Hold out part of the scaled samples for evaluation; the fixed random_state
# keeps the shuffle identical between runs.
# NOTE(review): test_size=0.7 puts 70% of the rows in the test set and only 30%
# in training — confirm this is intended and not a swapped train/test fraction.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    df.target,
    test_size=0.7,
    random_state=222529,
)

In [21]:
# Shapes of the four split arrays, in the order train_test_split returned them.
tuple(split.shape for split in (x_train, x_test, y_train, y_test))

((539, 64), (1258, 64), (539,), (1258,))

## Random Forest Model

In [22]:
from sklearn.ensemble import RandomForestClassifier

In [23]:
# Random forest with default hyperparameters.
# The seed is pinned so Restart & Run All rebuilds the same trees and therefore
# the same predictions and metrics; the value mirrors the train/test split seed.
rf = RandomForestClassifier(random_state=222529)

In [24]:
# Train the forest on the training features and their digit labels.
rf.fit(X=x_train, y=y_train)

RandomForestClassifier()

## Predict Test Data

In [25]:
# Predict a digit label for every held-out test sample.
y_pred = rf.predict(X=x_test)

In [26]:
# Peek at the predicted digit labels for the test rows.
y_pred

array([9, 0, 4, ..., 9, 2, 0])

## Model Accuracy

In [27]:
from sklearn.metrics import confusion_matrix, classification_report

In [28]:
# Confusion matrix over the test set: true labels first, predictions second.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[124,   0,   0,   0,   1,   0,   0,   0,   0,   0],
       [  0, 125,   0,   0,   0,   0,   0,   0,   0,   0],
       [  1,   6, 117,   0,   0,   0,   0,   0,   2,   0],
       [  0,   0,   1, 120,   0,   0,   0,   1,   5,   0],
       [  0,   1,   0,   0, 120,   0,   0,   3,   0,   1],
       [  0,   0,   0,   0,   1, 122,   1,   0,   0,   1],
       [  0,   1,   0,   0,   1,   0, 129,   0,   0,   0],
       [  0,   0,   0,   0,   4,   0,   0, 113,   0,   1],
       [  0,   8,   1,   1,   1,   0,   1,   1, 114,   0],
       [  0,   1,   0,   3,   0,   2,   0,   1,   2, 120]])

In [29]:
# The report is a multi-line string, so it is printed to keep the table layout
# instead of being shown as a quoted repr.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       125
           1       0.88      1.00      0.94       125
           2       0.98      0.93      0.96       126
           3       0.97      0.94      0.96       127
           4       0.94      0.96      0.95       125
           5       0.98      0.98      0.98       125
           6       0.98      0.98      0.98       131
           7       0.95      0.96      0.95       118
           8       0.93      0.90      0.91       127
           9       0.98      0.93      0.95       129

    accuracy                           0.96      1258
   macro avg       0.96      0.96      0.96      1258
weighted avg       0.96      0.96      0.96      1258

