# Handwritten Digit Recognition 
ระบบรู้จำเลขลายมือเขียน

In [None]:
from sklearn import datasets 
import matplotlib.pyplot as plt
import numpy as np

## Dataset

In [None]:
# Load scikit-learn's bundled handwritten-digits dataset.
digits = datasets.load_digits()         # The digits dataset
# List the fields available on the returned dataset object.
digits.keys()

In [None]:
# Print the dataset's built-in description text.
print(digits.DESCR)

In [None]:
# Shape of the image array (one 2-D image per sample).
digits.images.shape

In [None]:
# Shape of the dataset's `data` array, for comparison with `images` above.
digits.data.shape

In [None]:
# digits.images[:1]
# Peek at the first two images as raw pixel arrays.
digits.images[:2]

In [None]:
# First sample's row in the `data` array.
digits.data[:1]

In [None]:
# Labels of the first 40 samples.
digits.target[:40]

In [None]:
# Number of labels; expected to match the number of images.
digits.target.shape

In [None]:
# Pick one image to inspect. Only the last assignment takes effect, so `img`
# ends up as sample 0; the earlier lines are alternatives left for switching.
img = digits.images[2]
# img = digits.images[1]
img = digits.images[0]
img.shape

In [None]:
# Print the pixel intensities of the selected image.
print(img)

In [None]:
np.isnan(digits.images).sum()    # count NaN entries; 0 means no missing values
# np.isnan(digits.data).sum()       

## Visualization

In [None]:
# Render the first digit with matplotlib's default colormap.
img = digits.images[0]

plt.figure(figsize=(2, 2))  # small figure for a small image
plt.imshow(img) 
plt.show()

In [None]:
# Render sample 110 with the `gray` colormap.
# img = digits.images[1796]
img = digits.images[110]
plt.figure(figsize=(2, 2))
plt.imshow(img, cmap=plt.cm.gray)
plt.show()

In [None]:
# Render whatever `img` currently holds with the reversed grayscale colormap.
plt.figure(figsize=(2, 2))
plt.imshow(img, cmap=plt.cm.gray_r)
# plt.axis('off')
plt.show()

In [None]:
def visual_multi(i):
    """Plot up to 40 consecutive digits with their labels, starting at digit i.

    The digits are drawn on a 4x10 grid in reversed grayscale, each titled
    with its label from ``digits.target``. Reads the module-level ``digits``
    dataset.

    Args:
        i: Index of the first digit to plot.

    Raises:
        IndexError: If ``i`` is at or beyond the end of the dataset.
    """
    nplots = 40
    n_total = len(digits.images)
    if i >= n_total:
        raise IndexError(f'start index {i} is out of range for {n_total} digits')

    # Stop at the end of the dataset so a start index within the last 40
    # samples plots the remaining digits instead of failing part-way through.
    stop = min(i + nplots, n_total)

    plt.figure(figsize=(8, 4))
    for j, idx in enumerate(range(i, stop)):
        plt.subplot(4, 10, j + 1)
        plt.imshow(digits.images[idx], cmap=plt.cm.gray_r)
        plt.title(digits.target[idx])
        # Hide tick marks but keep the frame around each digit.
        plt.xticks([])
        plt.yticks([])

    plt.show()

In [None]:
# Show the block of digits that starts at sample 1000.
visual_multi(1000)

## เตรียมข้อมูล (Prepare data)

In [None]:
# Shape of the image array before it is flattened into feature rows.
digits.images.shape

In [None]:
# Raw pixel array of the first image.
digits.images[0]

In [None]:
# Dimensions of a single image.
digits.images[0].shape

In [None]:
n_samples = len(digits.images)  # number of samples (1797)
n_samples

In [None]:
# Image array shape once more, right before the reshape below.
digits.images.shape

In [None]:
X = digits.images.reshape(n_samples, -1)      # flatten each 8x8 image into a 64-element row vector
X.shape

In [None]:
X[:1]       # first sample (index 0), kept 2-D by slicing
# X[:2]       # first two samples (index 0, 1)

In [None]:
# Slicing keeps the row dimension, so this is one row of features.
X[:1].shape

In [None]:
# List the dataset fields again.
digits.keys()

In [None]:
# Shape of the dataset's own `data` array, to compare with X.shape below.
digits.data.shape

In [None]:
# Shape of the manually flattened feature matrix.
X.shape

In [None]:
# First flattened sample from X.
X[0]

In [None]:
# First row of `digits.data`, for a side-by-side check against X[0].
digits.data[0]

In [None]:
# Use the dataset labels as the target vector and preview the first 20.
y = digits.target
y[:20]

In [None]:
# Feature matrix shape.
X.shape

In [None]:
# Target vector shape; its length should equal the number of rows in X.
y.shape

## Scatter Plot

In [None]:
from sklearn.decomposition import PCA
import pandas as pd
import seaborn as sns

# Project the 64 pixel features onto the first two principal components so
# the samples can be drawn on a 2-D scatter plot.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

df = pd.DataFrame(X_pca, columns=['pca1', 'pca2'])
df['labels'] = y

sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = 6, 6
# 'tab10' provides ten distinct colours, one per digit class. 'Set1' has only
# nine, so two of the ten classes would be drawn in the same colour.
g = sns.scatterplot(data=df, x='pca1', y='pca2', hue='labels',
                    s=50, palette='tab10', legend='full')
# Place the legend outside the axes so it does not cover any points.
g.legend(loc='center right', bbox_to_anchor=(1.2, 0.5), ncol=1)
plt.title('PCA: 2 Components (1,2)')
plt.show()

## Train-test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the samples for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=20,
)
X_train.shape, X_test.shape

## Train

In [None]:
from sklearn.svm import SVC

# Support vector classifier with scikit-learn's default hyperparameters.
model = SVC()

In [None]:
# Fit the classifier on the training portion only.
model.fit(X_train, y_train)

## Evaluation

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict the held-out samples once and reuse the predictions for every report.
y_pred = model.predict(X_test)

# Score on the test set, followed by the per-class report.
print(f'Score: {model.score(X_test, y_test):.4f}')
print(classification_report(y_test, y_pred))

# Confusion matrix of true labels versus predictions, printed as raw counts.
cm = confusion_matrix(y_test, y_pred)
print(cm)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Plain background and a slightly larger font keep the cell counts legible.
sns.set_style('white')
plt.rcParams['font.size'] = 12

# Recompute the matrix here so this cell does not depend on the printed one.
cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(confusion_matrix=cm).plot()
plt.title('Confusion Matrix')
plt.show()

In [None]:
# import scikitplot as skplot
# skplot.metrics.plot_confusion_matrix(y_test, y_pred)
# plt.show()

In [None]:
# True labels of the first 20 test samples.
y_test[:20]

In [None]:
# Predict the test set again (repeats the evaluation cell's prediction) and
# show the first 20 predictions for comparison with the true labels.
y_pred = model.predict(X_test)
y_pred[:20]

## ที่ทำนายผิด (Misclassifications)

In [None]:
# Positions in the test set where the prediction differs from the true label.
# np.where returns a tuple, so the index array itself is idx_miss[0].
idx_miss = np.where(y_test != y_pred)
idx_miss

In [None]:
# True labels of the misclassified samples.
y_test[idx_miss]

In [None]:
# Labels the model predicted for those same samples.
y_pred[idx_miss]

In [None]:
# Feature row of the first misclassified sample.
X_test[idx_miss][0]

In [None]:
# The same sample reshaped back into an 8x8 image.
X_test[idx_miss][0].reshape(8,8)

In [None]:
# Draw the first misclassified test sample. The flat feature row is reshaped
# back to 8x8 first; the commented-out line passes it without reshaping.
fig = plt.figure(figsize=(2, 2)) 
# plt.imshow(X_test[idx_miss][0], cmap=plt.cm.gray_r)
plt.imshow(X_test[idx_miss][0].reshape(8,8), cmap=plt.cm.gray_r)
# plt.imshow(X_test[idx_miss][0].reshape(8,8), cmap=plt.cm.gray)
plt.show()

In [None]:
# Plot up to `nplots` misclassified test digits. Each subplot is titled with
# the predicted label and shows the true label underneath as the x-axis label.
nplots = 20
ncols = 7
miss_to_show = idx_miss[0][:nplots]

# Size the grid from the number of digits actually drawn. A fixed 2x7 grid has
# only 14 slots, so a 15th misclassification would make plt.subplot raise.
nrows = max(1, -(-len(miss_to_show) // ncols))  # ceiling division
fig = plt.figure(figsize=(8, 1.75 * nrows))     # same height per row as 8x3.5 for 2 rows

for j, k in enumerate(miss_to_show):
    plt.subplot(nrows, ncols, j + 1)
    plt.imshow(X_test[k].reshape(8, 8), cmap=plt.cm.gray_r)
    title = f'p-->{y_pred[k]}'   # predicted label; the true label goes on the x-axis
    plt.title(title)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(y_test[k])

plt.show()

In [None]:
# Re-train the model on the full dataset (X, y), not just the training split.
# The evaluation results above were measured on the earlier model, not this one.
model = SVC()
model.fit(X, y)

## Save the Model

In [None]:
import joblib

# Write the fitted model to a file in the current working directory.
# NOTE(review): joblib files are pickle-based; only load them from trusted sources.
joblib.dump(model, 'digit_recognition_model.pkl')

## ใช้ Model Predict Image file

In [None]:
# Check whether Pillow is installed and which version.
!pip show pillow

In [None]:
# Install Pillow if the check above found nothing.
# NOTE(review): `!pip` uses whichever pip is first on the shell PATH; if the
# package is not visible to this kernel afterwards, `%pip install` targets it.
!pip install Pillow

In [None]:
from PIL import Image, ImageOps

# Load an external digit image from disk for the trained model to classify.
# model = SVC()
# model.fit(X, y)

imgfile = 'data/digits/digit0.png'      # digit image file (number zero)

img = Image.open(imgfile).convert('L')  # convert to grayscale
img

In [None]:
# Invert the intensities.
# NOTE(review): presumably the input file is dark ink on a light background and
# the training digits are the opposite; confirm against the image file.
img = ImageOps.invert(img)              # invert
img

In [None]:
# Downsample to 8x8 so the image has the same resolution as the training
# digits. Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
# filter under its current name and also exists in older Pillow releases.
img = img.resize((8, 8), Image.LANCZOS)
img

In [None]:
# Rescale the 0-255 grayscale values to a 0-16 scale and truncate to integers.
# NOTE(review): 0-16 is presumably the training data's range; see digits.DESCR.
pixel = (np.array(img) / 255.0 * 16).astype('int')

# The model expects one row per sample, so flatten the 8x8 grid to 1x64.
my_digit = pixel.reshape(1, -1)
predicted = model.predict(my_digit)

# Show the rescaled image with the predicted label as its title.
plt.figure(figsize=(2, 2))
plt.imshow(pixel, cmap=plt.cm.gray_r)
plt.title(f'Predicted:{predicted[0]}')
plt.xticks([])
plt.yticks([])
plt.show()

In [None]:
# Rescaled 8x8 pixel grid that was fed to the model.
pixel

In [None]:
# The same pixels flattened into a single feature row.
my_digit

In [None]:
# Shape of the model input: one sample row.
my_digit.shape

## Decision Regions

In [None]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from mlxtend.plotting import plot_decision_regions

# Decision regions can only be drawn in two dimensions, so project the
# features onto the first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Fit a separate classifier on the 2-D projection. Re-fitting `model` here
# would replace the 64-feature model used by the image-prediction cells, which
# would then fail if re-run after this cell.
model_2d = SVC()
model_2d.fit(X_pca, y)

ax = plot_decision_regions(np.array(X_pca), y,
                           clf=model_2d,
                           legend=2)

# Rebuild the legend with the digit classes and move it outside the axes.
handles, labels = ax.get_legend_handles_labels()
class_names = np.unique(y)
ax.legend(handles, class_names, framealpha=0.5, loc='upper left', bbox_to_anchor=(1.1, 1.0))

plt.title('SVM: Digits Decision Regions')
plt.xlabel('PCA1')
plt.ylabel('PCA2')

plt.show()

## Cross-validation

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a default SVC on the full dataset.
model = SVC()
cvs = cross_val_score(estimator=model, X=X, y=y, cv=5)

# Per-fold scores, then their mean.
print(np.round(cvs, 4))
print(f'Average Score: {cvs.mean():.3f}')

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Same 5-fold protocol with a default decision tree, for comparison.
model = DecisionTreeClassifier()
cvs = cross_val_score(estimator=model, X=X, y=y, cv=5)

# Per-fold scores, then their mean.
print(np.round(cvs, 4))
print(f'Average Score: {cvs.mean():.3f}')

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Same 5-fold protocol with a 5-nearest-neighbours classifier.
model = KNeighborsClassifier(n_neighbors=5)
cvs = cross_val_score(estimator=model, X=X, y=y, cv=5)

# Per-fold scores, then their mean.
print(f'cross val scores {cvs.round(3)}')
print(f'Average Score: {cvs.mean():.3f}')

In [None]:
from sklearn.linear_model import LogisticRegression
# model = LogisticRegression(max_iter=200)
# `multi_class` is deliberately not passed: 'auto' is already the default, and
# passing the argument explicitly is deprecated since scikit-learn 1.5.
model = LogisticRegression(solver='liblinear')

# Same 5-fold protocol as the other classifiers: per-fold scores, then the mean.
cvs = cross_val_score(model, X, y, cv=5)
print(cvs.round(4))
print(f'Average Score: {cvs.mean():.3f}')