<a href="https://colab.research.google.com/github/victorgau/Python_ML_DL/blob/master/4-05%20手寫數字辨識.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 手寫數字辨識

參考連結：

* http://scikit-learn.org/stable/tutorial/basic/tutorial.html
* https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

## 使用 sklearn 裡面的數字資料集

In [None]:
from sklearn import datasets

# Load the digits dataset that ships with scikit-learn. The returned object
# exposes the .data, .target and .images attributes inspected in later cells.
digits = datasets.load_digits()

In [None]:
# Feature array: this is what the classifier is cross-validated and fitted on later.
digits.data

In [None]:
# Label array paired with digits.data for cross-validation and fitting.
digits.target

In [None]:
# Pixel array of the first sample (rendered with imshow a few cells further down).
digits.images[0]

In [None]:
# Dimensions of a single sample image; presumably (8, 8), given that test
# crops are later resized to 8x8 before prediction — confirm from the output.
digits.images[0].shape

In [None]:
# Render the first sample with the reversed grayscale colormap ("gray_r").
plt.imshow(digits.images[0], cmap="gray_r")

In [None]:
# Show the first 64 samples in an 8x8 grid of subplots, filled row by row.
for idx in range(64):
    plt.subplot(8, 8, idx + 1)
    plt.imshow(digits.images[idx], cmap="gray_r")

## 使用 SVM 來做辨識

In [None]:
# Support vector classifier with fixed hyper-parameters (C=100, gamma=0.001).
clf = SVC(C=100.0, gamma=0.001)

In [None]:
# Score the (still unfitted) classifier with 10-fold cross-validation and
# display the mean of the per-fold scores.
cv_scores = cross_val_score(clf, digits.data, digits.target, cv=10)
cv_scores.mean()

In [None]:
# Fit the classifier on the complete digits dataset (no hold-out split here).
features, labels = digits.data, digits.target
clf.fit(features, labels)

## 實際應用

In [None]:
import cv2

In [None]:
# Load the test photo. cv2.imread signals a missing or unreadable file by
# returning None rather than raising, so fail early with a clear message
# instead of a cryptic error inside cvtColor.
im = cv2.imread('test2.jpg')
if im is None:
    raise FileNotFoundError("cv2.imread could not read 'test2.jpg'")

# Convert to grayscale, then binarize: pixels above 127 become 255, the rest 0.
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, cv2.THRESH_BINARY)

# findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in 2.x/4.x; taking the last two items works for all.
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)[-2:]

In [None]:
# Keep the bounding box (x, y, w, h) of every contour whose height lies
# strictly between 80 and 300 pixels; everything else is discarded.
boxes = [
    tuple(rect)
    for rect in map(cv2.boundingRect, contours)
    if 80 < rect[3] < 300
]

In [None]:
# Bounding boxes that passed the height filter, as (x, y, w, h) tuples.
boxes

In [None]:
# Outline every kept bounding box on the colour image (drawn in place on `im`)
# with a 2-pixel line in colour (0, 255, 0).
for left, top, width, height in boxes:
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(im, top_left, bottom_right, (0, 255, 0), 2)

In [None]:
# Reverse the channel axis before display (the image is treated as BGR when it
# is converted to grayscale earlier, while imshow expects RGB).
plt.imshow(im[:, :, ::-1])

In [None]:
# Tuples compare element by element, so this orders the boxes by x first,
# i.e. left to right across the image.
boxes = sorted(boxes)

In [None]:
def get_image(x, image=None, rects=None):
    """Crop one bounding box out of an image.

    Parameters
    ----------
    x : int
        Index of the box to extract.
    image : 2-D array, optional
        Image to crop from. Defaults to the notebook-level ``thresh``.
    rects : sequence of (x, y, w, h), optional
        Bounding boxes to index into. Defaults to the notebook-level ``boxes``.

    Returns
    -------
    The slice ``image[top:top + height, left:left + width]`` for the selected
    box (a view into ``image`` when it is a NumPy array, not a copy).
    """
    # Fall back to the notebook globals so existing one-argument calls keep
    # working; explicit arguments make the helper usable on other data.
    if image is None:
        image = thresh
    if rects is None:
        rects = boxes
    left, top, width, height = rects[x]
    return image[top:top + height, left:left + width]

## 不管 Aspect Ratio，直接縮小成 (8, 8) 來辨識看看

In [None]:
# Squash every crop straight to 8x8 (ignoring its aspect ratio), show it, and
# use the classifier's prediction as the subplot title.
fig = plt.figure()
count = len(boxes)
for idx in range(count):
    small = cv2.resize(get_image(idx), (8, 8), interpolation=cv2.INTER_AREA)
    # Invert and rescale from 0..255 to 0..16 before feeding the classifier.
    testimg = (255 - small) / 255 * 16
    ax = fig.add_subplot(1, count, idx + 1)
    ax.imshow(testimg, cmap="gray_r")
    testimg = testimg.reshape(1, 64)
    ax.set_title(clf.predict(testimg)[0])

In [None]:
# Aspect-ratio-preserving variant for a single digit: scale the crop to a
# height of 8 pixels, then pad it left and right with zeros up to 8 columns.
n = 6  # index into the sorted `boxes` list
digit = get_image(n)
h, w = digit.shape
ratio = 8 / h
# Scaled width (truncated), clamped to 1..8. Without the clamp a very narrow
# crop yields width 0, which cv2.resize rejects, and a crop wider than it is
# tall yields a negative padding width, which np.zeros rejects.
nw = min(8, max(1, int(w * ratio)))
testimg = cv2.resize(digit, (nw, 8), interpolation=cv2.INTER_AREA)
# Invert and rescale from 0..255 to 0..16 before feeding the classifier.
testimg = (255 - testimg) / 255 * 16
p1 = (8 - nw) // 2   # padding columns on the left
p2 = 8 - p1 - nw     # remaining padding columns on the right
p01 = np.zeros((8, p1))
p02 = np.zeros((8, p2))
testimg = np.hstack((p01, testimg, p02))
plt.imshow(testimg, cmap="gray_r")
testimg = testimg.reshape(1, 64)
clf.predict(testimg)

In [None]:
# Aspect-ratio-preserving prediction for every box: scale each crop to a height
# of 8 pixels, centre it in an 8x8 canvas with zero padding, show it, and use
# the classifier's prediction as the subplot title.
fig = plt.figure()
c = len(boxes)
for n in range(c):
    digit = get_image(n)
    h, w = digit.shape
    ratio = 8 / h
    # Scaled width rounded to nearest, clamped to 1..8. Without the clamp a
    # very narrow crop yields width 0, which cv2.resize rejects, and a crop
    # wider than it is tall yields a negative padding width, which np.zeros
    # rejects.
    nw = min(8, max(1, int(w * ratio + 0.5)))
    testimg = cv2.resize(digit, (nw, 8), interpolation=cv2.INTER_AREA)
    # Invert and rescale from 0..255 to 0..16 before feeding the classifier.
    testimg = (255 - testimg) / 255 * 16
    p1 = (8 - nw) // 2   # padding columns on the left
    p2 = 8 - p1 - nw     # remaining padding columns on the right
    p01 = np.zeros((8, p1))
    p02 = np.zeros((8, p2))
    testimg = np.hstack((p01, testimg, p02))
    ax = fig.add_subplot(1, c, n + 1)
    ax.imshow(testimg, cmap="gray_r")
    testimg = testimg.reshape(1, 64)
    ax.set_title(clf.predict(testimg)[0])