In [1]:
import numpy as np 
import cv2 

# k-nearest-neighbour digit classification on a sheet of handwritten digits.
# The sheet is cut into a 50 x 100 grid of cells; the left 50 columns of the grid
# are used for training and the right 50 columns for testing.

# Load the large image, which is composed of many small digit images.
image_path = "assets/digits.png"
img = cv2.imread(image_path)
# cv2.imread reports failure by returning None rather than raising, so stop here
# with a clear message instead of an opaque cvtColor error on the next line.
if img is None:
    raise OSError(
        "cv2.imread could not read {!r} (missing or undecodable file)".format(image_path)
    )
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Split the image into 5000 cells: 50 horizontal bands, each cut into 100 pieces.
# Each cell is expected to be 20x20 pixels (the reshape to 400 below relies on it).
ceils = [np.hsplit(row, 100) for row in np.vsplit(gray, 50)]

# Stack into a single numpy array of shape (50, 100, 20, 20).
x = np.array(ceils)

# Prepare training and test data: one flattened 400-pixel row per cell.
train = x[:, :50].reshape(-1, 400).astype(np.float32)    # shape (2500, 400)
test = x[:, 50:100].reshape(-1, 400).astype(np.float32)  # shape (2500, 400)

# Create the labels: digits 0-9, each repeated for 250 consecutive samples.
# NOTE(review): this assumes the sheet stores the digits in order, five grid rows
# per digit (5 rows x 50 columns = 250 samples) -- confirm against the image.
k = np.arange(10)
train_labels = np.repeat(k, 250)[:, np.newaxis]  # shape (2500, 1)
# The test half has the same row layout, so it shares the same labels.
test_labels = train_labels.copy()

# Create the kNN model, train it, and classify the test samples using 5 neighbours.
knn = cv2.ml.KNearest_create()
knn.train(train, cv2.ml.ROW_SAMPLE, train_labels)
ret, result, neighbours, dist = knn.findNearest(test, k=5)

# Compute the accuracy as the percentage of predictions equal to their label.
matches = result == test_labels
correct = np.count_nonzero(matches)
accuracy = correct * 100.0 / result.size
print(accuracy)

91.76


In [2]:
# Persist the training samples and their labels in a single .npz archive.
np.savez("knn_data.npz", **{"train": train, "train_labels": train_labels})

# Read the archive back: list the stored array names, then rebind both arrays
# from the file contents.
with np.load("knn_data.npz") as data:
    print(data.files)
    train, train_labels = (data[key] for key in ("train", "train_labels"))
