In [1]:
import numpy as np
import cv2 as cv

In [12]:
# Read image
# cv.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise surface below as an unrelated AttributeError.
img = cv.imread('../digits.png')
if img is None:
    raise FileNotFoundError("could not read image '../digits.png'")
print(img.size)
print(img.shape)
# The loaded image is converted from BGR colour to a single grayscale channel
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

6000000
(1000, 2000, 3)


In [14]:
# The sheet is 1000 x 2000 pixels and every digit tile is 20 x 20 pixels,
# so the grid dimensions follow from dividing by the tile edge length.
digit_rows = 1000 / 20
digit_cols = 2000 / 20
print('rows of digits:', digit_rows)
print('columns of digits:', digit_cols)
print('number of handwritten digits:', 50 * 100)

rows of digits: 50.0
columns of digits: 100.0
number of handwritten digits: 5000


In [16]:
# Splitting the image to 5000 cells, each 20x20 size:
# first cut the sheet into 50 horizontal bands, then each band into 100 tiles.
row_bands = np.vsplit(gray, 50)
cells = [np.hsplit(band, 100) for band in row_bands]

In [19]:
# At this point `cells` is still a nested Python list of tiles
print(type(cells))

# Stack the nested list into one ndarray; its shape will be (50, 100, 20, 20)
x = np.asarray(cells)

<class 'list'>


In [21]:
# Shape of the tile in grid row 0, column 0
x[0, 0].shape

(20, 20)

In [22]:
# Preparing training and test data
# The first 50 tile columns form the training set; every 20x20 tile is
# flattened into a 400-element float32 row, giving a (2500, 400) matrix.
train_tiles = x[:, :50]
train = train_tiles.reshape(-1, 400).astype(np.float32)

In [None]:
# Scratch check: compare the flattened training layout with the un-flattened tiles.
# A notebook cell only echoes its final expression, so the intermediate values
# are printed explicitly; otherwise they would be evaluated and discarded.
print(train.shape)
print(x.shape)
y = x[:, :50].reshape(2500, 400)  # one flattened tile per row
y2 = x[:, :50]                    # same tiles, still as 20x20 blocks
print(y2.shape)
print(y[0, 0])  # a single pixel of the first flattened tile
y2[0, 0]        # the whole first 20x20 tile

In [32]:
# Tile columns 50..99 form the test set, flattened the same way as the
# training data: one 400-element float32 row per tile -> (2500, 400)
test_tiles = x[:, 50:100]
test = test_tiles.reshape(-1, 400).astype(np.float32)

In [45]:
# Create labels for train and test data
# Each digit 0..9 is repeated 250 times in order, giving one label per row of
# the 2500-row sample matrix (assumes the samples are grouped by digit in that
# order - TODO confirm against the image layout).
k = np.arange(10)
train_labels_ = np.repeat(k, 250)               # flat vector, shape (2500,)
train_label = np.repeat(k, 250).reshape(-1, 1)  # column vector, shape (2500, 1)
print(train_label.shape)
print(train_label)

(2500, 1)
[[0]
 [0]
 [0]
 ...
 [9]
 [9]
 [9]]


In [51]:
# Demonstrate np.newaxis: indexing a 1-D vector with [:, np.newaxis] turns it
# into an (n, 1) column. A dedicated name is used here so the digit-tile array
# `x` is not overwritten; rebinding `x` to a 1-D array would make the cells
# that slice `x[:, :50]` / `x[:, 50:100]` fail when they are re-run.
demo = np.arange(3)
print(demo)
print(demo[:, np.newaxis])

[0 1 2]
[[0]
 [1]
 [2]]


In [52]:
# The test labels are an independent copy of the training labels (same values,
# separate buffer), so modifying one array does not affect the other.
test_labels = np.copy(train_label)

In [55]:
# Initiate kNN, train it on the training data, then classify the test data.
# Note: the query below uses k=5 neighbours (not k=1).
knn = cv.ml.KNearest_create()
knn.train(train, cv.ml.ROW_SAMPLE, train_label) # ROW_SAMPLE: each row of `train` is one sample (samples are rows, not columns)
# findNearest returns four values; `result` holds the predicted label per test
# row (it is compared against test_labels later). `neigbours` and `dist` are
# presumably the neighbour labels and distances - verify against the OpenCV docs.
ret, result, neigbours, dist = knn.findNearest(test, k=5)

In [60]:
# First the scalar return value of findNearest, then the per-sample
# predicted labels (one row per test sample), each on its own line
print(ret, result, sep='\n')

0.0
[[0.]
 [0.]
 [0.]
 ...
 [9.]
 [9.]
 [9.]]


In [67]:
# Check accuracy of classification
# Element-wise comparison yields one boolean per test sample; the share of
# True entries, expressed as a percentage, is the accuracy.
matches = (result == test_labels)
correct = np.count_nonzero(matches)
total = result.size
accuracy = 100.0 * correct / total
print(accuracy)

91.76


In [68]:
# Store data in files
# The label array built above is named `train_label`; no `train_labels`
# variable exists before this cell, so referencing it raises NameError on a
# fresh kernel. The archive key stays 'train_labels' so loading code that
# reads data['train_labels'] keeps working.
np.savez('knn_data.npz', train=train, train_labels=train_label)

In [71]:
# Load data from file
# The context manager closes the archive once both arrays have been read out.
with np.load('knn_data.npz') as data:
    print(data.files)
    train, train_labels = data['train'], data['train_labels']

# Echo the list of stored array names as the cell's output
data.files

['train', 'train_labels']


['train', 'train_labels']