In [None]:
import argparse
import os

import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Convert an image to a feature vector

In [None]:
def image_to_feature_vector(image, size=(32,32)):
    """Resize ``image`` to a fixed size and return its pixels as a flat vector.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``.

    Returns:
        The resized image collapsed to one dimension via ``.flatten()``.
    """
    resized = cv2.resize(image, size)
    return resized.flatten()

In [None]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    """Describe ``image`` by a normalized, flattened 3-D HSV color histogram.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_BGR2HSV``, so it
            is expected to be in BGR channel order.
        bins: Number of histogram bins for each of the three HSV channels.

    Returns:
        The normalized histogram flattened to one dimension.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Value ranges for the three channels, in channel order 0, 1, 2.
    channel_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv_image], [0, 1, 2], None, bins, channel_ranges)

    # The normalize call differs by OpenCV version: when imutils reports
    # OpenCV 2.x the normalized array is taken from the return value,
    # otherwise the histogram is normalized into the destination argument.
    if not imutils.is_cv2():
        cv2.normalize(histogram, histogram)
    else:
        histogram = cv2.normalize(histogram)

    return histogram.flatten()

In [None]:
# Describe every image in the training directory with two feature sets:
# the resized raw pixels and an HSV color histogram.
print("[INFO] describing images...")
# Directory holding the training images; adjust to your dataset location.
imagePaths = r"D:\machinelearning_algorithm\dataset\train"
# List the directory once instead of on every progress message. Sorting
# makes the sample order (and therefore the seeded train/test split)
# reproducible, because os.listdir returns entries in arbitrary order.
imageNames = sorted(os.listdir(imagePaths))
# initialize the raw pixel intensities matrix, the features matrix,
# and labels list
rawImages = []
features = []
labels = []
for (i, imagepath) in enumerate(imageNames):
    image = cv2.imread(os.path.join(imagePaths, imagepath))
    # cv2.imread returns None rather than raising when a file cannot be read
    # as an image; skip such entries instead of crashing in cv2.resize.
    if image is None:
        print("[WARN] skipping unreadable file: {}".format(imagepath))
        continue
    # The class label is the part of the file name before the first '.'.
    label = imagepath.split(os.path.sep)[-1].split('.')[0]
    pixel = image_to_feature_vector(image)
    hist = extract_color_histogram(image)

    rawImages.append(pixel)
    features.append(hist)
    labels.append(label)

    # Report progress every 1000 files.
    if i > 0 and i % 1000 == 0:
        print("[INFO] processed {}/{}".format(i, len(imageNames)))

In [None]:
# Turn the accumulated Python lists into NumPy arrays, then report how much
# memory the raw-pixel matrix and the histogram-feature matrix occupy.
rawImages, features, labels = np.array(rawImages), np.array(features), np.array(labels)
bytes_per_mb = 1024*1024.0
print("[INFO] pixels matrix: {:.2f}MB".format(rawImages.nbytes / bytes_per_mb))
print("[INFO] feature matrix: {:.2f}MB".format(features.nbytes / bytes_per_mb))

In [None]:
# Hold out 25% of the samples for testing. Both feature sets are split with
# the same options (including the seed), one call per representation.
split_options = dict(test_size=0.25, random_state=42)
trainRI, testRI, trainRL, testRL = train_test_split(rawImages, labels, **split_options)
trainFeat, testFeat, trainLabels, testLabels = train_test_split(features, labels, **split_options)

In [None]:
# Fit a 1-nearest-neighbour classifier on the raw pixel vectors and report
# its accuracy on the held-out split (n_jobs=-1 uses all available cores).
print("[INFO] evaluating raw pixel accuracy ..")
model = KNeighborsClassifier(n_neighbors=1, n_jobs=-1).fit(trainRI, trainRL)
acc = model.score(testRI, testRL)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc*100))

In [None]:
# Train and evaluate a 1-nearest-neighbour classifier on the color-histogram
# features. The log messages name the histogram representation so this
# result is not confused with the raw-pixel result.
print("[INFO] evaluating histogram accuracy ..")
model = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
model.fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] histogram accuracy: {:.2f}%".format(acc*100))

In [None]:
# Finding the best k in k-NN (raw pixel features):
# NOTE(review): k is chosen by its score on the held-out test split, so the
# reported best accuracy is an optimistic estimate; a separate validation
# split or cross-validation would be the sounder way to select k.
# k may not exceed the number of training samples (scikit-learn raises a
# ValueError when querying neighbors otherwise), so bound the search range.
max_k = min(99, len(trainRI))
k_search = 0
max_acc = 0
for k in range(1, max_k + 1):
    model = KNeighborsClassifier(n_neighbors=k, n_jobs=-1)
    model.fit(trainRI, trainRL)
    acc = model.score(testRI, testRL)
    # Strict '>' keeps the smallest k among ties.
    if acc > max_acc:
        max_acc = acc
        k_search = k
print(f'Best accuracy {max_acc} at k_value {k_search}')

In [None]:
# Plot test accuracy of k-NN on the raw pixel features as a function of k.
# k may not exceed the number of training samples, so bound the sweep.
max_k = min(99, len(trainRI))
k_search = 0
max_acc = 0
list_accuracy = []
list_k = []
for k in range(1, max_k + 1):
    model = KNeighborsClassifier(n_neighbors=k, n_jobs=-1)
    model.fit(trainRI, trainRL)
    acc = model.score(testRI, testRL)
    list_k.append(k)
    list_accuracy.append(acc)
    # Track the best-scoring k; strict '>' keeps the smallest k among ties.
    if acc > max_acc:
        max_acc = acc
        k_search = k

# The independent variable (k) goes on the x-axis and accuracy on the y-axis.
fig, ax = plt.subplots()
ax.plot(list_k, list_accuracy)
ax.set_xlabel("k (number of neighbors)")
ax.set_ylabel("test accuracy")
ax.set_title("k-NN raw pixel accuracy vs. k")
plt.show()