# Image Classification using $k$-NN

In [1]:
import os

import cv2
from imutils import paths
import numpy as np

In [2]:
# Gather every path yielded by imutils' list_images for the dataset
# directory into a concrete list.
image_paths = [*paths.list_images('datasets/animals')]

In [3]:
# Accumulators filled by the loading loop: one resized image and one class
# label per input file. They are two distinct list objects.
data, labels = [], []

In [4]:
# Load every image, shrink it to 32x32, and record it alongside its class label.
for image_path in image_paths:
    image = cv2.imread(image_path)

    # cv2.imread reports an unreadable or corrupt file by returning None
    # instead of raising. Skip such files so cv2.resize is never handed None,
    # which would abort the whole loop part-way through.
    if image is None:
        print('[WARN] skipping unreadable image: {}'.format(image_path))
        continue

    # The label is the name of the directory that directly contains the file,
    # i.e. the second-to-last component of the path.
    label = image_path.split(os.path.sep)[-2]

    # Fixed 32x32 size so every sample ends up with the same number of features.
    image = cv2.resize(image, (32, 32), interpolation=cv2.INTER_AREA)
    data.append(image)
    labels.append(label)

In [5]:
# Convert the accumulated Python lists into NumPy arrays in one step.
data, labels = np.array(data), np.array(labels)

In [6]:
from sklearn.neighbors import KNeighborsClassifier

In [7]:
# Flatten each image into a single feature vector (one row per image).
# -1 lets NumPy infer the row length from the array itself, so this keeps
# working if the resize dimensions change; for the current 32x32 3-channel
# images it is 3072, exactly as before.
data = data.reshape((data.shape[0], -1))

In [8]:
from sklearn.preprocessing import LabelEncoder

In [9]:
# Encoder used to turn the string class labels into integer codes.
le = LabelEncoder()

In [10]:
# Replace the string labels with integer codes; the encoder keeps the
# original names in le.classes_.
# NOTE(review): `labels` is overwritten here, so re-running this cell alone
# would fit the encoder on the integer codes instead of the class names.
# Restart the kernel and run all cells rather than re-executing this one.
labels = le.fit_transform(labels)

In [11]:
# Inspect the encoded labels (now integer codes rather than strings).
labels

array([1, 1, 1, ..., 2, 2, 2])

In [12]:
# Inspect the class names learned by the encoder; the position of a name in
# this array is the integer code it was assigned.
le.classes_

array(['cats', 'dogs', 'panda'], dtype='<U5')

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [14]:
# Hold out 25% of the samples for evaluation; the fixed random_state makes
# the shuffle (and therefore the split) repeatable for the same input order.
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=30,
)

In [15]:
# k-NN classifier with k = 5; every other hyper-parameter is left at the
# library default.
knn = KNeighborsClassifier(n_neighbors=5)

In [16]:
# Fit the classifier on the training split only; the test split stays unseen
# until evaluation.
knn.fit(trainX, trainY)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [17]:
# Predict an integer class code for every held-out sample.
y_pred = knn.predict(testX)

In [18]:
# Per-class precision / recall / F1 on the held-out split. target_names
# relabels the integer class codes with the original string names stored in
# the encoder. print() is used so the multi-line report renders as a table.
print(classification_report(testY, y_pred, target_names=le.classes_))

             precision    recall  f1-score   support

       cats       0.47      0.50      0.48       271
       dogs       0.38      0.59      0.46       239
      panda       0.90      0.33      0.48       240

avg / total       0.58      0.47      0.48       750

