In [1]:
import sys
import os

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from pyimagesearch.datasets.simple_datasetloader import SimpleDatasetLoader
from pyimagesearch.preprocessing.simple_preprocessor import SimplePreprocessor

In [3]:
from imutils.paths import list_images
image_paths = list(list_images('../pyimagesearch/datasets/animals'))
image_paths

['../pyimagesearch/datasets/animals/cats/cats_00591.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00861.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00851.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00401.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00355.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00599.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00191.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00938.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00536.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00434.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00276.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00076.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00378.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00780.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00216.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00534.jpg',
 '../pyimagesearch/datasets/animals/cats/cats_00733.jpg',
 '../pyimagese

In [5]:
preprocessors = [SimplePreprocessor(width=32, height=32)]
dataset_loader = SimpleDatasetLoader(preprocessors)
(data, labels) = dataset_loader.load(image_paths, verbose=500)

[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000


In [6]:
data.shape # (image_count, width, height, channel)

(3000, 32, 32, 3)

In [7]:
data = data.reshape(data.shape[0], -1) # (image_count, feature_vector_length)

In [8]:
data.shape

(3000, 3072)

In [9]:
# show some information on memory consumption of the images
print("[INFO] features matrix: {:.1f}MB".format(
data.nbytes / (1024 * 1024.0)))

[INFO] features matrix: 8.8MB


In [20]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [11]:
labels[0]

'cats'

In [12]:
# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)
labels[0]

0

In [16]:
# partition the data into training and testing splits using 75% of the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)

In [19]:
# train and evaluate a k-NN classifier on the raw pixel intensities
print("[INFO] evaluating k-NN classifier...")
# -1 jobs for k-NN distance (-1 uses all available cores)
model = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
model.fit(trainX, trainY)

[INFO] evaluating k-NN classifier...


In [22]:
print(classification_report(testY, model.predict(testX)))

              precision    recall  f1-score   support

           0       0.40      0.49      0.44       262
           1       0.35      0.47      0.40       239
           2       0.86      0.36      0.50       249

    accuracy                           0.44       750
   macro avg       0.53      0.44      0.45       750
weighted avg       0.53      0.44      0.45       750



In [24]:
# instead single neighbor, this time using k=3
model = KNeighborsClassifier(n_neighbors=3, n_jobs=-1)
model.fit(trainX, trainY)
print(classification_report(testY, model.predict(testX)))

              precision    recall  f1-score   support

           0       0.40      0.53      0.46       262
           1       0.38      0.51      0.44       239
           2       0.91      0.27      0.42       249

    accuracy                           0.44       750
   macro avg       0.56      0.44      0.44       750
weighted avg       0.56      0.44      0.44       750

