# Image classification with Python and scikit-learn

In [3]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/gdrive; the image
# dataset used below is read from a folder under this mount point.
# force_remount=True is passed so that re-running this cell mounts again
# instead of relying on an earlier mount (per the flag name -- see the
# google.colab documentation for the exact semantics).
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [4]:
import os
import pickle
import pandas as pd
from pathlib import Path # Added this import

from skimage.io import imread
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


# prepare data
#
# Every category has its own sub-folder under clf_dir. The position of the
# category in `categories` is used as the integer class label, so
# 'empty' -> 0 and 'not_empty' -> 1.

clf_dir = Path("/content/gdrive/MyDrive/computer-vision-data/clf-data")

categories = ['empty', 'not_empty']

data = []
labels = []
for category_idx, category in enumerate(categories):
    # os.listdir returns entries in arbitrary order; sorting makes the
    # dataset come out in the same order on every run.
    for file in sorted(os.listdir(os.path.join(clf_dir, category))):
        img_path = os.path.join(clf_dir, category, file)
        if not os.path.isfile(img_path):
            # Skip sub-directories (e.g. checkpoint folders); imread would
            # fail on them.
            continue
        img = imread(img_path)
        # Shrink to 15x15 and flatten into one 1-D feature vector per image.
        # NOTE(review): assumes every image has the same number of channels;
        # otherwise the flattened vectors differ in length and cannot be
        # stacked into a 2-D array below -- confirm against the dataset.
        img = resize(img, (15, 15))
        data.append(img.flatten())
        labels.append(category_idx)

# Stack into arrays: one row per image in `data`, one label per row in `labels`.
data = np.asarray(data)
labels = np.asarray(labels)

# train / test split
#
# Hold out 20% of the samples for testing. stratify=labels keeps the class
# proportions the same in both splits. random_state pins the shuffle so the
# split -- and therefore the reported accuracy -- is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

# train classifier
#
# Grid search over the SVC hyper-parameters `gamma` and `C`: 3 x 4 = 12
# candidate combinations, each evaluated on the training split only.
classifier = SVC()

parameters = [
    {
        'gamma': [0.01, 0.001, 0.0001],
        'C': [1, 10, 100, 1000],
    }
]

grid_search = GridSearchCV(estimator=classifier, param_grid=parameters)

grid_search.fit(x_train, y_train)

# test performance
#
# Score the best model found by the grid search on the held-out test split.
best_estimator = grid_search.best_estimator_

y_prediction = best_estimator.predict(x_test)

# accuracy_score expects (y_true, y_pred): ground truth first, predictions
# second. Accuracy itself is symmetric, but keeping the documented order
# avoids wrong results if an asymmetric metric is swapped in later.
score = accuracy_score(y_test, y_prediction)

print('{}% of samples were correctly classified'.format(score * 100))

# Persist the selected model to ./model.p. The context manager closes the
# file handle (flushing the pickle to disk) even if dump() raises.
# NOTE: only unpickle this file from a trusted source -- pickle.load can
# execute arbitrary code.
with open('./model.p', 'wb') as model_file:
    pickle.dump(best_estimator, model_file)

100.0% of samples were correctly classified
