## 1. Import Libraries

In [1]:
import os
import numpy as np
import pickle

from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

## 2. Parameters

In [2]:
# --- Dataset -----------------------------------------------------------------
# Root folder that holds one sub-folder per class.
input_path = "./data"

# Sub-folder names; a folder's position in this list is used as its class index.
# Image counts noted by the author: empty = 2,463 images, full = 2,599 images.
categories = ['empty_aug', 'full_aug']

# Accumulators for the flattened images and their class indices.
data, labels = [], []

# --- Preprocessing and split -------------------------------------------------
# Target (rows, cols) every image is resized to before flattening.
resize_shape = (15, 15)
# Value handed to train_test_split as its test_size argument.
test_size = 0.2

# --- SVC hyper-parameter grid ------------------------------------------------
# gamma: {'scale', 'auto'} or float, default='scale'
#     Kernel coefficient for the 'rbf', 'poly' and 'sigmoid' kernels.
# C: float, default=1.0
#     Regularization parameter; regularization strength is inversely
#     proportional to C. Must be strictly positive. The penalty is a
#     squared l2 penalty.
clf_param = [
    dict(
        gamma=[0.01, 0.001, 0.0001],
        C=[1, 10, 100, 1000],
    )
]

## 3. Data Preparation

In [4]:
# Build the feature matrix `data` and the label vector `labels` from the
# image folders listed in `categories`.
#
# Fresh local lists are used as accumulators so this cell is idempotent:
# `data` and `labels` are rebound to ndarrays at the end, so appending to
# them directly would fail (or duplicate rows) when the cell is run again.
image_vectors = []
image_labels = []
skipped_files = []  # (path, reason) for every file that could not be used

for cat_idx, category in enumerate(categories):
    category_dir = os.path.join(input_path, category)
    # os.listdir returns entries in arbitrary order; sort for a reproducible row order
    for file in sorted(os.listdir(category_dir)):
        img_path = os.path.join(category_dir, file)
        try:
            img = imread(img_path)

            # resize image to the resize_shape box (15 x 15); the per-pixel
            # RGB values are kept, so img.shape = (15, 15, 3) for RGB input
            img = resize(img, resize_shape)
        except Exception as exc:
            # Best effort: keep going, but remember what was dropped and why
            # instead of silently discarding it. `Exception` (not a bare
            # `except`) lets KeyboardInterrupt / SystemExit propagate.
            skipped_files.append((img_path, repr(exc)))
            continue

        # flatten 15 x 15 x 3 to a vector of 675 values (one row of `data`)
        image_vectors.append(img.flatten())

        # 0: empty, 1: full
        image_labels.append(cat_idx)

data = np.asarray(image_vectors)
labels = np.asarray(image_labels)

# Make dropped files visible so a shrinking dataset does not go unnoticed.
if skipped_files:
    print("Skipped {} file(s) that could not be loaded:".format(len(skipped_files)))
    for skipped_path, reason in skipped_files[:10]:
        print("  {}: {}".format(skipped_path, reason))
    if len(skipped_files) > 10:
        print("  ... and {} more".format(len(skipped_files) - 10))

## 4. Train Test Split

In [5]:
# Hold out `test_size` of the samples for evaluation.
# stratify: keep label proportions equal for train and test
# random_state: fix the shuffle so the split (and therefore every score
#   reported below) is the same on each Restart & Run All
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    labels,
                                                    test_size=test_size,
                                                    shuffle=True,
                                                    stratify=labels,
                                                    random_state=42)


In [6]:
# Sanity-check the split sizes: (n_samples, n_features) for X, (n_samples,) for y.
# X_train: one flattened image per row, e.g. [[0.628, 0.588, 0.527, ...], ...]
# y_train: one class index per sample, e.g. [0, 1, 1, 0, 0, ...]
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((4346, 675), (1087, 675), (4346,), (1087,))

## 5. Train Classifier

In [7]:
# Base estimator: a support-vector classifier with default settings.
clf = SVC()

# Exhaustive search over clf_param: 3 gamma values x 4 C values = 12
# hyper-parameter combinations. Each combination is scored with
# cross-validation (GridSearchCV's default, 5-fold in scikit-learn >= 0.22),
# so more than 12 models are actually fitted.
grid_search = GridSearchCV(estimator=clf, param_grid=clf_param)

grid_search.fit(X_train, y_train)

## 6. Test Model

In [8]:
# Pick the estimator the grid search selected as best and display it
# (the repr shows the chosen hyper-parameters).
best_est = grid_search.best_estimator_
best_est

In [9]:
# Predict a class index (0: empty, 1: full) for every held-out test image.
y_pred = best_est.predict(X_test)

In [10]:
# Report the share of test images classified correctly, as a percentage.
print(f"Accuracy = {accuracy_score(y_test, y_pred) * 100:.10f}%")

Accuracy = 100.0000000000%


## 7. Save Model

In [11]:
# Persist the selected estimator so it can be reused without retraining.
# NOTE: a pickle must only be loaded from a trusted source, since unpickling
# can execute arbitrary code.
model_path = './model/yuko_svc_best_est.p'

# Create the target folder on a fresh checkout instead of failing on open().
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# The context manager guarantees the file is flushed and closed, even if
# pickling raises part-way through.
with open(model_path, 'wb') as model_file:
    pickle.dump(best_est, model_file)