In [1]:
# prepare data
# train / test split
# train classifier
# test performance

In [2]:
# scikit-image (skimage) is the image-processing library used here to read and resize the images

In [3]:
# stratified sampling keeps the proportion of each label the same in the train and test sets

In [4]:
import os
import pickle
from skimage.io import imread
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [5]:
# data preparation

In [6]:
# Root folder holding one sub-directory of images per category. The original
# hard-coded location remains the default; set the GENDER_DATA_DIR environment
# variable to run this cell on another machine without editing it.
input_dir = os.environ.get(
    'GENDER_DATA_DIR',
    r'C:\Users\US593\Work\Datasets\Male-Female_Image_Classification\data\training',
)

# A category's position in this list is its integer label (male -> 0, female -> 1).
categories = ['male', 'female']

data = []
labels = []
for category_idx, category in enumerate(categories):
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of `data` identical from one run to the next.
    for file in sorted(os.listdir(os.path.join(input_dir, category))):
        img_path = os.path.join(input_dir, category, file)
        img = imread(img_path)
        # Force three channels so every flattened vector has the same length;
        # a grayscale or RGBA file would otherwise leave `data` ragged.
        if img.ndim == 2:
            img = np.stack((img,) * 3, axis=-1)
        elif img.shape[-1] == 4:
            img = img[..., :3]
        # Shrink to 15x15 pixels and flatten into a single feature row.
        img = resize(img, (15, 15))
        data.append(img.flatten())
        labels.append(category_idx)

# One row per image in `data`, with the matching integer label in `labels`.
data = np.asarray(data)
labels = np.asarray(labels)

In [7]:
data.shape

(11649, 675)

In [8]:
labels.shape

(11649,)

In [9]:
# train/test split

In [11]:
# Hold out 20% of the samples for testing. stratify=labels keeps the class
# proportions equal in both subsets; random_state pins the shuffle so the
# split (and every score derived from it) is the same on each run.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

In [12]:
# train classifier

In [13]:
# Support-vector classifier created with its default settings; the grid
# below supplies the gamma and C values to try.
classifier = SVC()

# 3 gamma values x 4 C values = 12 candidate combinations.
parameters = [
    {
        'gamma': [0.01, 0.001, 0.0001],
        'C': [1, 10, 100, 1000],
    }
]

# Search every combination in the grid, using GridSearchCV's defaults for
# everything else.
grid_search = GridSearchCV(estimator=classifier, param_grid=parameters)

grid_search.fit(x_train, y_train)

In [14]:
# test performance

In [15]:
# Keep the estimator that the grid search selected as best.
best_estimator = grid_search.best_estimator_

In [16]:
# Predict labels for the held-out test samples.
y_prediction = best_estimator.predict(x_test)

# accuracy_score is declared as (y_true, y_pred), so the ground truth goes
# first. Accuracy is symmetric, so the value is unchanged, but the correct
# order matters as soon as this call is swapped for an asymmetric metric.
score = accuracy_score(y_test, y_prediction)

print(score)

0.9356223175965666


In [17]:
# saving model

In [18]:
# Write the selected model to disk. The context manager closes the file as
# soon as the dump finishes, instead of leaving the handle open.
with open('./model.p', 'wb') as model_file:
    pickle.dump(best_estimator, model_file)