In [23]:
import numpy as np
import pandas as pd

#visualization
import matplotlib.pyplot as plt
# import seaborn as sns

# import cv2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [24]:
# load numpy array
data = np.load('./data/data_pca_50_target.npz')
data.files

['arr_0', 'arr_1']

In [25]:
data.allow_pickle = True

In [26]:
X = data['arr_0'] #pca data with 50 components
Y = data['arr_1'] # target or dependent variable

In [27]:
X.shape, Y.shape

((5609, 50), (5609,))

In [28]:
## split the data in to train and test set

In [29]:
x_train, x_test, y_train, y_test = train_test_split(X,Y, test_size=0.2, stratify=Y)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(4487, 50) (1122, 50) (4487,) (1122,)


In [30]:
###train the machine learning model

In [31]:
model_svc = SVC(probability=True)
param_grid = {'C': [0.5,1,10,20,30,50],
              'kernel': ['rbf', 'poly'],
              'gamma':[0.1,0.05,0.01,0.001,0.002,0.005],
              'coef0': [0,1]}

In [32]:
model_grid = GridSearchCV(model_svc, param_grid=param_grid, scoring='accuracy', cv=3, verbose=2)

In [33]:
model_grid.fit(x_train, y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits


[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   4.9s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   6.0s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   4.9s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   2.4s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   2.3s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   2.4s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   3.6s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   3.5s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   3.5s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   2.2s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   2.1s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   2.1s
[CV] END .............C=0.5,

In [34]:
model_grid.best_params_

{'C': 1, 'coef0': 0, 'gamma': 0.05, 'kernel': 'rbf'}

In [35]:
model_finale = model_grid.best_estimator_

In [36]:
model_finale.get_params()

{'C': 1,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 0.05,
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [37]:
#vvaluation

In [38]:
y_pred = model_finale.predict(x_test)

In [39]:
y_pred

array(['female', 'male', 'female', ..., 'male', 'female', 'female'],
      dtype=object)

In [40]:
###classification report

In [41]:
cr = metrics.classification_report(y_test, y_pred, output_dict=True)
pd.DataFrame(cr).T

Unnamed: 0,precision,recall,f1-score,support
female,0.833049,0.812292,0.82254,602.0
male,0.788785,0.811538,0.8,520.0
accuracy,0.811943,0.811943,0.811943,0.811943
macro avg,0.810917,0.811915,0.81127,1122.0
weighted avg,0.812535,0.811943,0.812094,1122.0


In [42]:
#kappa Score

In [43]:
metrics.cohen_kappa_score(y_test, y_pred)

0.6226076421545966

In [44]:
#area under curve(AUC)

In [45]:
metrics.roc_auc_score(np.where(y_test=='male', 1, 0), np.where(y_pred=='male', 1, 0))

0.8119154101712242

In [46]:
###save face recognition model
import pickle

In [47]:
pickle.dump(model_finale, open('./model/model_svm.pickle', mode='wb'))