In [26]:
import numpy as np 
import pandas as pd
import pickle


# Data
- Load data from pickle file
- split the data into independent and dependent 
- split to train and test set 

In [27]:
data = pickle.load(open('./data_face_features_emotion.pickle', mode='rb'))

In [28]:
x = np.array(data['data'])# indendepnd variable
y = np.array(data['label']) # dependt varibale

In [29]:
x.shape , y.shape

((1413, 1, 128), (1413,))

In [30]:
x = x.reshape(-1,128)
x.shape

(1413, 128)

In [31]:
# split the data into train and test
from sklearn.model_selection import train_test_split

In [32]:
x_train,x_test, y_train,y_test = train_test_split(x,y,train_size=0.8,random_state=0)

In [33]:
x_train.shape, x_test.shape,y_train.shape, y_test.shape

((1130, 128), (283, 128), (1130,), (283,))

# Train Machine Learing 

In [34]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score,f1_score

### Logistic Regression 

In [35]:
model_logistic = LogisticRegression()
model_logistic.fit(x_train,y_train) #training logistic Regression

In [36]:
def get_report(model,x_train,y_train,x_test,y_test):
    y_pred_train = model.predict(x_train)
    y_pred_test = model.predict(x_test)

    # accuracy score
    acc_train = accuracy_score(y_train,y_pred_train)
    acc_test = accuracy_score(y_test,y_pred_test)

    # f1 score 
    f1_score_train = f1_score(y_train,y_pred_train,average='macro')
    f1_score_test = f1_score(y_test,y_pred_test,average='macro')
    
    print('Accuracy Train = %0.2f' %acc_train)
    print('Accuracy test = %0.2f' %acc_test)
    print('F1 Score Train = %0.2f' %f1_score_train)
    print('F1 Score Test = %0.2f' %f1_score_test)

In [37]:
get_report(model_logistic,x_train,y_train,x_test,y_test)

Accuracy Train = 0.31
Accuracy test = 0.23
F1 Score Train = 0.23
F1 Score Test = 0.18


# Support Vector Machines

In [38]:
model_svc = SVC(probability=True)
model_svc.fit(x_train,y_train)

In [39]:
get_report(model_svc,x_train,y_train,x_test,y_test)

Accuracy Train = 0.42
Accuracy test = 0.25
F1 Score Train = 0.32
F1 Score Test = 0.17


# Random Forest Classifier

In [40]:
model_rf = RandomForestClassifier(n_estimators=10)
model_rf.fit(x_train,y_train)

In [41]:
get_report(model_rf,x_train,y_train,x_test,y_test)

Accuracy Train = 1.00
Accuracy test = 0.45
F1 Score Train = 0.99
F1 Score Test = 0.46


# Voting Classifier

In [42]:
model_voting = VotingClassifier(estimators=[
    ('logistic',LogisticRegression()),
    ('svc',SVC(probability=True)),
    ('rf',RandomForestClassifier())
],voting='soft',weights=[1,2,3])

In [43]:
model_voting.fit(x_train,y_train)

In [44]:
get_report(model_voting,x_train,y_train,x_test,y_test)

Accuracy Train = 1.00
Accuracy test = 0.46
F1 Score Train = 1.00
F1 Score Test = 0.47


# Parameter Tuning

In [45]:
from sklearn.model_selection import GridSearchCV

In [46]:
model_grid = GridSearchCV(model_voting,
                          param_grid={
                              'svc__C':[3,5,7,10],
                              'svc__gamma':[0.1,0.3,0.5],
                              'rf__n_estimators':[5,10,20],
                              'rf__max_depth':[3,5,7],
                              'voting':['soft','hard']
                          },scoring='accuracy',cv=3,n_jobs=1,verbose=2)

In [47]:
model_grid.fit(x_train,y_train)

Fitting 3 folds for each of 108 candidates, totalling 324 fits
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.1, voting=soft; total time=   0.2s
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.1, voting=soft; total time=   0.2s
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.1, voting=soft; total time=   0.2s
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.3, voting=soft; total time=   0.2s
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.3, voting=soft; total time=   0.2s
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.3, voting=soft; total time=   0.2s
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.5, voting=soft; total time=   0.2s
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.5, voting=soft; total time=   0.2s
[CV] END rf__max_depth=3, rf__n_estimators=5, svc__C=3, svc__gamma=0.5, voting=soft; total time=   0.2s
[

In [48]:
model_best_estimator = model_grid.best_estimator_

In [49]:
model_grid.best_score_

0.34690633406700905

# Save Model 

In [50]:
pickle.dump(model_best_estimator,open('../models/machinelearning_face_emotion_identity.pkl',mode='wb'))
