## Ensemble

In [1]:
import numpy as np

# Load the train/test arrays from the data/ directory. np.load accepts a
# path directly and closes the underlying file once the array has been read.
# Pixel features and labels (contents presumed from the file names).
X_train = np.load("data/train_pixels.npy")
y_train = np.load("data/train_labels.npy")
X_test = np.load("data/test_pixels.npy")
y_test = np.load("data/test_labels.npy")

# Landmark features for the same train/test split
# (presumably row-aligned with the pixel arrays; verify upstream).
X_trainLAND = np.load("data/train_landmarks.npy")
X_testLAND = np.load("data/test_landmarks.npy")

In [2]:
# Flatten every sample into a single feature vector for the classical models.
# The sample axis is pinned to len(...) and the feature width is inferred, so
# an unexpected image size can never silently spill one sample's pixels into
# the next row (which reshape(-1, 48*48) would allow).
X_trainSVM = X_train.reshape(len(X_train), -1)
X_testSVM = X_test.reshape(len(X_test), -1)

## Model 1 - SVM with Pixels

In [3]:
from sklearn.svm import SVC
from sklearn.decomposition import PCA as RandomizedPCA

# Model 1: whitened PCA on the flattened pixels followed by an RBF-kernel SVM.
# RandomizedPCA is the notebook's import alias for sklearn.decomposition.PCA.
n_components = 110
pca1 = RandomizedPCA(n_components=n_components, whiten=True, random_state=42).fit(X_trainSVM)

# The projection is fitted on the training set only and then applied to both splits.
X_train_pca1 = pca1.transform(X_trainSVM)
X_test_pca1 = pca1.transform(X_testSVM)

# probability=True is needed for predict_proba (soft voting). The probability
# estimates come from an internal cross-validation that shuffles the data, so
# the SVM is seeded like the PCA above to keep the results reproducible.
SVM1 = SVC(kernel='rbf', class_weight='balanced', probability=True, random_state=42)
SVM1 = SVM1.fit(X_train_pca1, y_train)

## Model 2 - SVM with Landmarks

In [4]:
# Model 2: whitened PCA on the landmark features followed by an RBF-kernel SVM.
# Note: this rebinds the notebook-level n_components used by the Model 1 cell.
n_components = 80
pca2 = RandomizedPCA(n_components=n_components, whiten=True, random_state=42).fit(X_trainLAND)

# The projection is fitted on the training set only and then applied to both splits.
X_train_pca2 = pca2.transform(X_trainLAND)
X_test_pca2 = pca2.transform(X_testLAND)

# probability=True is needed for predict_proba (soft voting). The probability
# estimates come from an internal cross-validation that shuffles the data, so
# the SVM is seeded like the PCA above to keep the results reproducible.
SVM2 = SVC(kernel='rbf', class_weight='balanced', probability=True, random_state=42)
SVM2 = SVM2.fit(X_train_pca2, y_train)

## Model 3 - XGBoost with Pixels

In [5]:
from xgboost import XGBClassifier

# Class names; only the number of entries is used here (as num_class).
emotions = [
    'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral',
]

# Model 3: multi-class XGBoost (per-class probabilities via multi:softprob)
# trained on the flattened pixel features.
XGB1 = XGBClassifier(
    objective='multi:softprob',
    eval_metric='mlogloss',
    num_class=len(emotions),
    use_label_encoder=False,
)

print("fitting")
XGB1.fit(X_trainSVM, y_train)

fitting


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eval_metric='mlogloss',
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=12,
              num_class=7, num_parallel_tree=1, objective='multi:softprob',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

## Model 4 - XGBoost with Landmarks

In [6]:
# Model 4: same XGBoost configuration as Model 3, trained on the landmark
# features instead of the pixels. `emotions` comes from the Model 3 cell.
XGB2 = XGBClassifier(
    objective='multi:softprob',
    eval_metric='mlogloss',
    num_class=len(emotions),
    use_label_encoder=False,
)

print("fitting")
XGB2.fit(X_trainLAND, y_train)

fitting


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eval_metric='mlogloss',
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=12,
              num_class=7, num_parallel_tree=1, objective='multi:softprob',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

## Model Predictions

#### Softvoting
Here we take each model's predicted class-probability distribution and sum these distributions across all four models.  Then we take the argmax of the summed probabilities as the final predicted class.

In [11]:
# Per-class probabilities from each of the four models on its own test
# features (the SVMs take the PCA-projected inputs, XGBoost the raw ones).
pred1, pred2, pred3, pred4 = (
    model.predict_proba(features)
    for model, features in (
        (SVM1, X_test_pca1),
        (SVM2, X_test_pca2),
        (XGB1, X_testSVM),
        (XGB2, X_testLAND),
    )
)

In [13]:
# Element-wise sum of the four probability matrices; starting from pred1
# keeps the same left-to-right order of additions.
total = sum((pred2, pred3, pred4), pred1)

In [14]:
# Soft-voting decision: index of the largest summed score in every row.
final_pred = total.argmax(axis=1)

In [15]:
from sklearn.metrics import accuracy_score

# Share of test samples whose soft-voting prediction equals the true label.
score = accuracy_score(y_true=y_test, y_pred=final_pred)
print(score)

0.6093619392588465


#### Hardvoting
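Here we take the majority vote over each model's final prediction.  When the vote is tied, the class predicted by the earliest model in the list (SVM1, then SVM2, XGB1, XGB2) wins.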

In [17]:
def most_frequent(List):
    """Return the element that occurs most often in ``List``.

    Parameters
    ----------
    List : iterable
        Non-empty collection of votes. Any iterable is accepted (list,
        tuple, NumPy array, generator); it is materialised once.

    Returns
    -------
    The most common element. When several elements share the highest
    count, the one that appears first in ``List`` is returned.

    Raises
    ------
    IndexError
        If ``List`` is empty.
    """
    from collections import Counter

    items = list(List)
    if not items:
        raise IndexError("most_frequent() requires at least one element")

    try:
        counts = Counter(items)
    except TypeError:
        # Unhashable elements cannot be counted in a dict; fall back to
        # equality-based counting. max() returns the first maximal item,
        # which preserves the first-seen tie-break.
        return max(items, key=items.count)

    # Dicts iterate in insertion order and max() returns the first maximal
    # key, so ties resolve to the element seen first.
    return max(counts, key=counts.get)

In [16]:
# Hard (class-label) predictions from each model on its own test features.
pred1b, pred2b, pred3b, pred4b = [
    model.predict(features)
    for model, features in [
        (SVM1, X_test_pca1),
        (SVM2, X_test_pca2),
        (XGB1, X_testSVM),
        (XGB2, X_testLAND),
    ]
]

In [19]:
# Majority vote per test sample. Iterating over the prediction arrays
# themselves (instead of indexing by the length of an unrelated feature
# array) keeps the loop tied to the data it actually consumes.
# With four voters ties are possible; most_frequent resolves them in favour
# of the earliest model in the list, i.e. SVM1 first, then SVM2, XGB1, XGB2.
final_pred = [
    most_frequent(list(votes))
    for votes in zip(pred1b, pred2b, pred3b, pred4b)
]

In [20]:
from sklearn.metrics import accuracy_score

# Share of test samples whose hard-voting prediction equals the true label.
score = accuracy_score(y_true=y_test, y_pred=final_pred)
print(score)

0.6182780718863193
