# Ensemble Model

## Load data and Libraries

In [1]:
from collections import Counter

import numpy as np
from sklearn.decomposition import PCA as RandomizedPCA
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [2]:
# Training split: pixel array, landmark array and labels (loaded in that order)
X_train, X_trainLAND, y_train = (
    np.load("data/train_pixels.npy"),
    np.load("data/train_landmarks.npy"),
    np.load("data/train_labels.npy"),
)

# Public test split - used to validate training
X_test, X_testLAND, y_test = (
    np.load("data/eval_pixels.npy"),
    np.load("data/eval_landmarks.npy"),
    np.load("data/eval_labels.npy"),
)

# Private test split - used for the final prediction
X_final, X_finalLAND, y_final = (
    np.load("data/test_pixels.npy"),
    np.load("data/test_landmarks.npy"),
    np.load("data/test_labels.npy"),
)

# Emotion names; len(emotions) is used as the number of classes.
# Presumably the list position matches the integer label - TODO confirm
# against the label files.
emotions = [
    'Angry',
    'Disgust',
    'Fear',
    'Happy',
    'Sad',
    'Surprise',
    'Neutral',
]

In [3]:
# Flatten every sample of each pixel split into one row of 48*48 values
X_trainSVM, X_testSVM, X_finalSVM = (
    pixels.reshape(-1, 48*48) for pixels in (X_train, X_test, X_final)
)

## Base Classifiers

### Model 1 - SVM with Pixels

In [4]:
# 110 was determined during tuning
n_components = 110
pca1 = RandomizedPCA(n_components=n_components, whiten=True, random_state=42).fit(X_trainSVM)

# Project every split with the PCA that was fitted on the training pixels only
X_train_pca1 = pca1.transform(X_trainSVM)
X_test_pca1 = pca1.transform(X_testSVM)
X_final_pca1 = pca1.transform(X_finalSVM)

# probability=True enables predict_proba, which soft voting needs. The
# probability estimates shuffle the data internally, so random_state is fixed
# (same seed as the PCA) to make predict_proba reproducible between runs.
SVM1 = SVC(kernel='rbf', class_weight='balanced', probability=True, random_state=42)
SVM1 = SVM1.fit(X_train_pca1, y_train)

### Model 2 - SVM with Landmarks

In [5]:
# 80 was determined during tuning
n_components = 80
pca2 = RandomizedPCA(n_components=n_components, whiten=True, random_state=42).fit(X_trainLAND)

# Project every split with the PCA that was fitted on the training landmarks only
X_train_pca2 = pca2.transform(X_trainLAND)
X_test_pca2 = pca2.transform(X_testLAND)
X_final_pca2 = pca2.transform(X_finalLAND)

# probability=True enables predict_proba, which soft voting needs. The
# probability estimates shuffle the data internally, so random_state is fixed
# (same seed as the PCA) to make predict_proba reproducible between runs.
SVM2 = SVC(kernel='rbf', class_weight='balanced', probability=True, random_state=42)
SVM2 = SVM2.fit(X_train_pca2, y_train)

### Model 3 - XGBoost with Pixels

In [6]:
# Multi-class XGBoost on the flattened pixels; one class per emotion name
XGB1 = XGBClassifier(
    objective='multi:softprob',
    num_class=len(emotions),
    use_label_encoder=False,
    eval_metric='mlogloss',
)

# Progress marker printed before training starts
print("fitting")
# fit() returns the estimator, which the notebook echoes as the cell output
XGB1.fit(X_trainSVM, y_train)

fitting


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eval_metric='mlogloss',
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=12,
              num_class=7, num_parallel_tree=1, objective='multi:softprob',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

### Model 4 - XGBoost with Landmarks

In [7]:
# Multi-class XGBoost on the landmark features; one class per emotion name
XGB2 = XGBClassifier(
    objective='multi:softprob',
    num_class=len(emotions),
    use_label_encoder=False,
    eval_metric='mlogloss',
)

# Progress marker printed before training starts
print("fitting")
# fit() returns the estimator, which the notebook echoes as the cell output
XGB2.fit(X_trainLAND, y_train)

fitting


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eval_metric='mlogloss',
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=12,
              num_class=7, num_parallel_tree=1, objective='multi:softprob',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

## Ensemble Voting

### Soft Voting
Here we take each model's predicted class-probability distribution and sum the distributions across all models. Then we take the argmax of the summed probabilities as the final predicted class.

In [8]:
def soft_vote(p1, p2, p3, p4):
    """Pick, per sample, the class with the largest summed probability.

    Args:
        p1, p2, p3, p4: Per-model class-probability arrays. They are added
            element-wise, so they are expected to share one shape
            (presumably ``(n_samples, n_classes)`` with the same class order
            for every model -- confirm against the callers).

    Returns:
        For each row of the summed array, the column index of its largest
        value.
    """
    # Left-to-right accumulation starting from p1: ((p1 + p2) + p3) + p4.
    summed = sum((p2, p3, p4), p1)
    return np.argmax(summed, axis=1)

In [9]:
# Class probabilities from every base model on the public test split
# (the SVMs take the PCA-projected features, the XGBoost models the raw ones)
pred1, pred2 = (
    SVM1.predict_proba(X_test_pca1),
    SVM2.predict_proba(X_test_pca2),
)
pred3, pred4 = (
    XGB1.predict_proba(X_testSVM),
    XGB2.predict_proba(X_testLAND),
)

In [10]:
# Soft-vote ensemble prediction for the public test split
sv_pred = soft_vote(pred1, pred2, pred3, pred4)

In [11]:
# Accuracy of the soft-vote predictions against the public test labels
score = accuracy_score(y_test, sv_pred)
print(score)

0.6322095291167457


### Hard Voting
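Here we take the majority vote over each model's final predicted class. When several classes tie, the class predicted by the earliest model in the argument list wins.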

In [19]:
def most_frequent(List):
    """Return the most common element of ``List``.

    Ties are broken in favour of the element that appears first, i.e. the
    first element (scanning left to right) that reaches the highest count.

    Args:
        List: Iterable of votes (list, tuple, NumPy array, iterator, ...).
            The capitalised name is kept so existing keyword callers keep
            working.

    Returns:
        The element with the highest number of occurrences.

    Raises:
        IndexError: If ``List`` is empty.
    """
    votes = list(List)
    if not votes:
        raise IndexError("most_frequent() requires at least one element")

    try:
        # Single pass over the votes instead of one ``count`` call per element.
        tally = Counter(votes)
    except TypeError:
        # Unhashable elements cannot be dict keys; fall back to
        # equality-based counting (``max`` keeps the first maximal element).
        return max(votes, key=votes.count)

    # Counter keeps first-seen order and ``max`` returns the first maximal
    # key, so the earliest element wins a tie.
    return max(tally, key=tally.get)

def maj_vote(p1, p2, p3, p4):
    """Combine four models' label predictions by per-sample majority vote.

    Args:
        p1, p2, p3, p4: Equal-length sequences of predicted labels, one
            sequence per model, aligned by sample.

    Returns:
        A list with one winning label per sample. When labels tie, the one
        that appears first in ``(p1, p2, p3, p4)`` order wins.

    Raises:
        ValueError: If the four sequences do not all have the same length.
    """
    # Size the vote from the arguments themselves rather than from a global
    # dataset, so the function is correct for any split it is handed.
    n_samples = len(p1)
    if not (len(p2) == len(p3) == len(p4) == n_samples):
        raise ValueError(
            "maj_vote() needs four prediction sequences of equal length"
        )

    return [
        most_frequent([a, b, c, d])
        for a, b, c, d in zip(p1, p2, p3, p4)
    ]

In [13]:
# Predicted labels from every base model on the public test split
# (the SVMs take the PCA-projected features, the XGBoost models the raw ones)
pred1b, pred2b = (
    SVM1.predict(X_test_pca1),
    SVM2.predict(X_test_pca2),
)
pred3b, pred4b = (
    XGB1.predict(X_testSVM),
    XGB2.predict(X_testLAND),
)

In [14]:
# Majority-vote ensemble labels for the public test split
hv_pred = maj_vote(pred1b, pred2b, pred3b, pred4b)

In [15]:
# Accuracy of the majority-vote predictions against the public test labels
score = accuracy_score(y_test, hv_pred)
print(score)

0.6341599331290053


## Final Ensemble Prediction
Here we use the private test dataset to make our final prediction with our best ensemble (Hard Voting), which scored slightly higher than Soft Voting on the public test data.

In [16]:
# Predicted labels from every base model on the private test split
# (the SVMs take the PCA-projected features, the XGBoost models the raw ones)
pred1final, pred2final = (
    SVM1.predict(X_final_pca1),
    SVM2.predict(X_final_pca2),
)
pred3final, pred4final = (
    XGB1.predict(X_finalSVM),
    XGB2.predict(X_finalLAND),
)

In [20]:
# Majority-vote ensemble labels for the private test split
final_pred = maj_vote(pred1final, pred2final, pred3final, pred4final)

In [21]:
# Accuracy of the final majority-vote predictions against the private test labels
score = accuracy_score(y_final, final_pred)
print(score)

0.6182780718863193
