## Ensemble model
The ensemble combines five models:
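1. the model with the best L1 regularization value
2. the model with the best L2 regularization value
3. the model trained with augmentation
4. the model with the best dropout value
5. the model with the best architecture (TBD)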

This notebook computes the ensemble predictions and saves them to disk.

Import packages

In [1]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, precision_score
import matplotlib.pyplot as plt
from numpy_ml.neural_nets.losses import CrossEntropy as np_CrossEntropy
from scipy.special import softmax
import torch
import pandas as pd

Load logits 

In [2]:
# Number of target classes (from previous knowledge, e.g. check preprocess.ipynb).
num_classes = 11

# Directory that holds the saved LOGITS_* / TRUTH_* files; PREDS_* files are written here too.
models_path = "/home/jt9744/COS429/429_Final_new/ensemble_models/"

# Run names of the five ensemble members. Each name is appended to a file
# prefix such as 'LOGITS_' to locate that run's saved outputs.
L1_name = "30epochs_l1_lr_0.1_ld_0.0001"
L2_name = "30epochs_l2_lr_0.1_wd_1e-11"
augment_name = "60epochs_l2_lr_0.1_wd_1e-07_augment"
dropout_name = "30epochs_l1_lr_0.1_ld_1e-07_dropout_06"
architecture_name = "30epochs_l2_lr_0.1_wd_1e-07_1lesslayer"

# Run name of the baseline model.
baseline_name = "30epochs_l2_lr_0.1_wd_1e-07"

In [13]:
# Read each run's saved logits from the text file '<models_path>LOGITS_<run name>'.
L1_logits = np.loadtxt(f"{models_path}LOGITS_{L1_name}")
L2_logits = np.loadtxt(f"{models_path}LOGITS_{L2_name}")
aug_logits = np.loadtxt(f"{models_path}LOGITS_{augment_name}")
drop_logits = np.loadtxt(f"{models_path}LOGITS_{dropout_name}")
arch_logits = np.loadtxt(f"{models_path}LOGITS_{architecture_name}")
baseline_logits = np.loadtxt(f"{models_path}LOGITS_{baseline_name}")

In [16]:
def _argmax_and_save(model_logits, file_name):
    """Convert logits into predictions and save them.

    Takes the argmax of ``model_logits`` along axis 1, writes the result with
    ``np.savetxt`` to ``models_path + file_name``, and returns the predictions
    so they can be used for evaluating accuracy.
    """
    class_preds = model_logits.argmax(axis=1)
    np.savetxt(models_path + file_name, class_preds)
    return class_preds


# One predictions array (and one saved file) per model, in the same order as before.
L1_preds = _argmax_and_save(L1_logits, "PREDS_L1")
L2_preds = _argmax_and_save(L2_logits, "PREDS_L2")
aug_preds = _argmax_and_save(aug_logits, "PREDS_aug")
drop_preds = _argmax_and_save(drop_logits, "PREDS_drop")
arch_preds = _argmax_and_save(arch_logits, "PREDS_arch")
baseline_preds = _argmax_and_save(baseline_logits, "PREDS_baseline")

In [15]:
# All ground truths are the same, so the file saved with the L1 run is read.
ground_truth = np.loadtxt(f"{models_path}TRUTH_{L1_name}")
# Cast the loaded values to 8-bit integer class labels.
ground_truth = ground_truth.astype(np.int8)

Function to evaluate model performance

In [5]:
def eval_metrics(ground_truth, predictions, num_classes):
    """Compute classification metrics for a set of predictions.

    Args:
        ground_truth: true class labels.
        predictions: predicted class labels.
        num_classes: number of classes; per-class scores and the confusion
            matrix are reported for labels ``0 .. num_classes - 1``.

    Returns:
        dict with the keys
        "accuracy"         -- overall accuracy,
        "f1"               -- per-class F1 scores,
        "average f1"       -- unweighted mean of the per-class F1 scores,
        "confusion matrix" -- confusion matrix over the labels above,
        "precision"        -- per-class precision.
    """
    class_labels = np.arange(num_classes)

    # Per-class F1 is computed first because "average f1" is derived from it.
    per_class_f1 = f1_score(
        y_true=ground_truth, y_pred=predictions, labels=class_labels, average=None
    )

    results = {}
    results["accuracy"] = accuracy_score(y_true=ground_truth, y_pred=predictions)
    results["f1"] = per_class_f1
    results["average f1"] = np.mean(per_class_f1)
    results["confusion matrix"] = confusion_matrix(
        y_true=ground_truth, y_pred=predictions, labels=class_labels
    )
    results["precision"] = precision_score(
        y_true=ground_truth, y_pred=predictions, labels=class_labels, average=None
    )
    return results

In [8]:
# Logits of the ensemble members. An entry may be None while its model is
# still being trained; such entries are skipped.
candidate_logits = [L1_logits, L2_logits, aug_logits, drop_logits, arch_logits]
available_logits = [member for member in candidate_logits if member is not None]

# np.stack (rather than np.array) raises a clear ValueError right here when no
# logits are available or when the members' shapes differ, instead of yielding
# NaN / a ragged array that only fails in a later cell. For same-shaped inputs
# the result is identical: the models are stacked along a new leading axis.
logits = np.stack(available_logits)

# Average over the model axis to obtain the ensemble logits.
ensemble_logits = np.mean(logits, axis=0)

In [9]:
# Convert the ensemble logits into predictions (argmax along axis 1) for evaluating accuracy.
ensemble_preds = np.argmax(ensemble_logits, axis=1)

# Save the ensemble predictions to the models directory.
np.savetxt(models_path + "PREDS_ensemble", ensemble_preds)