In [None]:
# Uncomment if necessary

In [5]:
#!pip install -f http://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o

In [None]:
#!pip install altair

In [6]:
import h2o
from h2o.estimators import (
    H2OGeneralizedLinearEstimator, 
    H2ORandomForestEstimator, 
    H2OGradientBoostingEstimator, 
    H2ONaiveBayesEstimator,
    H2OStackedEnsembleEstimator,
    H2ODeepLearningEstimator

)
from h2o.frame import H2OFrame
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Connect to the H2O cluster; the captured output below shows an already-running
# instance at http://localhost:54321 being reused.
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,4 days 0 hours 34 mins
H2O_cluster_timezone:,Europe/Prague
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.6
H2O_cluster_version_age:,2 months and 8 days
H2O_cluster_name:,H2O_from_python_vladi_8uz4d9
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,4.694 Gb
H2O_cluster_total_cores:,16
H2O_cluster_allowed_cores:,16


### GLOBAL PRESETS

In [8]:
import warnings
# Silence every Python warning for the rest of the notebook.
# NOTE(review): a blanket 'ignore' also hides deprecation / convergence warnings - consider narrowing it.
warnings.filterwarnings('ignore')

# Hold-out fraction passed as `test_size` to both train_test_split calls in the data loading cell.
TEST_SIZE = 0.2

Throughout the project we repeatedly refer to the paper **Practical considerations for specifying a super learner**:
https://arxiv.org/pdf/2204.06139

### DATA LOADING AND PREPROCESSING

In [11]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import pandas as pd

In [12]:
spam_data = fetch_openml(data_id=44, as_frame=True)
spam_df = spam_data.frame

# Every column except the last is a feature; the last column is the target
# (spam or not), cast to integers.
X = spam_df.iloc[:, :-1]
y = spam_df.iloc[:, -1].astype(int)

# First hold out a TEST_SIZE fraction of all rows as the final test set ...
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)

# ... then split the remaining rows into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=TEST_SIZE, random_state=42)

# The validation frame is used as "test" data when comparing the individual stacks,
# so that the final test data is not spoiled.
h2o_train, h2o_val, h2o_test = (
    H2OFrame(pd.DataFrame(features).assign(label=target.values))
    for features, target in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Convert the target column of each frame to categorical.
for frame in (h2o_train, h2o_val, h2o_test):
    frame['label'] = frame['label'].asfactor()

print("Training set size:", h2o_train.nrows)
print("Validation set size:", h2o_val.nrows)
print("Testing set size:", h2o_test.nrows)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Training set size: 2944
Validation set size: 736
Testing set size: 921


#### Is the SPAM class underrepresented?

In [14]:
# Count the rows of each class; the 'class' column holds the labels as the strings '1' / '0'.
spam_labels = spam_df['class']
class_1 = int((spam_labels == '1').sum())
class_0 = int((spam_labels == '0').sum())
print(f"Records containing spam: {class_1}")
print(f"Records not containing spam: {class_0}")

Records containing spam: 1813
Records not containing spam: 2788


#### Computing the effective sample size n_eff (from paper)

We have binary data, the prevalence of Y is **p=class_1 / total_size**, subsequently **n_rare=n*min(p, 1-p)**, and finally **n_eff=min(n, 5*n_rare)** 

In [16]:
# Total number of records.
n = spam_df.shape[0]

# Prevalence of the spam class.
p = class_1 / n
# Number of records in the rarer of the two classes.
n_rare = n * min(p, 1 - p)
# Effective sample size: n_eff = min(n, 5 * n_rare).
n_eff = min(n, 5 * n_rare)
n_eff

4601

#### Computing the V for V-fold cross-validation
Since n_eff >= 500 but n_eff < 5000, we should select a V between 10 and 20. Because n_eff is closer to 5000 than to 500, we choose a V only slightly higher than 10.

In [18]:
# V for V-fold cross-validation; passed as `nfolds` to every base learner defined below.
N_FOLDS = 12

### BASE LEARNERS - TRAINING & EVALUATION

In [20]:
# # Train each base learner using cross-validation
# for name, learner in base_learners.items():
#     print(f"Training {name} with {N_FOLDS}-fold cross-validation...")
#     learner.train(x=list(range(X_train.shape[1])), y="label", training_frame=h2o_train)

# results = {}
# for name, learner in base_learners.items():
#     performance = learner.model_performance(test_data=h2o_test)
#     f1_score = performance.F1()[0][1]  
#     auc_pr = performance.aucpr()      
#     accuracy = performance.accuracy()[0][1]
#     results[name] = accuracy
#     results[name] = {"F1-Score": f1_score, "AUC-PR": auc_pr, "Accuracy": accuracy}
#     print(f"{name} - F1-Score: {f1_score:.4f}, AUC-PR: {auc_pr:.4f}, Accuracy (Test Set): {accuracy:.4f}")


# # Print results
# print("Base Learner Results:", results)

# base_models = list(base_learners.values())

In [21]:
def train_evaluate_stack(base_learners, metalearner, h2o_train, h2o_test, X_train):
    """Train a stack of base learners and a stacked-ensemble super learner, then score both.

    Each base learner is trained in place on ``h2o_train``; a
    ``H2OStackedEnsembleEstimator`` is then built on top of them. The metrics of every
    base learner and of the super learner on ``h2o_test`` are printed.

    Args:
        base_learners: Mapping of display name -> untrained H2O estimator.
        metalearner: Value passed as ``metalearner_algorithm`` to the stacked ensemble
            (e.g. ``"glm"``).
        h2o_train: H2OFrame used to fit all models; must contain a ``"label"`` column.
        h2o_test: H2OFrame on which all models are scored.
        X_train: Object exposing ``.shape``; only ``X_train.shape[1]`` is read, to select
            the first that many columns of ``h2o_train`` as predictors.

    Returns:
        dict with the keys ``"F1-Score"``, ``"AUC-PR"`` and ``"Accuracy"`` holding the
        super learner's metrics on ``h2o_test``.
    """
    # Predictor columns are addressed by position; build the index list once.
    predictors = list(range(X_train.shape[1]))

    # TRAIN BASE LEARNERS
    print("\n>>> Training base learners:\n")
    for name, learner in base_learners.items():
        print(f"    Training {name} with {N_FOLDS}-fold cross-validation...")
        learner.train(x=predictors, y="label", training_frame=h2o_train)

    super_learner = H2OStackedEnsembleEstimator(
        base_models=list(base_learners.values()),
        metalearner_algorithm=metalearner
    )
    # TRAIN THE METALEARNER
    print("\n>>> Training super learner:\n")
    super_learner.train(x=predictors, y="label", training_frame=h2o_train)

    # EVAL BASE LEARNERS (printed only; they are not part of the return value)
    print("\n>>> Base learners' results:\n")
    for name, learner in base_learners.items():
        performance = learner.model_performance(test_data=h2o_test)
        # F1() / accuracy() are indexed as [0][1] to extract a single value.
        # NOTE(review): presumably the value at the metric-maximising threshold,
        # i.e. F1 and accuracy may be reported at different thresholds - confirm in the H2O docs.
        f1_score = performance.F1()[0][1]
        auc_pr = performance.aucpr()
        accuracy = performance.accuracy()[0][1]
        print(f"    {name} - F1-Score: {f1_score:.4f}, AUC-PR: {auc_pr:.4f}, Accuracy (Test Set): {accuracy:.4f}")

    # EVAL THE METALEARNER
    print("\n>>> Metalearner's results:\n")
    super_performance = super_learner.model_performance(test_data=h2o_test)
    super_accuracy = super_performance.accuracy()[0][1]
    super_f1 = super_performance.F1()[0][1]
    super_auc_pr = super_performance.aucpr()

    print(f"    Super Learner - F1-Score: {super_f1:.4f}, AUC-PR: {super_auc_pr:.4f}, Accuracy: {super_accuracy:.4f}")
    return {"F1-Score": super_f1, "AUC-PR": super_auc_pr, "Accuracy": super_accuracy}


# Ablation studies

In the following we tried a more methodological way of building the stack. 
We tried two approaches and evaluated their effects on the final test metrics:

**1) Building the stack from simpler models adding more complex ones:**

In this method we start from a base consisting of simple models which we assume would capture the main / most general pattern in the data.
Afterwards we gradually add more complex models to extend the stack's ability to capture finer intricacies and more complex (perhaps non-linear) relationships in the data, and we observe the effect on the test metrics.


**2) Building the stack from more complex models adding more general/simple ones:**
In this method we start from a base consisting of more complex models which we assume would capture the complex relationships in data well and then
we try to bring down the variance by adding simpler models that don't overfit to the data so much.



### A more efficient variant would be training each model only once in case it is present in multiple combinations.

Due to the tradeoff between the scope of this project and the time available, we perform only a superficial overview. If the problem were the topic of major research, where the time needed to search the vast hypothesis space is available, we would suggest performing more extensive per-class tests with a finer hyperparameter sampling granularity to better observe how the hyperparameters affect the models' performance.

In [24]:
def _build_lr_stack(hidden_layouts):
    """Return a fresh base-learner stack: logistic regression followed by one MLP per layout.

    Args:
        hidden_layouts: Iterable of hidden-layer size lists, e.g. ``[[6], [32, 16]]``.
            An empty iterable yields a stack holding only the logistic regression.

    Returns:
        dict mapping learner name -> untrained H2O estimator, in insertion order. MLPs are
        named ``NeuralNetwork_<sizes joined by "_">`` (``[32, 16]`` -> ``NeuralNetwork_32_16``).
    """
    stack = {
        "LogisticRegression_binomial": H2OGeneralizedLinearEstimator(
            family="binomial", nfolds=N_FOLDS, seed=42, keep_cross_validation_predictions=True
        ),
    }
    for hidden in hidden_layouts:
        layout_tag = "_".join(str(units) for units in hidden)
        stack[f"NeuralNetwork_{layout_tag}"] = H2ODeepLearningEstimator(
            hidden=list(hidden), epochs=300, nfolds=N_FOLDS, seed=42,
            keep_cross_validation_predictions=True,
        )
    return stack


# Hidden-layer layouts of the MLPs that accompany the logistic regression in each stack
# (one entry per stack, in evaluation order).
LR_STACK_HIDDEN_LAYOUTS = [
    [],                              # 01: LR only
    [[6]],                           # 02
    [[16]],                          # 03
    [[6], [16]],                     # 04
    [[32]],                          # 05
    [[6], [16], [32]],               # 06
    [[32, 16]],                      # 07
    [[6], [32, 16]],                 # 08
    [[6], [16], [32], [32, 16]],     # 09
    [[32, 32]],                      # 10
]

# Every stack gets its own estimator instances, exactly as when each dict was written out by hand.
neural_net_stacks = [_build_lr_stack(layouts) for layouts in LR_STACK_HIDDEN_LAYOUTS]

# Keep the individual stack names bound for any cell that refers to them directly.
(
    simple_to_complex01,
    simple_to_complex02,
    simple_to_complex03,
    simple_to_complex04,
    simple_to_complex05,
    simple_to_complex06,
    simple_to_complex07,
    simple_to_complex08,
    simple_to_complex09,
    simple_to_complex10,
) = neural_net_stacks

In [25]:
# Super learner metrics per stack, keyed by the stack's position in neural_net_stacks.
# All stacks are scored on the validation frame with a GLM metalearner.
comparative_results_nns = {}

for stack_index, base_stack in enumerate(neural_net_stacks):
    stack_metrics = train_evaluate_stack(base_stack, "glm", h2o_train, h2o_val, X_train)
    comparative_results_nns[stack_index] = stack_metrics


>>> Training base learners:

    Training LogisticRegression_binomial with 12-fold cross-validation...
glm Model Build progress: |██████████████████████████████████████████████████████| (done) 100%

>>> Training super learner:

stackedensemble Model Build progress: |██████████████████████████████████████████| (done) 100%

>>> Base learners' results:

    LogisticRegression_binomial - F1-Score: 0.9160, AUC-PR: 0.9554, Accuracy (Test Set): 0.9334

>>> Metalearner's results:

    Super Learner - F1-Score: 0.9160, AUC-PR: 0.9554, Accuracy: 0.9334

>>> Training base learners:

    Training LogisticRegression_binomial with 12-fold cross-validation...
glm Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
    Training NeuralNetwork_6 with 12-fold cross-validation...
deeplearning Model Build progress: |█████████████████████████████████████████████| (done) 100%

>>> Training super learner:

stackedensemble Model Build progress: |█████████████████████████

In [26]:
# Show the super learner metrics of every logistic-regression-based stack (keyed by stack position).
comparative_results_nns

{0: {'F1-Score': 0.9159519725557461,
  'AUC-PR': 0.9553716837388596,
  'Accuracy': 0.9334239130434783},
 1: {'F1-Score': 0.9228187919463088,
  'AUC-PR': 0.9657863242915544,
  'Accuracy': 0.938858695652174},
 2: {'F1-Score': 0.934673366834171,
  'AUC-PR': 0.9714855885175135,
  'Accuracy': 0.9470108695652174},
 3: {'F1-Score': 0.9261744966442954,
  'AUC-PR': 0.9697625548762007,
  'Accuracy': 0.9415760869565217},
 4: {'F1-Score': 0.931323283082077,
  'AUC-PR': 0.9680440989821836,
  'Accuracy': 0.9442934782608695},
 5: {'F1-Score': 0.9322033898305085,
  'AUC-PR': 0.9739866255335934,
  'Accuracy': 0.9456521739130435},
 6: {'F1-Score': 0.937181663837012,
  'AUC-PR': 0.973981874845631,
  'Accuracy': 0.9497282608695652},
 7: {'F1-Score': 0.937181663837012,
  'AUC-PR': 0.9727419381430318,
  'Accuracy': 0.9497282608695652},
 8: {'F1-Score': 0.938566552901024,
  'AUC-PR': 0.9740263720350233,
  'Accuracy': 0.9510869565217391},
 9: {'F1-Score': 0.9290540540540541,
  'AUC-PR': 0.9647187332112436,
  

In [27]:
def _build_nb_stack(hidden_layouts):
    """Return a fresh base-learner stack: Naive Bayes followed by one MLP per layout.

    Args:
        hidden_layouts: Iterable of hidden-layer size lists, e.g. ``[[6], [32, 16]]``.
            An empty iterable yields a stack holding only the Naive Bayes model.

    Returns:
        dict mapping learner name -> untrained H2O estimator, in insertion order. MLPs are
        named ``NeuralNetwork_<sizes joined by "_">`` (``[32, 16]`` -> ``NeuralNetwork_32_16``).
    """
    stack = {
        "NaiveBayes": H2ONaiveBayesEstimator(
            nfolds=N_FOLDS, seed=42, keep_cross_validation_predictions=True
        ),
    }
    for hidden in hidden_layouts:
        layout_tag = "_".join(str(units) for units in hidden)
        stack[f"NeuralNetwork_{layout_tag}"] = H2ODeepLearningEstimator(
            hidden=list(hidden), epochs=300, nfolds=N_FOLDS, seed=42,
            keep_cross_validation_predictions=True,
        )
    return stack


# Hidden-layer layouts of the MLPs that accompany Naive Bayes in each stack
# (one entry per stack, in evaluation order).
NB_STACK_HIDDEN_LAYOUTS = [
    [],                              # 01: NB only
    [[6]],                           # 02
    [[16]],                          # 03
    [[6], [16]],                     # 04
    [[32]],                          # 05
    [[6], [16], [32]],               # 06
    [[32, 16]],                      # 07
    [[6], [32, 16]],                 # 08
    [[6], [16], [32], [32, 16]],     # 09
    [[32, 32]],                      # 10
]

# Every stack gets its own estimator instances, exactly as when each dict was written out by hand.
neural_net_stacks_nbayes = [_build_nb_stack(layouts) for layouts in NB_STACK_HIDDEN_LAYOUTS]

# NOTE(review): these names were already bound to the logistic-regression stacks by an earlier
# cell; they are rebound here only to preserve the notebook's existing behaviour.
(
    simple_to_complex01,
    simple_to_complex02,
    simple_to_complex03,
    simple_to_complex04,
    simple_to_complex05,
    simple_to_complex06,
    simple_to_complex07,
    simple_to_complex08,
    simple_to_complex09,
    simple_to_complex10,
) = neural_net_stacks_nbayes

In [38]:
# Super learner metrics per stack, keyed by the stack's position in neural_net_stacks_nbayes.
# All stacks are scored on the validation frame with a GLM metalearner.
comparative_results_nns_nbayes = {}

for stack_index, base_stack in enumerate(neural_net_stacks_nbayes):
    stack_metrics = train_evaluate_stack(base_stack, "glm", h2o_train, h2o_val, X_train)
    comparative_results_nns_nbayes[stack_index] = stack_metrics


>>> Training base learners:

    Training NaiveBayes with 12-fold cross-validation...
naivebayes Model Build progress: |███████████████████████████████████████████████| (done) 100%

>>> Training super learner:

stackedensemble Model Build progress: |██████████████████████████████████████████| (done) 100%

>>> Base learners' results:

    NaiveBayes - F1-Score: 0.8303, AUC-PR: 0.7914, Accuracy (Test Set): 0.8628

>>> Metalearner's results:

    Super Learner - F1-Score: 0.8303, AUC-PR: 0.7895, Accuracy: 0.8628

>>> Training base learners:

    Training NaiveBayes with 12-fold cross-validation...
naivebayes Model Build progress: |███████████████████████████████████████████████| (done) 100%
    Training NeuralNetwork_6 with 12-fold cross-validation...
deeplearning Model Build progress: |█████████████████████████████████████████████| (done) 100%

>>> Training super learner:

stackedensemble Model Build progress: |██████████████████████████████████████████| (done) 100%

>>> Base learners' 

### RESULTS COMPARISON BETWEEN LOG REGRESSION AND NAIVE BAYES

In [40]:
# Show the super learner metrics of every logistic-regression-based stack (keyed by stack position).
comparative_results_nns

{0: {'F1-Score': 0.9159519725557461,
  'AUC-PR': 0.9553716837388596,
  'Accuracy': 0.9334239130434783},
 1: {'F1-Score': 0.9228187919463088,
  'AUC-PR': 0.9657863242915544,
  'Accuracy': 0.938858695652174},
 2: {'F1-Score': 0.934673366834171,
  'AUC-PR': 0.9714855885175135,
  'Accuracy': 0.9470108695652174},
 3: {'F1-Score': 0.9261744966442954,
  'AUC-PR': 0.9697625548762007,
  'Accuracy': 0.9415760869565217},
 4: {'F1-Score': 0.931323283082077,
  'AUC-PR': 0.9680440989821836,
  'Accuracy': 0.9442934782608695},
 5: {'F1-Score': 0.9322033898305085,
  'AUC-PR': 0.9739866255335934,
  'Accuracy': 0.9456521739130435},
 6: {'F1-Score': 0.937181663837012,
  'AUC-PR': 0.973981874845631,
  'Accuracy': 0.9497282608695652},
 7: {'F1-Score': 0.937181663837012,
  'AUC-PR': 0.9727419381430318,
  'Accuracy': 0.9497282608695652},
 8: {'F1-Score': 0.938566552901024,
  'AUC-PR': 0.9740263720350233,
  'Accuracy': 0.9510869565217391},
 9: {'F1-Score': 0.9290540540540541,
  'AUC-PR': 0.9647187332112436,
  

In [42]:
# Show the super learner metrics of every Naive-Bayes-based stack (keyed by stack position).
comparative_results_nns_nbayes

{0: {'F1-Score': 0.8302521008403362,
  'AUC-PR': 0.7894555265975739,
  'Accuracy': 0.8627717391304348},
 1: {'F1-Score': 0.9215017064846416,
  'AUC-PR': 0.9661636992124756,
  'Accuracy': 0.9375},
 2: {'F1-Score': 0.9280821917808219,
  'AUC-PR': 0.9676140352009869,
  'Accuracy': 0.9429347826086957},
 3: {'F1-Score': 0.9326424870466321,
  'AUC-PR': 0.9752237825046188,
  'Accuracy': 0.9470108695652174},
 4: {'F1-Score': 0.92991452991453,
  'AUC-PR': 0.9716579209003223,
  'Accuracy': 0.9442934782608695},
 5: {'F1-Score': 0.9383561643835617,
  'AUC-PR': 0.9750213128235201,
  'Accuracy': 0.9510869565217391},
 6: {'F1-Score': 0.9376053962900506,
  'AUC-PR': 0.9622156812521568,
  'Accuracy': 0.9497282608695652},
 7: {'F1-Score': 0.9238410596026491,
  'AUC-PR': 0.9734709687279451,
  'Accuracy': 0.9402173913043478},
 8: {'F1-Score': 0.9384359400998336,
  'AUC-PR': 0.9785098469345997,
  'Accuracy': 0.9497282608695652},
 9: {'F1-Score': 0.9174917491749176,
  'AUC-PR': 0.9595778756537523,
  'Accura

### LEGEND EXPLANATION
#### LR - Logistic Regression
#### NNX - MLP with X neurons in 1 hidden layer
#### NNX_Y - MLP with X neurons in 1st hidden layer and Y neurons in 2nd hidden layer
#### NB - Naive Bayes

In [45]:
# One label per stack, in the same order as the stacks were evaluated.
# (The 8th stack is LR + NN6 + NN32_16; its label previously read "NN62_16" by mistake.)
labels = ["LR", "LR_NN6", "LR_NN16", "LR_NN6_NN16", "LR_NN32", "LR_NN6_NN16_NN32", "LR_NN32_16", "LR_NN6_NN32_16", "LR_NN6_NN16_NN32_NN32_16", "LR_NN32_32"]

# Build new row dicts instead of inserting "Configuration" into the dicts stored in
# comparative_results_nns, so the results themselves are left untouched.
data = [
    {**record, "Configuration": label}
    for label, record in zip(labels, comparative_results_nns.values())
]
df = pd.DataFrame(data)

In [47]:
import altair as alt

# One point chart per metric: configurations on the y axis (sorted by descending metric value),
# the metric on a non-zero-based x axis.
metric_charts = {}
for metric, mark_style in (
    ("F1-Score", {"fill": "blue"}),
    ("AUC-PR", {"fill": "orange", "stroke": "orange"}),
    ("Accuracy", {"fill": "purple", "stroke": "purple"}),
):
    metric_charts[metric] = (
        alt.Chart(df)
        .mark_point(**mark_style)
        .encode(
            y=alt.Y('Configuration', sort="-x"),
            x=alt.X(metric).scale(zero=False),
        )
        .properties(width=150)
    )

f1_chart = metric_charts["F1-Score"]
aucpr_chart = metric_charts["AUC-PR"]
accuracy_chart = metric_charts["Accuracy"]

# Show the three charts side by side.
aucpr_chart | f1_chart | accuracy_chart

In [49]:
# One label per stack, in the same order as the stacks were evaluated.
# (The 8th stack is NB + NN6 + NN32_16; its label previously read "NN62_16" by mistake.)
labels = ["NB", "NB_NN6", "NB_NN16", "NB_NN6_NN16", "NB_NN32", "NB_NN6_NN16_NN32", "NB_NN32_16", "NB_NN6_NN32_16", "NB_NN6_NN16_NN32_NN32_16", "NB_NN32_32"]

# Build new row dicts instead of inserting "Configuration" into the dicts stored in
# comparative_results_nns_nbayes, so the results themselves are left untouched.
data = [
    {**record, "Configuration": label}
    for label, record in zip(labels, comparative_results_nns_nbayes.values())
]
df = pd.DataFrame(data)

In [51]:
# Build the three metric charts from one specification table: configurations on the y axis
# (sorted by descending metric value), the metric on a non-zero-based x axis.
f1_chart, aucpr_chart, accuracy_chart = (
    alt.Chart(df)
    .mark_point(**mark_style)
    .encode(
        y=alt.Y('Configuration', sort="-x"),
        x=alt.X(metric).scale(zero=False),
    )
    .properties(width=150)
    for metric, mark_style in (
        ("F1-Score", {"fill": "blue"}),
        ("AUC-PR", {"fill": "orange", "stroke": "orange"}),
        ("Accuracy", {"fill": "purple", "stroke": "purple"}),
    )
)

# Show the three charts side by side.
aucpr_chart | f1_chart | accuracy_chart
aucpr_chart | f1_chart | accuracy_chart