# Overall Workflow
## 1. The Dataset
## 2. Preprocessing Before Modeling
## 3. Hyperparameter Tuning - Bayesian Optimization

# Packages

In [1]:
# Data structures
import numpy as np
import pandas as pd

# Data formats
import csv
import json

# Visualizations
import matplotlib.pyplot as plt
import seaborn as sb

# Statistical testings
from scipy.stats import f
from scipy.stats import ttest_ind
from scipy.stats import chi2_contingency

# Data preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Machine learning algorithms
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
import xgboost as xgb 
import lightgbm as lgb

# Hyperparameter tuning
from hyperopt import hp
from hyperopt import Trials
from hyperopt import tpe
from timeit import default_timer as timer
from hyperopt import STATUS_OK
from hyperopt import fmin
from bayes_opt import BayesianOptimization

# Model validation
from sklearn.model_selection import cross_validate
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score, roc_auc_score, roc_curve, auc, confusion_matrix

# Improvement
from imblearn.over_sampling import SMOTE

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


# 1. The Dataset

In [2]:
# Load the raw table from a CSV expected in the working directory and preview
# its first rows.
data = pd.read_csv('banking.csv')
data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,y
0,44,blue-collar,married,basic.4y,unknown,yes,no,cellular,aug,thu,...,1,999,0,nonexistent,1.4,93.444,-36.1,4.963,5228.1,0
1,53,technician,married,unknown,no,no,no,cellular,nov,fri,...,1,999,0,nonexistent,-0.1,93.2,-42.0,4.021,5195.8,0
2,28,management,single,university.degree,no,yes,no,cellular,jun,thu,...,3,6,2,success,-1.7,94.055,-39.8,0.729,4991.6,1
3,39,services,married,high.school,no,no,no,cellular,apr,fri,...,2,999,0,nonexistent,-1.8,93.075,-47.1,1.405,5099.1,0
4,55,retired,married,basic.4y,no,yes,no,cellular,aug,fri,...,1,3,1,success,-2.9,92.201,-31.4,0.869,5076.2,1


In [3]:
# 41188 data points with 21 columns: 20 predictors plus the target column `y`.
print(data.shape)

(41188, 21)


In [4]:
# `duration` is removed from the feature space because the goal is a predictive
# model (presumably the call length is only known once the call is over - confirm).
# Reassigning with errors='ignore' instead of dropping in place keeps this cell
# safe to re-run: a second execution no longer raises KeyError.
data = data.drop(columns='duration', errors='ignore')
print(data.shape)

(41188, 20)


# 2. Preprocessing Before Modeling
- **2.1 One-hot encoding**
- **2.2 Dummy feature generation**
- **2.3 Improvement: Over-sampling using SMOTE**
- **2.4 Train & test splitting (80%/20%)**
- **2.5 Standardization**
    - Why? The regularization used by Logistic Regression and the SGD classifier penalizes all coefficients equally, so the features need to be on a comparable scale.

## 2.1 One-hot encoding

In [5]:
# Partition the string-typed (object) columns by cardinality: columns with more
# than two distinct values get dummy features later, the remaining ones are
# label encoded.
object_columns = [name for name in data.columns if data[name].dtype == 'object']
dummy_features = [name for name in object_columns if len(data[name].unique()) > 2]
binary_features = [name for name in object_columns if len(data[name].unique()) <= 2]

In [6]:
# Label-encode the categorical features that have two levels (each level is
# replaced by an integer code).
le = LabelEncoder()

for col in binary_features:
    print(col)
    # Fit and apply in one call; the same encoder object is refit for every column.
    data[col] = le.fit_transform(data[col])

# Exactly one column is encoded per entry of binary_features.
le_count = len(binary_features)
print('%d columns were label encoded.' % le_count)

contact
1 columns were label encoded.


In [7]:
# Make sure the transformation is correct!
# 0: cellular
# 1: telephone
data['contact'].unique()

array([0, 1])

## 2.2 Dummy feature generation

In [8]:
# Create dummy features.
# Count the levels of every multi-level categorical feature to predict how many
# extra columns the dummy encoding will add (each feature is replaced by one
# column per level, hence the subtraction of the original columns).
level_counts = [(col, len(data[col].unique())) for col in dummy_features]
for col, n_levels in level_counts:
    print(col, n_levels)

sum_len = sum(n_levels for _, n_levels in level_counts)
print('{} columns will be additionally added.'.format(sum_len - len(dummy_features)))

job 12
marital 4
education 8
default 3
housing 3
loan 3
month 10
day_of_week 5
poutcome 3
42 columns will be additionally added.


In [9]:
# Shape before the dummy encoding.
print(data.shape)

# Replace every multi-level categorical column with one indicator column per level.
# Expected width afterwards: 20 + 42 = 62.
# Note: `data` is rebound here, so this cell cannot be re-run on its own - the
# columns listed in dummy_features no longer exist after the first execution.
data = pd.get_dummies(data, columns = dummy_features)
print(data.shape)

(41188, 20)
(41188, 62)


In [10]:
data.head()

Unnamed: 0,age,contact,campaign,pdays,previous,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,...,month_oct,month_sep,day_of_week_fri,day_of_week_mon,day_of_week_thu,day_of_week_tue,day_of_week_wed,poutcome_failure,poutcome_nonexistent,poutcome_success
0,44,0,1,999,0,1.4,93.444,-36.1,4.963,5228.1,...,0,0,0,0,1,0,0,0,1,0
1,53,0,1,999,0,-0.1,93.2,-42.0,4.021,5195.8,...,0,0,1,0,0,0,0,0,1,0
2,28,0,3,6,2,-1.7,94.055,-39.8,0.729,4991.6,...,0,0,0,0,1,0,0,0,0,1
3,39,0,2,999,0,-1.8,93.075,-47.1,1.405,5099.1,...,0,0,1,0,0,0,0,0,1,0
4,55,0,1,3,1,-2.9,92.201,-31.4,0.869,5076.2,...,0,0,1,0,0,0,0,0,0,1


## 2.3 Improvement: Over-sampling using SMOTE 

In [11]:
# Over-sample the minority class with SMOTE so both classes are equally frequent.
#
# NOTE(review): the over-sampling runs on the full dataset, before the train/test
# split in section 2.4. The held-out split therefore contains synthetic rows and
# is not independent of the training rows. Consider splitting first and
# over-sampling the training portion only.
feature_columns = data.columns.difference(['y'])

# `m_neighbors` is dropped: it is not used by plain SMOTE and newer imbalanced-learn
# releases reject it. `fit_resample` replaces the deprecated `fit_sample`.
# The variable name `os` is kept in case other cells refer to it.
os = SMOTE(sampling_strategy='minority', k_neighbors=10, random_state=0)
os_data_X, os_data_y = os.fit_resample(data[feature_columns], data['y'])

# Wrap the results in DataFrames with explicit column names, whatever container
# type the sampler returned.
os_data_X = pd.DataFrame(data=os_data_X, columns=feature_columns)
os_data_y = pd.DataFrame(data=os_data_y, columns=['y'])

# Check the numbers of over-sampled data
n_total = len(os_data_y)
n_no_subscription = int((os_data_y['y'] == 0).sum())
n_subscription = int((os_data_y['y'] == 1).sum())

print("length of oversampled data is ", len(os_data_X))
print("Number of no subscription in oversampled data", n_no_subscription)
print("Number of subscription", n_subscription)
print("Proportion of no subscription data in oversampled data is ",
      n_no_subscription / n_total)
print("Proportion of subscription data in oversampled data is ",
      n_subscription / n_total)

length of oversampled data is  73096
Number of no subscription in oversampled data 36548
Number of subscription 36548
Proportion of no subscription data in oversampled data is  0.5
Proportion of subscription data in oversampled data is  0.5


## 2.4 Train & test splitting (80%/20%)

In [12]:
# Hold out 20% of the over-sampled rows for testing; random_state=0 fixes the split.
x_train, x_test, y_train, y_test = train_test_split(os_data_X, 
                                                    os_data_y, 
                                                    test_size=0.2, random_state=0) 

In [13]:
# Make sure the splitting is correct!
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

(58476, 61) (14620, 61)
(58476, 1) (14620, 1)


## 2.5 Standardization

In [14]:
# Test data are unseen data! 
# Since my purpose is to build a prediction model, I shouldn't contaminate train and test data together. 
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits.
std_scaler = StandardScaler()
train_standardized_data = std_scaler.fit_transform(x_train)
test_standardized_data = std_scaler.transform(x_test)

In [15]:
# Make sure the standardization is correct (mean must be 0 and variance must be 1) 
# Train data
pd.DataFrame(train_standardized_data).describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
count,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,...,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0,58476.0
mean,-1.00202e-14,6.748655e-17,-6.86927e-14,-2.301517e-11,3.667442e-15,-3.870341e-15,-3.901273e-16,1.718602e-15,1.67509e-15,-2.863487e-15,...,-1.329894e-15,-4.945292e-16,1.779803e-15,1.507841e-15,1.18328e-11,-3.371684e-15,1.082046e-15,2.15401e-15,-1.583188e-15,-1.97131e-15
std,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,...,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009,1.000009
min,-1.962015,-0.5680033,-1.983291,-2.026413,-0.6322546,-0.5095551,-0.5276203,-0.5619754,-0.5346215,-0.5361716,...,-0.613969,-0.3350478,-0.2040029,-0.1854645,-1.984152,-2.851768,-0.3683961,-1.983339,-0.3412183,-0.4575588
25%,-0.7633044,-0.5680033,-0.4652984,-0.8507732,-0.6322546,-0.5095551,-0.5276203,-0.5619754,-0.5346215,-0.5361716,...,-0.613969,-0.3350478,-0.2040029,-0.1854645,-0.6875851,0.3529286,-0.3683961,0.5307416,-0.3412183,-0.4575588
50%,-0.199715,-0.1843367,-0.2966325,-0.0555978,-0.6322546,-0.5095551,-0.5276203,-0.5619754,-0.5346215,-0.5361716,...,-0.613969,-0.3350478,-0.2040029,-0.1854645,0.6343143,0.3529286,-0.3683961,0.5307416,-0.3412183,-0.4575588
75%,0.6394753,0.2584066,0.7153626,0.8164442,1.645423,-0.5095551,-0.4646568,0.05189206,-0.2971104,-0.2754336,...,1.657902,-0.3350478,-0.2040029,-0.1854645,1.061513,0.3529286,-0.3683961,0.5307416,-0.3412183,-0.4575588
max,4.835427,23.2311,2.495724,2.04206,1.657076,2.238409,2.150412,2.02644,2.141712,2.135584,...,1.657902,3.104917,5.185049,5.793828,1.061513,0.3529286,3.026884,0.5307416,2.984327,10.1856


# 3. Hyperparameter Tuning - Bayesian Optimization
#### Each model's hyperparameter search was run for 100 evaluations to find its best hyperparameters.
- **3.1 Logistic Regression**
- **3.2 SGD Classifier**
- **3.3 Decision Tree**
- **3.4 Bagging - Decision Tree**
- **3.5 Random Forest**
- **3.6 AdaBoost**
- **3.7 Gradient Boosting**
- **3.8 XGBoost**
- **3.9 LightGBM**

## 3.1 Logistic Regression
- Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
    - penalty: Used to specify the norm used in the penalization.
    - C: Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.
    - fit_intercept: Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.
    - random_state: The seed of the pseudo random number generator to use when shuffling the data.

In [None]:
# Define the search space (hyperparameter space).
space = {
    'penalty': hp.choice('penalty', ['l2', 'l1']),
    'C': hp.uniform('C', 0.01, 1000.0),
    'fit_intercept':hp.choice('fit_intercept', [True, False]),
    # Both sampled penalties must be supported by the solver. liblinear handles
    # 'l1' and 'l2', whereas relying on the library default makes every 'l1' trial
    # fail on scikit-learn versions whose default solver is L2-only.
    'solver': 'liblinear',
    'random_state':0,
}

In [None]:
# Define the objective for the optimization.
# hyperopt minimizes its objective, so the negative mean ROC AUC is returned.
# ROC AUC summarizes the trade-off between true-positive and false-positive
# rates over all decision thresholds.
def objective(hyperparameters):
    """Score one Logistic Regression configuration with 5-fold cross validation.

    Args:
        hyperparameters: dict of keyword arguments for ``LogisticRegression``,
            as sampled from the search space.

    Returns:
        dict with the hyperopt ``loss`` (negative mean test ROC AUC) and ``status``,
        plus the evaluated hyperparameters, the iteration number and the run time.

    Side effects:
        Increments the global ``ITERATION`` counter and appends one row to the
        csv file named by ``OUT_FILE``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    start = timer()

    # Perform 5-fold cross validation. y_train is a single-column frame, so it is
    # flattened to the 1-D target vector scikit-learn expects.
    model = LogisticRegression(**hyperparameters)
    cv_results = cross_validate(model, train_standardized_data, np.ravel(y_train),
                                scoring='roc_auc', cv=5,
                                return_train_score=False)
    run_time = timer() - start

    # Loss must be minimized (put a negative sign)
    loss = -(np.mean(cv_results['test_score']))

    # Append the result to the csv file. The context manager closes the handle even
    # if the write fails; newline='' is what the csv module requires for its files.
    with open(OUT_FILE, 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, hyperparameters, ITERATION, run_time])

    # Dictionary with information for later evaluations
    return {'loss': loss, 'hyperparameters': hyperparameters, 'iteration': ITERATION,
            'train_time': run_time, 'status': STATUS_OK}

In [None]:
def bayesian_optimize(n_eval):
    """Run a TPE search with the currently defined ``objective`` and ``space``.

    ``objective``, ``space`` and ``OUT_FILE`` are module-level names looked up
    when this function is called, so the most recently executed definitions are
    the ones used.

    Args:
        n_eval: maximum number of objective evaluations (passed to ``fmin`` as
            ``max_evals``).

    Returns:
        The value returned by ``fmin`` for the best trial.

    Side effects:
        Overwrites ``OUT_FILE`` with a header row and resets the global
        ``ITERATION`` counter to 0.
    """
    # Without this declaration the assignment below would only create a local
    # variable and the counter incremented by objective() would never be reset.
    global ITERATION
    ITERATION = 0

    # Record searching results.
    trials = Trials()

    # Create (or truncate) the csv file and write the column names. The context
    # manager closes the handle even if the write fails.
    headers = ['loss', 'hyperparameters', 'iteration', 'runtime']
    with open(OUT_FILE, 'w', newline='') as of_connection:
        csv.writer(of_connection).writerow(headers)

    # Start optimization!
    best = fmin(fn=objective,
                space=space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=n_eval)

    return best

In [None]:
# Module-level state shared with objective(): the evaluation counter starts at
# zero and results for this model go to the Logistic Regression csv file.
ITERATION = 0
OUT_FILE = 'Bank_Marketing_LR.csv'

In [None]:
bayesian_optimize(100)

## 3.2 SGD Classifier
- Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html
    - loss: The loss function to be used.
    - penalty: The penalty (aka regularization term) to be used.
    - alpha: Constant that multiplies the regularization term.
    - l1_ratio: The Elastic Net mixing parameter. l1_ratio=0 corresponds to L2 penalty, and l1_ratio=1 to L1.
    - fit_intercept: Whether the intercept should be estimated or not.

In [None]:
# Define the search space
# Search space for SGDClassifier; fixed entries (loss, random_state) are passed
# through unchanged on every trial.
# NOTE(review): the 'log' loss name and the string 'none' penalty may be spelled
# differently in newer scikit-learn releases - confirm against the installed version.
# NOTE(review): l1_ratio is sampled on every trial although, per the parameter
# description above, it is the Elastic Net mixing parameter - presumably it only
# matters when penalty='elasticnet'.
space = {
    'loss':'log',
    'penalty': hp.choice('penalty', ['none', 'l2', 'l1', 'elasticnet']),
    'alpha': hp.uniform('alpha', 0.0001, 100.0),
    'l1_ratio': hp.uniform('l1_ratio', 0.0, 1.0),
    'fit_intercept':hp.choice('fit_intercept', [False, True]),
    'random_state':0
}

In [None]:
# Define the objective: hyperopt minimizes, so the negative mean ROC AUC of the
# classifier is returned (this is not a mean-squared-error objective).
def objective(hyperparameters):
    """Score one SGDClassifier configuration with 5-fold cross validation.

    Args:
        hyperparameters: dict of keyword arguments for ``SGDClassifier``,
            as sampled from the search space.

    Returns:
        dict with the hyperopt ``loss`` (negative mean test ROC AUC) and ``status``,
        plus the evaluated hyperparameters, the iteration number and the run time.

    Side effects:
        Increments the global ``ITERATION`` counter and appends one row to the
        csv file named by ``OUT_FILE``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    start = timer()

    # Perform 5-fold cross validation. y_train is a single-column frame, so it is
    # flattened to the 1-D target vector scikit-learn expects.
    model = SGDClassifier(**hyperparameters)
    cv_results = cross_validate(model, train_standardized_data, np.ravel(y_train),
                                scoring='roc_auc', cv=5,
                                return_train_score=False)
    run_time = timer() - start

    # Loss must be minimized (put a negative sign)
    loss = -(np.mean(cv_results['test_score']))

    # Append the result to the csv file. The context manager closes the handle even
    # if the write fails; newline='' is what the csv module requires for its files.
    with open(OUT_FILE, 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, hyperparameters, ITERATION, run_time])

    # Dictionary with information for evaluation
    return {'loss': loss, 'hyperparameters': hyperparameters, 'iteration': ITERATION,
            'train_time': run_time, 'status': STATUS_OK}

In [None]:
# Module-level state shared with objective(): the evaluation counter starts at
# zero and results for this model go to the SGD classifier csv file.
ITERATION = 0
OUT_FILE = 'Bank_Marketing_SGD.csv'

In [None]:
bayesian_optimize(100)

## 3.3 Decision Tree
- Reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier
    - criterion: The function to measure the quality of a split.
    - max_depth: The maximum depth of the tree.
    - min_samples_split: The minimum number of samples required to split an internal node.
    - min_samples_leaf: The minimum number of samples required to be at a leaf node. (This interacts with max_depth, because limiting the depth also affects how many samples end up in each leaf.)
    - random_state

In [None]:
# Define the search space (hyperparameter space).
space = {
    'criterion': hp.choice('criterion', ['gini', 'entropy']),
    # hp.randint(label, n) draws from [0, n), so it can return 0, which is not a
    # valid tree depth. Shifting by one gives depths in [1, 58475].
    'max_depth': 1 + hp.randint('max_depth', 58476-1),
    'min_samples_split': hp.uniform('min_samples_split', 0.0, 1.0),
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.0, 0.5),
    'random_state': 0,
}

In [None]:
# Define the objective for the optimization.
# hyperopt minimizes its objective, so the negative mean ROC AUC is returned.
# ROC AUC summarizes the trade-off between true-positive and false-positive
# rates over all decision thresholds.
def objective(hyperparameters):
    """Score one DecisionTreeClassifier configuration with 5-fold cross validation.

    Args:
        hyperparameters: dict of keyword arguments for ``DecisionTreeClassifier``,
            as sampled from the search space.

    Returns:
        dict with the hyperopt ``loss`` (negative mean test ROC AUC) and ``status``,
        plus the evaluated hyperparameters, the iteration number and the run time.

    Side effects:
        Increments the global ``ITERATION`` counter and appends one row to the
        csv file named by ``OUT_FILE``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    start = timer()

    # Perform 5-fold cross validation on the unscaled training split. y_train is a
    # single-column frame, so it is flattened to a 1-D target vector.
    model = DecisionTreeClassifier(**hyperparameters)
    cv_results = cross_validate(model, x_train, np.ravel(y_train),
                                scoring='roc_auc', cv=5,
                                return_train_score=False)
    run_time = timer() - start

    # Loss must be minimized (put a negative sign)
    loss = -(np.mean(cv_results['test_score']))

    # Append the result to the csv file. The context manager closes the handle even
    # if the write fails; newline='' is what the csv module requires for its files.
    with open(OUT_FILE, 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, hyperparameters, ITERATION, run_time])

    # Dictionary with information for later evaluations
    return {'loss': loss, 'hyperparameters': hyperparameters, 'iteration': ITERATION,
            'train_time': run_time, 'status': STATUS_OK}

In [None]:
# Module-level state shared with objective(): the evaluation counter starts at
# zero and results for this model go to the decision tree csv file.
ITERATION = 0
OUT_FILE = 'Bank_Marketing_DT.csv'

In [None]:
bayesian_optimize(100)

## 3.4 Bagging - Decision Tree
- Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html
    - base_estimator: The base estimator to fit on random subsets of the dataset.
    - n_estimators: The number of base estimators in the ensemble.
    - max_samples: The number of samples to draw from X to train each base estimator.
    - bootstrap: Whether samples are drawn with replacement.
    - random_state

In [None]:
pd.read_csv('Bank_Marketing_DT.csv').sort_values(by='loss').head(5)

In [None]:
pd.read_csv('Bank_Marketing_DT.csv').sort_values(by='loss').hyperparameters.head(1).values

In [None]:
# Define the search space (hyperparameter space).

# For bagging, it needs to have a base estimator as a parameter. 
# Thus I chose the top one optimal decision tree and a default decision tree
# (to perform bagging, it's better to have a 'deep' tree).
# Hard-coded tree settings; presumably copied by hand from the best row of
# Bank_Marketing_DT.csv - confirm after re-running the decision tree search.
optimal_dt = DecisionTreeClassifier(criterion='entropy',
                                    max_depth=52983,
                                    min_samples_leaf=0.0005254964145084773,
                                    min_samples_split=0.03703819684650395,
                                    random_state=0)

# Alternative base estimator: a depth-15 tree with otherwise default settings.
default_dt = DecisionTreeClassifier(max_depth=15, random_state=0)

space = {
    'base_estimator': hp.choice('base_estimator', [optimal_dt, default_dt]),
    # hp.randint(label, n) draws from [0, n), so it can return 0 and an ensemble
    # with no estimators cannot be fitted. Shifting by one gives [1, 500].
    'n_estimators': 1 + hp.randint('n_estimators', 500),
    'max_samples': hp.uniform('max_samples', 0.0, 1.0),
    'bootstrap': hp.choice('bootstrap', [False, True]),
    'random_state': 0,
}

In [None]:
# Define the objective for the optimization.
# hyperopt minimizes its objective, so the negative mean ROC AUC is returned.
# ROC AUC summarizes the trade-off between true-positive and false-positive
# rates over all decision thresholds.
def objective(hyperparameters):
    """Score one BaggingClassifier configuration with 5-fold cross validation.

    Args:
        hyperparameters: dict of keyword arguments for ``BaggingClassifier``,
            as sampled from the search space.

    Returns:
        dict with the hyperopt ``loss`` (negative mean test ROC AUC) and ``status``,
        plus the evaluated hyperparameters, the iteration number and the run time.

    Side effects:
        Increments the global ``ITERATION`` counter and appends one row to the
        csv file named by ``OUT_FILE``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    start = timer()

    # Perform 5-fold cross validation on the unscaled training split. y_train is a
    # single-column frame, so it is flattened to a 1-D target vector.
    model = BaggingClassifier(**hyperparameters)
    cv_results = cross_validate(model, x_train, np.ravel(y_train),
                                scoring='roc_auc', cv=5,
                                return_train_score=False)
    run_time = timer() - start

    # Loss must be minimized (put a negative sign)
    loss = -(np.mean(cv_results['test_score']))

    # Append the result to the csv file. The context manager closes the handle even
    # if the write fails; newline='' is what the csv module requires for its files.
    with open(OUT_FILE, 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, hyperparameters, ITERATION, run_time])

    # Dictionary with information for later evaluations
    return {'loss': loss, 'hyperparameters': hyperparameters, 'iteration': ITERATION,
            'train_time': run_time, 'status': STATUS_OK}

In [None]:
# Module-level state shared with objective(): the evaluation counter starts at
# zero and results for this model go to the bagging csv file.
ITERATION = 0
OUT_FILE = 'Bank_Marketing_Bag_DT.csv'

In [None]:
bayesian_optimize(100)

## 3.5 Random Forest
- Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
    - n_estimators: The number of trees in the forest.
    - criterion: The function to measure the quality of a split.
    - max_depth: The maximum depth of the tree.
    - min_samples_split: The minimum number of samples required to split an internal node.
    - min_samples_leaf: The minimum number of samples required to be at a leaf node.
    - max_features: The number of features to consider when looking for the best split.
    - bootstrap: Whether bootstrap samples are used when building trees.
    - random_state

In [None]:
# Search space for RandomForestClassifier.
# hp.randint(label, n) draws from [0, n), so it can return 0, which is invalid
# both as a number of trees and as a tree depth. The integer parameters are
# therefore shifted by one to the range [1, 500].
space = {
    'n_estimators': 1 + hp.randint('n_estimators', 500),
    'criterion': hp.choice('criterion', ['gini', 'entropy']),
    'max_depth': 1 + hp.randint('max_depth', 500),
    'min_samples_split': hp.uniform('min_samples_split', 0.0, 1.0),
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.002, 0.5),
    'max_features': hp.uniform('max_features', 0.0, 1.0),
    'bootstrap': hp.choice('bootstrap', [False, True]),
    'random_state': 0
}

In [None]:
# Define the objective for the optimization.
# hyperopt minimizes its objective, so the negative mean ROC AUC is returned.
# ROC AUC summarizes the trade-off between true-positive and false-positive
# rates over all decision thresholds.
def objective(hyperparameters):
    """Score one RandomForestClassifier configuration with 5-fold cross validation.

    Args:
        hyperparameters: dict of keyword arguments for ``RandomForestClassifier``,
            as sampled from the search space.

    Returns:
        dict with the hyperopt ``loss`` (negative mean test ROC AUC) and ``status``,
        plus the evaluated hyperparameters, the iteration number and the run time.

    Side effects:
        Increments the global ``ITERATION`` counter and appends one row to the
        csv file named by ``OUT_FILE``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    start = timer()

    # Perform 5-fold cross validation on the unscaled training split. y_train is a
    # single-column frame, so it is flattened to a 1-D target vector.
    model = RandomForestClassifier(**hyperparameters)
    cv_results = cross_validate(model, x_train, np.ravel(y_train),
                                scoring='roc_auc', cv=5,
                                return_train_score=False)
    run_time = timer() - start

    # Loss must be minimized (put a negative sign)
    loss = -(np.mean(cv_results['test_score']))

    # Append the result to the csv file. The context manager closes the handle even
    # if the write fails; newline='' is what the csv module requires for its files.
    with open(OUT_FILE, 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, hyperparameters, ITERATION, run_time])

    # Dictionary with information for later evaluations
    return {'loss': loss, 'hyperparameters': hyperparameters, 'iteration': ITERATION,
            'train_time': run_time, 'status': STATUS_OK}

In [None]:
# Module-level state shared with objective(): the evaluation counter starts at
# zero and results for this model go to the random forest csv file.
ITERATION = 0
OUT_FILE = 'Bank_Marketing_RF.csv'

In [None]:
bayesian_optimize(100)

## 3.6 AdaBoost
- Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html
    - base_estimator: The base estimator from which the boosted ensemble is built.
    - n_estimators: The maximum number of estimators at which boosting is terminated.
    - learning_rate: Learning rate shrinks the contribution of each classifier by learning_rate. There is a trade-off between learning_rate and n_estimators.
    - random_state

In [None]:
# Boosting is typically built from weak learners, so the candidate base estimators
# are shallow trees (depth 4 to 8) rather than fully grown ones.
# One candidate base tree per depth from 4 to 8, all with the same fixed seed.
base_1, base_2, base_3, base_4, base_5 = (
    DecisionTreeClassifier(max_depth=depth, random_state=0)
    for depth in (4, 5, 6, 7, 8)
)

In [None]:
# Search space for AdaBoostClassifier.
space = {
    'base_estimator': hp.choice('base_estimator', [base_1,base_2,base_3,base_4,base_5]),
    # hp.randint(label, n) draws from [0, n), so it can return 0 and an ensemble
    # with no estimators cannot be fitted. Shifting by one gives [1, 500].
    'n_estimators': 1 + hp.randint('n_estimators', 500),
    'learning_rate': hp.uniform('learning_rate', 0.0, 1.0),
    'random_state': 0
}

In [None]:
# Define the objective for the optimization.
# hyperopt minimizes its objective, so the negative mean ROC AUC is returned.
# ROC AUC summarizes the trade-off between true-positive and false-positive
# rates over all decision thresholds.
def objective(hyperparameters):
    """Score one AdaBoostClassifier configuration with 5-fold cross validation.

    Args:
        hyperparameters: dict of keyword arguments for ``AdaBoostClassifier``,
            as sampled from the search space.

    Returns:
        dict with the hyperopt ``loss`` (negative mean test ROC AUC) and ``status``,
        plus the evaluated hyperparameters, the iteration number and the run time.

    Side effects:
        Increments the global ``ITERATION`` counter and appends one row to the
        csv file named by ``OUT_FILE``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    start = timer()

    # Perform 5-fold cross validation on the unscaled training split. y_train is a
    # single-column frame, so it is flattened to a 1-D target vector.
    model = AdaBoostClassifier(**hyperparameters)
    cv_results = cross_validate(model, x_train, np.ravel(y_train),
                                scoring='roc_auc', cv=5,
                                return_train_score=False)
    run_time = timer() - start

    # Loss must be minimized (put a negative sign)
    loss = -(np.mean(cv_results['test_score']))

    # Append the result to the csv file. The context manager closes the handle even
    # if the write fails; newline='' is what the csv module requires for its files.
    with open(OUT_FILE, 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, hyperparameters, ITERATION, run_time])

    # Dictionary with information for later evaluations
    return {'loss': loss, 'hyperparameters': hyperparameters, 'iteration': ITERATION,
            'train_time': run_time, 'status': STATUS_OK}

In [None]:
# Module-level state shared with objective(): the evaluation counter starts at
# zero and results for this model go to the AdaBoost csv file.
ITERATION = 0
OUT_FILE = 'Bank_Marketing_AdaBoost.csv'

In [None]:
bayesian_optimize(100)

## 3.7 Gradient Boosting
- Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html
    - n_estimators: The number of boosting stages to perform.
    - max_depth: maximum depth of the individual regression estimators.
    - subsample: The fraction of samples to be used for fitting the individual base learners.
    - max_features: The number of features to consider when looking for the best split.
    - learning_rate: learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.
    - random_state

In [None]:
# Search space for GradientBoostingClassifier.
# hp.randint(label, n) draws from [0, n), so it can return 0; the integer
# parameters are shifted by one to exclude that invalid value.
space = {
    'n_estimators': 1 + hp.randint('n_estimators', 500),
    # Tree depth has to be a positive integer. The previous hp.uniform(0, 8)
    # produced floats, including values close to 0. This draws from [1, 8].
    'max_depth': 1 + hp.randint('max_depth', 8),
    'subsample': hp.uniform('subsample', 0.0, 1.0),
    'max_features': hp.uniform('max_features', 0.0, 1.0),
    'learning_rate': hp.uniform('learning_rate', 0.0, 1.0),
    'random_state': 0
}

In [None]:
# Define the objective for the optimization.
# hyperopt minimizes its objective, so the negative mean ROC AUC is returned.
# ROC AUC summarizes the trade-off between true-positive and false-positive
# rates over all decision thresholds.
def objective(hyperparameters):
    """Score one GradientBoostingClassifier configuration with 5-fold cross validation.

    Args:
        hyperparameters: dict of keyword arguments for
            ``GradientBoostingClassifier``, as sampled from the search space.

    Returns:
        dict with the hyperopt ``loss`` (negative mean test ROC AUC) and ``status``,
        plus the evaluated hyperparameters, the iteration number and the run time.

    Side effects:
        Increments the global ``ITERATION`` counter and appends one row to the
        csv file named by ``OUT_FILE``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    start = timer()

    # Perform 5-fold cross validation on the unscaled training split. y_train is a
    # single-column frame, so it is flattened to a 1-D target vector.
    model = GradientBoostingClassifier(**hyperparameters)
    cv_results = cross_validate(model, x_train, np.ravel(y_train),
                                scoring='roc_auc', cv=5,
                                return_train_score=False)
    run_time = timer() - start

    # Loss must be minimized (put a negative sign)
    loss = -(np.mean(cv_results['test_score']))

    # Append the result to the csv file. The context manager closes the handle even
    # if the write fails; newline='' is what the csv module requires for its files.
    with open(OUT_FILE, 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, hyperparameters, ITERATION, run_time])

    # Dictionary with information for later evaluations
    return {'loss': loss, 'hyperparameters': hyperparameters, 'iteration': ITERATION,
            'train_time': run_time, 'status': STATUS_OK}

In [None]:
# Module-level state shared with objective(): the evaluation counter starts at
# zero and results for this model go to the gradient boosting csv file.
ITERATION = 0
OUT_FILE = 'Bank_Marketing_GradientBoost.csv'

In [None]:
bayesian_optimize(100)

## 3.8 XGBoost
- Reference: https://xgboost.readthedocs.io/en/latest/parameter.html
    - Parameters for Tree Booster
        - eta: Step size shrinkage used in update to prevents overfitting.
        - max_depth: Maximum depth of a tree.
        - subsample: Subsample ratio of the training instances.
        - colsample_bytree: Subsample ratio of columns when constructing each tree.
        - colsample_bylevel: Subsample ratio of columns for each level.
        - colsample_bynode: Subsample ratio of columns for each node (split).
        - lambda: L2 regularization term on weights.
        - alpha: L1 regularization term on weights.
    - Learning Task Parameters
        - objective: binary:logistic (logistic regression for binary classification, output probability).
        - eval_metric: auc (Area under the curve).
    - Command Line Parameters
        - num_round: The number of rounds for boosting.
        - task: train (training using data).

In [None]:
# Search space for the XGBoost booster.
# hp.randint(label, n) draws from [0, n), so it can return 0. Zero boosting
# rounds leaves nothing to score, and a depth of 0 is not an ordinary bounded
# tree depth, so both integer parameters are shifted by one.
space = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'eta': hp.uniform('eta', 0.0, 1.0),
    'max_depth': 1 + hp.randint('max_depth', 8),
    'subsample': hp.uniform('subsample', 0.0, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.0, 1.0),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 0.0, 1.0),
    'colsample_bynode': hp.uniform('colsample_bynode', 0.0, 1.0),
    'lambda': hp.uniform('lambda', 0.0, 1.0),
    'alpha': hp.uniform('alpha', 0.0, 1.0),
    'num_round': 1 + hp.randint('num_round', 500),
    'task': 'train'
}

In [None]:
# Define the objective for the optimization.
# hyperopt minimizes its objective, so the negative mean ROC AUC is returned.
# ROC AUC summarizes the trade-off between true-positive and false-positive
# rates over all decision thresholds.
def objective(hyperparameters):
    """Score one XGBoost configuration with 5-fold ``xgb.cv`` and early stopping.

    Args:
        hyperparameters: dict sampled from the search space. ``num_round`` is the
            upper bound on boosting rounds and is overwritten in place with the
            number of rounds actually kept by the cross validation.

    Returns:
        dict with the hyperopt ``loss`` (negative mean test AUC of the last kept
        round) and ``status``, plus the hyperparameters, the iteration number and
        the run time.

    Side effects:
        Increments the global ``ITERATION`` counter, mutates ``hyperparameters``
        and appends one row to the csv file named by ``OUT_FILE``.
    """
    # Keep track of evals
    global ITERATION
    ITERATION += 1

    start = timer()

    # 'num_round' and 'task' are command-line options rather than booster
    # parameters, so they are kept out of the dict handed to xgb.cv.
    booster_params = {key: value for key, value in hyperparameters.items()
                      if key not in ('num_round', 'task')}

    # Guard against a sampled value of 0 rounds, which would presumably leave no
    # rows in the result table to read the score from.
    max_rounds = max(1, int(hyperparameters['num_round']))

    # Perform 5-fold cross validation
    cv = xgb.cv(booster_params, xgb_train, num_boost_round=max_rounds,
                nfold=5, early_stopping_rounds=10, verbose_eval=0)

    run_time = timer() - start

    # Loss must be minimized (put a negative sign). The last row is selected by
    # position so the lookup does not depend on the index labels.
    loss = -(cv['test-auc-mean'].iloc[-1])

    # Update the number of boosting rounds once the early stopping is finished.
    hyperparameters['num_round'] = cv.shape[0]

    # Append the result to the csv file. The context manager closes the handle even
    # if the write fails; newline='' is what the csv module requires for its files.
    with open(OUT_FILE, 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, hyperparameters, ITERATION, run_time])

    # Dictionary with information for later evaluations
    return {'loss': loss, 'hyperparameters': hyperparameters, 'iteration': ITERATION,
            'train_time': run_time, 'status': STATUS_OK}

In [None]:
# Wrap the unscaled training split in a DMatrix; objective() passes this object
# to xgb.cv, so this cell must run before the search is started.
xgb_train = xgb.DMatrix(x_train, label=y_train)

In [None]:
# Module-level state shared with objective(): the evaluation counter starts at
# zero and results for this model go to the XGBoost csv file.
ITERATION = 0
OUT_FILE = 'Bank_Marketing_XGB.csv'

In [None]:
bayesian_optimize(100)

## 3.9 LightGBM
- Reference:
    - Core parameters
        - task: default = train.
        - objective: binary, binary log loss classification (or logistic regression).
        - boosting: default = gbdt (traditional Gradient Boosting Decision Tree).
        - data: path of training data, LightGBM will train from this data.
        - num_iterations: number of boosting iterations.
        - learning_rate: shrinkage rate.
        - num_leaves: max number of leaves in one tree
        - seed: this seed is used to generate other seeds.
    - Learning Control Parameters
        - max_depth: limit the max depth for tree model. 
        - bagging_fraction: randomly select part of data without resampling.
        - bagging_freq: 0 means disable bagging; k means perform bagging at every k iteration.
        - feature_fraction: randomly select part of features on each iteration.
        - early_stopping_round: will stop training if one metric of one validation data doesn't improve in last early_stopping_round rounds.
        - lambda_l1: L1 regularization.
        - lambda_l2: L2 regularization.
    - Metric Parameters
        - metric: metric(s) to be evaluated on the evaluation set(s).

In [None]:
def bayes_parameter_opt_lgb(X, y, file_path, init_round=15, opt_round=25, n_folds=5,
                            random_seed=0, output_process=False):
    """Search LightGBM hyperparameters with Bayesian optimization.

    Each candidate is scored by the mean cross-validated AUC returned by
    ``lgb.cv``; the optimizer maximizes that score.

    Parameters
    ----------
    X, y :
        Training features and labels, handed to ``lgb.Dataset``.
    file_path : str
        CSV file the search history is appended to when ``output_process``
        is truthy.
    init_round : int
        Forwarded to ``BayesianOptimization.maximize`` as ``init_points``.
    opt_round : int
        Forwarded to ``BayesianOptimization.maximize`` as ``n_iter``.
    n_folds : int
        Number of cross-validation folds (``nfold`` of ``lgb.cv``).
    random_seed : int
        ``seed`` argument of ``lgb.cv``. The model seed in ``params`` and the
        optimizer's ``random_state`` are fixed at 0 regardless of this value.
    output_process : bool
        When truthy, append one CSV row per evaluated point to ``file_path``.

    Returns
    -------
    The optimizer's ``res`` attribute; every entry is indexed with
    ``'target'`` (the AUC) and ``'params'`` (the evaluated point).
    """
    # Prepare data once; the same Dataset is reused by every evaluation.
    train_data = lgb.Dataset(data=X, label=y, free_raw_data=False)

    def lgb_eval(num_iterations, learning_rate, max_depth,
                 bagging_fraction, feature_fraction,
                 lambda_l1, lambda_l2):
        """Return the final mean CV AUC for one hyperparameter candidate."""
        # The optimizer proposes floats, so integer-valued settings are rounded.
        # The round count goes in through `num_boost_round` only: leaving a
        # `num_iterations` key in `params` as well makes LightGBM warn and
        # silently override the argument.
        boosting_rounds = int(round(num_iterations))

        params = {
            # Predefined parameters
            "objective": "binary", "seed": 0, "metric": "auc", "bagging_freq": 1,
            # Hyperparameters under search
            "learning_rate": learning_rate,
            "max_depth": int(round(max_depth)),
            "bagging_fraction": bagging_fraction,
            "feature_fraction": feature_fraction,
            "lambda_l1": lambda_l1,
            "lambda_l2": lambda_l2,
        }

        # n_folds-fold cross validation with early stopping.
        cv_result = lgb.cv(params, train_set=train_data,
                           num_boost_round=boosting_rounds,
                           nfold=n_folds,
                           early_stopping_rounds=10,
                           seed=random_seed,
                           verbose_eval=200,
                           stratified=False)

        # Score of the last recorded boosting round.
        return cv_result['auc-mean'][-1]

    # Hyperparameter range
    lgbBO = BayesianOptimization(lgb_eval, {'num_iterations': (100, 500),
                                            'learning_rate': (0.1, 1.0),
                                            'max_depth': (1, 8),
                                            'bagging_fraction': (0.1, 1.0),
                                            'feature_fraction': (0.1, 1.0),
                                            'lambda_l1': (0.0, 1.0),
                                            'lambda_l2': (0.0, 1.0)},
                                 random_state=0)

    # Start optimizing (maximize the auc score)
    lgbBO.maximize(init_points=init_round, n_iter=opt_round, acq='ei')

    # Output optimization process
    if output_process:
        # Append the search results to a csv file. The context manager closes
        # the file even if a write fails, and newline='' is what the csv module
        # expects so that no blank rows appear on Windows.
        with open(file_path, 'a', newline='') as of_connection:
            writer = csv.writer(of_connection)
            # Only a new/empty file gets the header row; otherwise every run
            # would insert another header in the middle of the data.
            if of_connection.tell() == 0:
                writer.writerow(['loss', 'hyperparameters', 'iteration'])
            for i, trial in enumerate(lgbBO.res):
                writer.writerow([trial['target'], trial['params'], i])

    # Return the searching result
    return lgbBO.res

In [74]:
# 10 initial points + 90 optimization steps = 100 evaluations in total.
opt_params = bayes_parameter_opt_lgb(
    x_train,
    y_train,
    'Bank_Marketing_LightGBM.csv',
    init_round=10,
    opt_round=90,
    n_folds=5,
    random_seed=0,
    output_process=True,
)

|   iter    |  target   | baggin... | featur... | lambda_l1 | lambda_l2 | learni... | max_depth | num_it... |
-------------------------------------------------------------------------------------------------------------




| [0m 1       [0m | [0m 0.9736  [0m | [0m 0.5939  [0m | [0m 0.7437  [0m | [0m 0.6028  [0m | [0m 0.5449  [0m | [0m 0.4813  [0m | [0m 5.521   [0m | [0m 275.0   [0m |




| [95m 2       [0m | [95m 0.9736  [0m | [95m 0.9026  [0m | [95m 0.9673  [0m | [95m 0.3834  [0m | [95m 0.7917  [0m | [95m 0.576   [0m | [95m 4.976   [0m | [95m 470.2   [0m |




| [0m 3       [0m | [0m 0.9696  [0m | [0m 0.1639  [0m | [0m 0.1784  [0m | [0m 0.02022 [0m | [0m 0.8326  [0m | [0m 0.8003  [0m | [0m 7.09    [0m | [0m 491.4   [0m |




| [95m 4       [0m | [95m 0.9741  [0m | [95m 0.8192  [0m | [95m 0.5153  [0m | [95m 0.7805  [0m | [95m 0.1183  [0m | [95m 0.6759  [0m | [95m 2.003   [0m | [95m 477.9   [0m |




| [0m 5       [0m | [0m 0.9733  [0m | [0m 0.5697  [0m | [0m 0.4732  [0m | [0m 0.2646  [0m | [0m 0.7742  [0m | [0m 0.5105  [0m | [0m 4.979   [0m | [0m 107.5   [0m |




| [0m 6       [0m | [0m 0.9732  [0m | [0m 0.6559  [0m | [0m 0.6509  [0m | [0m 0.6169  [0m | [0m 0.9437  [0m | [0m 0.7136  [0m | [0m 3.517   [0m | [0m 274.8   [0m |




[200]	cv_agg's auc: 0.97375 + 0.00109602
| [0m 7       [0m | [0m 0.9738  [0m | [0m 0.7279  [0m | [0m 0.1542  [0m | [0m 0.6668  [0m | [0m 0.6706  [0m | [0m 0.2893  [0m | [0m 1.902   [0m | [0m 226.2   [0m |




| [0m 8       [0m | [0m 0.9731  [0m | [0m 0.4273  [0m | [0m 0.6132  [0m | [0m 0.4386  [0m | [0m 0.9884  [0m | [0m 0.1918  [0m | [0m 2.462   [0m | [0m 164.5   [0m |




[200]	cv_agg's auc: 0.973777 + 0.00102822
| [95m 9       [0m | [95m 0.9743  [0m | [95m 0.6878  [0m | [95m 0.328   [0m | [95m 0.4663  [0m | [95m 0.2444  [0m | [95m 0.2431  [0m | [95m 1.773   [0m | [95m 362.5   [0m |




| [0m 10      [0m | [0m 0.9739  [0m | [0m 0.2244  [0m | [0m 0.2769  [0m | [0m 0.3687  [0m | [0m 0.821   [0m | [0m 0.1874  [0m | [0m 6.866   [0m | [0m 138.4   [0m |




| [0m 11      [0m | [0m 0.9682  [0m | [0m 0.5993  [0m | [0m 0.6948  [0m | [0m 0.5723  [0m | [0m 0.2513  [0m | [0m 0.8206  [0m | [0m 1.103   [0m | [0m 100.1   [0m |




[200]	cv_agg's auc: 0.954734 + 0.00168303
[400]	cv_agg's auc: 0.965418 + 0.00129478
| [0m 12      [0m | [0m 0.966   [0m | [0m 0.9494  [0m | [0m 0.9505  [0m | [0m 0.8927  [0m | [0m 0.9859  [0m | [0m 0.1506  [0m | [0m 1.068   [0m | [0m 427.9   [0m |




| [0m 13      [0m | [0m 0.9737  [0m | [0m 0.9529  [0m | [0m 0.967   [0m | [0m 0.002427[0m | [0m 0.1514  [0m | [0m 0.4408  [0m | [0m 7.604   [0m | [0m 186.8   [0m |




| [95m 14      [0m | [95m 0.9744  [0m | [95m 0.972   [0m | [95m 0.4702  [0m | [95m 0.01689 [0m | [95m 0.08006 [0m | [95m 0.3823  [0m | [95m 1.739   [0m | [95m 322.7   [0m |




| [95m 15      [0m | [95m 0.9752  [0m | [95m 0.9628  [0m | [95m 0.1455  [0m | [95m 0.836   [0m | [95m 0.1266  [0m | [95m 0.1222  [0m | [95m 7.728   [0m | [95m 344.2   [0m |




| [0m 16      [0m | [0m 0.9733  [0m | [0m 0.9911  [0m | [0m 0.1729  [0m | [0m 0.824   [0m | [0m 0.9539  [0m | [0m 0.9379  [0m | [0m 7.97    [0m | [0m 142.7   [0m |




| [0m 17      [0m | [0m 0.9736  [0m | [0m 0.1181  [0m | [0m 0.8851  [0m | [0m 0.01478 [0m | [0m 0.9769  [0m | [0m 0.1636  [0m | [0m 7.285   [0m | [0m 338.5   [0m |




[200]	cv_agg's auc: 0.94544 + 0.00178101
[400]	cv_agg's auc: 0.96141 + 0.00131786
| [0m 18      [0m | [0m 0.9644  [0m | [0m 0.9417  [0m | [0m 0.7568  [0m | [0m 0.3195  [0m | [0m 0.1625  [0m | [0m 0.1089  [0m | [0m 1.006   [0m | [0m 499.8   [0m |




| [0m 19      [0m | [0m 0.975   [0m | [0m 0.6196  [0m | [0m 0.2343  [0m | [0m 0.9256  [0m | [0m 0.01691 [0m | [0m 0.216   [0m | [0m 7.767   [0m | [0m 465.1   [0m |




| [0m 20      [0m | [0m 0.9683  [0m | [0m 0.2527  [0m | [0m 0.1211  [0m | [0m 0.01161 [0m | [0m 0.0346  [0m | [0m 0.9754  [0m | [0m 5.705   [0m | [0m 339.2   [0m |




| [0m 21      [0m | [0m 0.9751  [0m | [0m 0.9641  [0m | [0m 0.9157  [0m | [0m 0.9401  [0m | [0m 0.6075  [0m | [0m 0.2008  [0m | [0m 7.985   [0m | [0m 107.1   [0m |




[200]	cv_agg's auc: 0.972359 + 0.00101818
| [0m 22      [0m | [0m 0.9733  [0m | [0m 0.9482  [0m | [0m 0.2278  [0m | [0m 0.9073  [0m | [0m 0.852   [0m | [0m 0.1307  [0m | [0m 1.649   [0m | [0m 333.3   [0m |




| [0m 23      [0m | [0m 0.975   [0m | [0m 0.9645  [0m | [0m 0.9892  [0m | [0m 0.1142  [0m | [0m 0.9996  [0m | [0m 0.1189  [0m | [0m 7.825   [0m | [0m 107.9   [0m |




| [0m 24      [0m | [0m 0.9751  [0m | [0m 0.9368  [0m | [0m 0.6904  [0m | [0m 0.7901  [0m | [0m 0.04687 [0m | [0m 0.1884  [0m | [0m 7.983   [0m | [0m 476.3   [0m |




| [0m 25      [0m | [0m 0.9749  [0m | [0m 0.9851  [0m | [0m 0.9471  [0m | [0m 0.04664 [0m | [0m 0.9857  [0m | [0m 0.13    [0m | [0m 7.196   [0m | [0m 254.8   [0m |




[200]	cv_agg's auc: 0.94369 + 0.00168823
| [0m 26      [0m | [0m 0.9525  [0m | [0m 0.8875  [0m | [0m 0.8375  [0m | [0m 0.7654  [0m | [0m 0.08071 [0m | [0m 0.1013  [0m | [0m 1.068   [0m | [0m 267.4   [0m |




[200]	cv_agg's auc: 0.975074 + 0.00100249
| [0m 27      [0m | [0m 0.9751  [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.0     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 8.0     [0m | [0m 301.4   [0m |




| [0m 28      [0m | [0m 0.9633  [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 1.0     [0m | [0m 1.0     [0m | [0m 1.0     [0m | [0m 8.0     [0m | [0m 221.3   [0m |




| [0m 29      [0m | [0m 0.9744  [0m | [0m 0.9845  [0m | [0m 0.36    [0m | [0m 0.3818  [0m | [0m 0.9731  [0m | [0m 0.477   [0m | [0m 7.802   [0m | [0m 380.2   [0m |




| [0m 30      [0m | [0m 0.9672  [0m | [0m 0.9735  [0m | [0m 0.2494  [0m | [0m 0.1758  [0m | [0m 0.8767  [0m | [0m 0.3853  [0m | [0m 1.097   [0m | [0m 195.5   [0m |




| [0m 31      [0m | [0m 0.9748  [0m | [0m 0.9805  [0m | [0m 0.322   [0m | [0m 0.05038 [0m | [0m 0.9622  [0m | [0m 0.3273  [0m | [0m 7.489   [0m | [0m 275.2   [0m |




| [0m 32      [0m | [0m 0.9737  [0m | [0m 0.9138  [0m | [0m 0.994   [0m | [0m 0.9195  [0m | [0m 0.9405  [0m | [0m 0.6393  [0m | [0m 7.967   [0m | [0m 360.7   [0m |




| [0m 33      [0m | [0m 0.97    [0m | [0m 0.4106  [0m | [0m 0.904   [0m | [0m 0.6109  [0m | [0m 0.9596  [0m | [0m 0.9504  [0m | [0m 7.287   [0m | [0m 286.4   [0m |




| [0m 34      [0m | [0m 0.9721  [0m | [0m 0.1332  [0m | [0m 0.1083  [0m | [0m 0.982   [0m | [0m 0.7189  [0m | [0m 0.4738  [0m | [0m 7.618   [0m | [0m 244.0   [0m |




| [0m 35      [0m | [0m 0.9734  [0m | [0m 0.1194  [0m | [0m 0.2441  [0m | [0m 0.7816  [0m | [0m 0.9605  [0m | [0m 0.1925  [0m | [0m 2.872   [0m | [0m 313.4   [0m |




| [0m 36      [0m | [0m 0.9727  [0m | [0m 0.5309  [0m | [0m 0.1005  [0m | [0m 0.06951 [0m | [0m 0.9712  [0m | [0m 0.7919  [0m | [0m 7.862   [0m | [0m 174.9   [0m |




| [0m 37      [0m | [0m 0.9738  [0m | [0m 0.8912  [0m | [0m 0.954   [0m | [0m 0.3108  [0m | [0m 0.1035  [0m | [0m 0.3656  [0m | [0m 1.621   [0m | [0m 123.1   [0m |




| [0m 38      [0m | [0m 0.9739  [0m | [0m 0.9942  [0m | [0m 0.8225  [0m | [0m 0.1243  [0m | [0m 0.9595  [0m | [0m 0.8853  [0m | [0m 2.717   [0m | [0m 232.8   [0m |




| [0m 39      [0m | [0m 0.9749  [0m | [0m 0.9904  [0m | [0m 0.9794  [0m | [0m 0.4546  [0m | [0m 0.992   [0m | [0m 0.1109  [0m | [0m 6.216   [0m | [0m 317.9   [0m |




| [0m 40      [0m | [0m 0.9752  [0m | [0m 0.9062  [0m | [0m 0.3002  [0m | [0m 0.05777 [0m | [0m 0.8953  [0m | [0m 0.1082  [0m | [0m 7.91    [0m | [0m 353.9   [0m |




| [0m 41      [0m | [0m 0.975   [0m | [0m 0.8948  [0m | [0m 0.3382  [0m | [0m 0.1039  [0m | [0m 0.1306  [0m | [0m 0.1367  [0m | [0m 6.668   [0m | [0m 154.3   [0m |




[200]	cv_agg's auc: 0.973763 + 0.000993758
| [0m 42      [0m | [0m 0.9743  [0m | [0m 0.9925  [0m | [0m 0.8756  [0m | [0m 0.8096  [0m | [0m 0.2901  [0m | [0m 0.2237  [0m | [0m 1.579   [0m | [0m 397.9   [0m |




[200]	cv_agg's auc: 0.957507 + 0.00159482
| [0m 43      [0m | [0m 0.9661  [0m | [0m 0.9778  [0m | [0m 0.7476  [0m | [0m 0.4217  [0m | [0m 0.107   [0m | [0m 0.1681  [0m | [0m 1.155   [0m | [0m 382.8   [0m |




| [0m 44      [0m | [0m 0.9748  [0m | [0m 0.5174  [0m | [0m 0.5166  [0m | [0m 0.5305  [0m | [0m 0.1954  [0m | [0m 0.1346  [0m | [0m 7.956   [0m | [0m 407.6   [0m |




| [0m 45      [0m | [0m 0.9752  [0m | [0m 0.9969  [0m | [0m 0.2418  [0m | [0m 0.9198  [0m | [0m 0.8271  [0m | [0m 0.2751  [0m | [0m 7.264   [0m | [0m 397.6   [0m |




| [0m 46      [0m | [0m 0.9752  [0m | [0m 0.8999  [0m | [0m 0.4187  [0m | [0m 0.8123  [0m | [0m 0.1002  [0m | [0m 0.1812  [0m | [0m 7.944   [0m | [0m 367.9   [0m |




[200]	cv_agg's auc: 0.957126 + 0.00155272
| [0m 47      [0m | [0m 0.9636  [0m | [0m 0.9624  [0m | [0m 0.6305  [0m | [0m 0.033   [0m | [0m 0.7814  [0m | [0m 0.1631  [0m | [0m 1.166   [0m | [0m 302.2   [0m |




| [0m 48      [0m | [0m 0.9716  [0m | [0m 0.6102  [0m | [0m 0.8793  [0m | [0m 0.09285 [0m | [0m 0.005318[0m | [0m 0.7832  [0m | [0m 7.889   [0m | [0m 120.0   [0m |




| [0m 49      [0m | [0m 0.97    [0m | [0m 0.209   [0m | [0m 0.246   [0m | [0m 0.2069  [0m | [0m 0.9998  [0m | [0m 0.8532  [0m | [0m 7.513   [0m | [0m 272.7   [0m |




| [0m 50      [0m | [0m 0.9752  [0m | [0m 0.8479  [0m | [0m 0.9794  [0m | [0m 0.9317  [0m | [0m 0.8974  [0m | [0m 0.1139  [0m | [0m 7.275   [0m | [0m 174.7   [0m |




| [0m 51      [0m | [0m 0.975   [0m | [0m 0.9279  [0m | [0m 0.1155  [0m | [0m 0.3328  [0m | [0m 0.9953  [0m | [0m 0.1206  [0m | [0m 6.988   [0m | [0m 236.8   [0m |




| [0m 52      [0m | [0m 0.9693  [0m | [0m 0.1192  [0m | [0m 0.1936  [0m | [0m 0.9357  [0m | [0m 0.9289  [0m | [0m 0.5908  [0m | [0m 1.132   [0m | [0m 408.0   [0m |




| [0m 53      [0m | [0m 0.9751  [0m | [0m 0.9431  [0m | [0m 0.1446  [0m | [0m 0.8228  [0m | [0m 0.9911  [0m | [0m 0.1059  [0m | [0m 7.463   [0m | [0m 278.6   [0m |




[200]	cv_agg's auc: 0.972788 + 0.00110372
| [0m 54      [0m | [0m 0.9736  [0m | [0m 0.1659  [0m | [0m 0.7916  [0m | [0m 0.05686 [0m | [0m 0.01987 [0m | [0m 0.1741  [0m | [0m 2.23    [0m | [0m 462.6   [0m |




| [0m 55      [0m | [0m 0.9734  [0m | [0m 0.1215  [0m | [0m 0.9895  [0m | [0m 0.8935  [0m | [0m 0.7998  [0m | [0m 0.1436  [0m | [0m 7.75    [0m | [0m 394.5   [0m |




| [0m 56      [0m | [0m 0.9494  [0m | [0m 0.9929  [0m | [0m 0.493   [0m | [0m 0.7185  [0m | [0m 0.6997  [0m | [0m 0.1682  [0m | [0m 1.057   [0m | [0m 142.3   [0m |




| [0m 57      [0m | [0m 0.9735  [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.6686  [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 7.989   [0m | [0m 163.1   [0m |




| [0m 58      [0m | [0m 0.9709  [0m | [0m 0.9488  [0m | [0m 0.2201  [0m | [0m 0.8376  [0m | [0m 0.9907  [0m | [0m 0.1474  [0m | [0m 1.976   [0m | [0m 111.5   [0m |




| [0m 59      [0m | [0m 0.9745  [0m | [0m 0.6406  [0m | [0m 0.9631  [0m | [0m 0.01966 [0m | [0m 0.1548  [0m | [0m 0.1832  [0m | [0m 2.571   [0m | [0m 177.0   [0m |




| [0m 60      [0m | [0m 0.975   [0m | [0m 0.8957  [0m | [0m 0.2499  [0m | [0m 0.02478 [0m | [0m 0.4911  [0m | [0m 0.1542  [0m | [0m 7.893   [0m | [0m 132.0   [0m |




| [0m 61      [0m | [0m 0.9723  [0m | [0m 0.9126  [0m | [0m 0.8027  [0m | [0m 0.1216  [0m | [0m 0.02638 [0m | [0m 0.9448  [0m | [0m 7.443   [0m | [0m 445.9   [0m |




| [0m 62      [0m | [0m 0.974   [0m | [0m 0.2925  [0m | [0m 0.8977  [0m | [0m 0.6719  [0m | [0m 0.1195  [0m | [0m 0.2036  [0m | [0m 7.775   [0m | [0m 100.9   [0m |




| [0m 63      [0m | [0m 0.9744  [0m | [0m 0.4079  [0m | [0m 0.9636  [0m | [0m 0.0511  [0m | [0m 0.2428  [0m | [0m 0.1623  [0m | [0m 7.968   [0m | [0m 142.1   [0m |




| [0m 64      [0m | [0m 0.9748  [0m | [0m 0.8546  [0m | [0m 0.8783  [0m | [0m 0.1707  [0m | [0m 0.9386  [0m | [0m 0.183   [0m | [0m 7.455   [0m | [0m 245.3   [0m |




| [0m 65      [0m | [0m 0.975   [0m | [0m 0.8662  [0m | [0m 0.1477  [0m | [0m 0.7734  [0m | [0m 0.06851 [0m | [0m 0.2465  [0m | [0m 6.365   [0m | [0m 167.6   [0m |




| [0m 66      [0m | [0m 0.9721  [0m | [0m 0.978   [0m | [0m 0.9554  [0m | [0m 0.6609  [0m | [0m 0.01023 [0m | [0m 0.9065  [0m | [0m 7.57    [0m | [0m 498.7   [0m |




| [0m 67      [0m | [0m 0.9721  [0m | [0m 0.986   [0m | [0m 0.9124  [0m | [0m 0.05657 [0m | [0m 0.04568 [0m | [0m 0.9699  [0m | [0m 7.922   [0m | [0m 461.5   [0m |




| [0m 68      [0m | [0m 0.8827  [0m | [0m 0.9572  [0m | [0m 0.1452  [0m | [0m 0.9302  [0m | [0m 0.9948  [0m | [0m 0.114   [0m | [0m 1.525   [0m | [0m 450.5   [0m |




| [0m 69      [0m | [0m 0.9559  [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.0     [0m | [0m 8.0     [0m | [0m 430.2   [0m |




| [0m 70      [0m | [0m 0.9732  [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 0.1     [0m | [0m 4.429   [0m | [0m 472.5   [0m |




[200]	cv_agg's auc: 0.969681 + 0.00113164
| [0m 71      [0m | [0m 0.9715  [0m | [0m 0.8306  [0m | [0m 0.967   [0m | [0m 0.317   [0m | [0m 0.04263 [0m | [0m 0.6085  [0m | [0m 1.087   [0m | [0m 417.3   [0m |




[200]	cv_agg's auc: 0.974411 + 0.000924949
| [0m 72      [0m | [0m 0.9748  [0m | [0m 0.6058  [0m | [0m 0.9488  [0m | [0m 0.9278  [0m | [0m 0.01684 [0m | [0m 0.1436  [0m | [0m 2.52    [0m | [0m 466.2   [0m |




| [0m 73      [0m | [0m 0.9735  [0m | [0m 0.788   [0m | [0m 0.1343  [0m | [0m 0.1123  [0m | [0m 0.1366  [0m | [0m 0.6869  [0m | [0m 2.672   [0m | [0m 467.8   [0m |




[200]	cv_agg's auc: 0.973166 + 0.000924575
[400]	cv_agg's auc: 0.974438 + 0.00101849
| [0m 74      [0m | [0m 0.9745  [0m | [0m 0.9547  [0m | [0m 0.9858  [0m | [0m 0.09725 [0m | [0m 0.8863  [0m | [0m 0.1489  [0m | [0m 1.981   [0m | [0m 405.8   [0m |




| [0m 75      [0m | [0m 0.9728  [0m | [0m 0.1141  [0m | [0m 0.9284  [0m | [0m 0.7815  [0m | [0m 0.02381 [0m | [0m 0.2023  [0m | [0m 7.77    [0m | [0m 442.8   [0m |




| [0m 76      [0m | [0m 0.9752  [0m | [0m 0.958   [0m | [0m 0.3499  [0m | [0m 0.9802  [0m | [0m 0.9439  [0m | [0m 0.1095  [0m | [0m 7.356   [0m | [0m 493.8   [0m |




[200]	cv_agg's auc: 0.963606 + 0.00140557
| [0m 77      [0m | [0m 0.9679  [0m | [0m 0.1949  [0m | [0m 0.7629  [0m | [0m 0.9903  [0m | [0m 0.7975  [0m | [0m 0.2171  [0m | [0m 1.29    [0m | [0m 352.0   [0m |




| [0m 78      [0m | [0m 0.9745  [0m | [0m 0.2569  [0m | [0m 0.3098  [0m | [0m 0.881   [0m | [0m 0.6538  [0m | [0m 0.1146  [0m | [0m 7.736   [0m | [0m 327.7   [0m |




| [0m 79      [0m | [0m 0.9713  [0m | [0m 0.114   [0m | [0m 0.9399  [0m | [0m 0.9696  [0m | [0m 0.9752  [0m | [0m 0.9672  [0m | [0m 1.392   [0m | [0m 490.0   [0m |




| [0m 80      [0m | [0m 0.9746  [0m | [0m 0.9887  [0m | [0m 0.2636  [0m | [0m 0.9785  [0m | [0m 0.962   [0m | [0m 0.1281  [0m | [0m 4.027   [0m | [0m 184.8   [0m |




[200]	cv_agg's auc: 0.972973 + 0.00117537
| [0m 81      [0m | [0m 0.9734  [0m | [0m 0.1073  [0m | [0m 0.1942  [0m | [0m 0.121   [0m | [0m 0.9656  [0m | [0m 0.1024  [0m | [0m 3.002   [0m | [0m 283.2   [0m |




| [0m 82      [0m | [0m 0.9738  [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 0.9382  [0m | [0m 1.0     [0m | [0m 1.0     [0m | [0m 8.0     [0m | [0m 490.9   [0m |




[200]	cv_agg's auc: 0.973207 + 0.00121019
| [0m 83      [0m | [0m 0.9735  [0m | [0m 0.1     [0m | [0m 1.0     [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 0.1     [0m | [0m 3.276   [0m | [0m 465.9   [0m |




| [0m 84      [0m | [0m 0.9712  [0m | [0m 0.2981  [0m | [0m 0.1253  [0m | [0m 0.8405  [0m | [0m 0.9649  [0m | [0m 0.9096  [0m | [0m 6.81    [0m | [0m 499.7   [0m |




| [0m 85      [0m | [0m 0.9746  [0m | [0m 0.8657  [0m | [0m 0.9173  [0m | [0m 0.9092  [0m | [0m 0.2099  [0m | [0m 0.2599  [0m | [0m 6.559   [0m | [0m 405.4   [0m |




[200]	cv_agg's auc: 0.971758 + 0.000896227
| [0m 86      [0m | [0m 0.973   [0m | [0m 0.294   [0m | [0m 0.1015  [0m | [0m 0.8701  [0m | [0m 0.9678  [0m | [0m 0.1244  [0m | [0m 1.825   [0m | [0m 366.8   [0m |




| [0m 87      [0m | [0m 0.9715  [0m | [0m 0.2598  [0m | [0m 0.9557  [0m | [0m 0.9949  [0m | [0m 0.9889  [0m | [0m 0.5207  [0m | [0m 6.52    [0m | [0m 125.6   [0m |




| [0m 88      [0m | [0m 0.9751  [0m | [0m 0.9956  [0m | [0m 0.4247  [0m | [0m 0.4873  [0m | [0m 0.4393  [0m | [0m 0.129   [0m | [0m 7.755   [0m | [0m 204.8   [0m |




[200]	cv_agg's auc: 0.94753 + 0.00147924
| [0m 89      [0m | [0m 0.9494  [0m | [0m 0.8576  [0m | [0m 0.3652  [0m | [0m 0.1287  [0m | [0m 0.8496  [0m | [0m 0.1145  [0m | [0m 1.076   [0m | [0m 213.0   [0m |




| [0m 90      [0m | [0m 0.9738  [0m | [0m 0.1367  [0m | [0m 0.151   [0m | [0m 0.5961  [0m | [0m 0.6116  [0m | [0m 0.1722  [0m | [0m 7.963   [0m | [0m 196.3   [0m |




| [0m 91      [0m | [0m 0.9749  [0m | [0m 0.9871  [0m | [0m 0.6964  [0m | [0m 0.945   [0m | [0m 0.03849 [0m | [0m 0.1815  [0m | [0m 6.823   [0m | [0m 227.8   [0m |




| [95m 92      [0m | [95m 0.9753  [0m | [95m 0.9898  [0m | [95m 0.3058  [0m | [95m 0.9222  [0m | [95m 0.9762  [0m | [95m 0.1177  [0m | [95m 7.98    [0m | [95m 198.3   [0m |




[200]	cv_agg's auc: 0.944924 + 0.00185739
| [0m 93      [0m | [0m 0.9506  [0m | [0m 0.8328  [0m | [0m 0.9184  [0m | [0m 0.9991  [0m | [0m 0.95    [0m | [0m 0.1063  [0m | [0m 1.096   [0m | [0m 239.9   [0m |




| [0m 94      [0m | [0m 0.9738  [0m | [0m 0.956   [0m | [0m 0.1154  [0m | [0m 0.2205  [0m | [0m 0.06174 [0m | [0m 0.7983  [0m | [0m 5.839   [0m | [0m 228.8   [0m |




| [0m 95      [0m | [0m 0.9748  [0m | [0m 0.9808  [0m | [0m 0.3333  [0m | [0m 0.9217  [0m | [0m 0.003243[0m | [0m 0.335   [0m | [0m 7.943   [0m | [0m 309.4   [0m |




| [0m 96      [0m | [0m 0.9726  [0m | [0m 0.1671  [0m | [0m 0.7469  [0m | [0m 0.02011 [0m | [0m 0.995   [0m | [0m 0.3196  [0m | [0m 6.733   [0m | [0m 230.6   [0m |




| [0m 97      [0m | [0m 0.9746  [0m | [0m 0.2684  [0m | [0m 0.1158  [0m | [0m 0.9034  [0m | [0m 0.09605 [0m | [0m 0.1567  [0m | [0m 7.774   [0m | [0m 152.8   [0m |




| [0m 98      [0m | [0m 0.9752  [0m | [0m 0.9918  [0m | [0m 0.3085  [0m | [0m 0.9125  [0m | [0m 0.04186 [0m | [0m 0.1069  [0m | [0m 7.644   [0m | [0m 385.4   [0m |




| [0m 99      [0m | [0m 0.9739  [0m | [0m 0.1     [0m | [0m 0.1     [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 0.1     [0m | [0m 4.709   [0m | [0m 400.5   [0m |




| [0m 100     [0m | [0m 0.9729  [0m | [0m 0.8829  [0m | [0m 0.1073  [0m | [0m 0.05346 [0m | [0m 0.8142  [0m | [0m 0.9542  [0m | [0m 7.746   [0m | [0m 101.8   [0m |
