In [34]:
import os
import csv
import numpy as np
import pandas as pd
import gc
import time
from contextlib import contextmanager
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import KFold, StratifiedKFold
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from hyperopt import hp, tpe, STATUS_OK
from hyperopt.fmin import fmin
from hyperopt import Trials

In [2]:
# Directory containing the Home Credit competition CSV files.
# Set the HOME_CREDIT_DATA_DIR environment variable to point somewhere else;
# the original local path is kept as the fallback so existing runs are unchanged.
DATA_DIR = os.environ.get(
    'HOME_CREDIT_DATA_DIR',
    '/home/yukimiki/.kaggle/competitions/home-credit-default-risk/',
)

In [3]:
@contextmanager
def timer(title):
    """Context manager that reports the wall-clock duration of its body.

    When the wrapped block finishes normally, prints
    "<title> - done in <N>s" with the elapsed time rounded to whole seconds.
    If the wrapped block raises, the exception propagates and nothing is printed.
    """
    started_at = time.time()
    yield
    elapsed = time.time() - started_at
    print("{} - done in {:.0f}s".format(title, elapsed))

In [4]:
# One-hot encoding for categorical columns with get_dummies
def one_hot_encoder(df, nan_as_category = True):
    """One-hot encode every object-dtype column of ``df`` with ``pd.get_dummies``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    nan_as_category : bool
        Forwarded to ``get_dummies`` as ``dummy_na``.

    Returns
    -------
    (pandas.DataFrame, list)
        The encoded frame and the names of the columns that were not present
        in the input (i.e. the generated dummy columns), in frame order.
    """
    columns_before = list(df.columns)
    object_columns = []
    for name in df.columns:
        if df[name].dtype == 'object':
            object_columns.append(name)
    encoded = pd.get_dummies(df, columns=object_columns, dummy_na=nan_as_category)
    added_columns = [name for name in encoded.columns if name not in columns_before]
    return encoded, added_columns


In [5]:
# Preprocess application_train.csv and application_test.csv
def application_train_test(num_rows = None, nan_as_category = False):
    """Load and preprocess application_train.csv and application_test.csv.

    The two files are read from DATA_DIR, stacked into one frame (test rows
    have no TARGET value), encoded, and extended with a few ratio features.

    Parameters
    ----------
    num_rows : int or None
        Passed to ``pd.read_csv`` as ``nrows`` for each file; None reads all rows.
    nan_as_category : bool
        Forwarded to ``one_hot_encoder``.

    Returns
    -------
    pandas.DataFrame
        Combined train + test frame. Because ``reset_index()`` is called without
        ``drop=True``, the original row positions are kept in an 'index' column.
    """
    # Read data and merge
    train = pd.read_csv(os.path.join(DATA_DIR, 'application_train.csv'), nrows= num_rows)
    test_df = pd.read_csv(os.path.join(DATA_DIR, 'application_test.csv'), nrows= num_rows)
    print("Train samples: {}, test samples: {}".format(len(train), len(test_df)))
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
    df = pd.concat([train, test_df]).reset_index()
    # Optional: remove applications with XNA CODE_GENDER.
    # .copy() gives an independent frame so the column assignments below do not
    # write into a slice of the concatenated frame.
    df = df[df['CODE_GENDER'] != 'XNA'].copy()
    # Disabled experiment: drop certain columns.
    # drop_columns = ['AMT_REQ_CREDIT_BUREAU_DAY', 'AMT_REQ_CREDIT_BUREAU_HOUR', 'AMT_REQ_CREDIT_BUREAU_MON'
    #                 , 'AMT_REQ_CREDIT_BUREAU_QRT', 'AMT_REQ_CREDIT_BUREAU_WEEK', 'LIVINGAPARTMENTS_MODE'
    #                 , 'BASEMENTAREA_MODE', 'BASEMENTAREA_MEDI'
    #                 , 'COMMONAREA_AVG', 'DEF_30_CNT_SOCIAL_CIRCLE', 'DEF_60_CNT_SOCIAL_CIRCLE', 'ELEVATORS_MODE'
    #                 , 'ELEVATORS_AVG', 'ENTRANCES_MODE', 'ENTRANCES_AVG', 'FLOORSMAX_MODE', 'FLOORSMAX_AVG'
    #                 , 'FLOORSMIN_MODE', 'FLOORSMIN_AVG', 'LANDAREA_MODE', 'LANDAREA_AVG'
    #                 , 'LIVINGAPARTMENTS_MEDI', 'LIVINGAPARTMENTS_MODE', 'LIVINGAREA_AVG', 'LIVINGAREA_MEDI', 'LIVINGAREA_MODE'
    #                 , 'NONLIVINGAPARTMENTS_MEDI', 'NONLIVINGAPARTMENTS_MODE', 'NONLIVINGAREA_MEDI', 'NONLIVINGAREA_MODE'
    #                 , 'OBS_60_CNT_SOCIAL_CIRCLE', 'REGION_RATING_CLIENT_W_CITY', 'YEARS_BEGINEXPLUATATION_MEDI'
    #                 , 'YEARS_BEGINEXPLUATATION_MODE', 'YEARS_BUILD_MEDI', 'YEARS_BUILD_MODE']
    # df = df.drop(drop_columns, axis=1)

    # Categorical features with binary encoding (integer codes from pd.factorize)
    for bin_feature in ['CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY']:
        df[bin_feature], _ = pd.factorize(df[bin_feature])
    # Remaining object-dtype columns are one-hot encoded
    df, _ = one_hot_encoder(df, nan_as_category)

    # DAYS_EMPLOYED uses 365243 as a placeholder: 365243 -> NaN.
    # Assign the result back instead of a chained inplace replace, which does
    # not update the frame when pandas Copy-on-Write is active.
    df['DAYS_EMPLOYED'] = df['DAYS_EMPLOYED'].replace(365243, np.nan)
    # Some simple new features (ratios and differences)
    df['DAYS_EMPLOYED_PERC'] = df['DAYS_EMPLOYED'] / df['DAYS_BIRTH']
    df['DAYS_EMPLOYED_DIFF'] = df['DAYS_EMPLOYED'] - df['DAYS_BIRTH']
    df['INCOME_CREDIT_PERC'] = df['AMT_INCOME_TOTAL'] / df['AMT_CREDIT']
    df['INCOME_PER_PERSON'] = df['AMT_INCOME_TOTAL'] / df['CNT_FAM_MEMBERS']
    df['ANNUITY_INCOME_PERC'] = df['AMT_ANNUITY'] / df['AMT_INCOME_TOTAL']
    df['PAYMENT_RATE'] = df['AMT_ANNUITY'] / df['AMT_CREDIT']
    return df

In [6]:
# Preprocess bureau.csv and bureau_balance.csv
def bureau_and_balance(num_rows = None, nan_as_category = True):
    """Aggregate bureau.csv and bureau_balance.csv to one row per SK_ID_CURR.

    bureau_balance is first aggregated per SK_ID_BUREAU and joined onto bureau;
    the result is then aggregated per SK_ID_CURR three times: over all credits
    (BURO_ prefix), over rows with CREDIT_ACTIVE_Active == 1 (ACTIVE_ prefix)
    and over rows with CREDIT_ACTIVE_Closed == 1 (CLOSED_ prefix).

    Parameters
    ----------
    num_rows : int or None
        Passed as ``nrows`` to both ``pd.read_csv`` calls.
    nan_as_category : bool
        Forwarded to ``one_hot_encoder`` for both tables.

    Returns
    -------
    pandas.DataFrame
        Aggregated features indexed by SK_ID_CURR.
    """
    def _flatten(frame, prefix=''):
        # Collapse the (column, statistic) MultiIndex into PREFIX + COLUMN_STAT names.
        frame.columns = pd.Index(
            [prefix + column + "_" + stat.upper() for column, stat in frame.columns.tolist()]
        )
        return frame

    bureau = pd.read_csv(os.path.join(DATA_DIR, 'bureau.csv'), nrows = num_rows)
    bb = pd.read_csv(os.path.join(DATA_DIR, 'bureau_balance.csv'), nrows = num_rows)
    bb, bb_cat = one_hot_encoder(bb, nan_as_category)
    bureau, bureau_cat = one_hot_encoder(bureau, nan_as_category)

    # Bureau balance: aggregate per bureau credit, then attach to bureau.csv
    bb_aggregations = {'MONTHS_BALANCE': ['min', 'max', 'size']}
    bb_aggregations.update({col: ['mean'] for col in bb_cat})
    bb_agg = _flatten(bb.groupby('SK_ID_BUREAU').agg(bb_aggregations))
    bureau = bureau.join(bb_agg, how='left', on='SK_ID_BUREAU')
    bureau = bureau.drop(columns=['SK_ID_BUREAU'])

    # Bureau and bureau_balance numeric features
    num_aggregations = {
        'DAYS_CREDIT': ['min', 'max', 'mean', 'var'],
        'DAYS_CREDIT_ENDDATE': ['min', 'max', 'mean'],
        'DAYS_CREDIT_UPDATE': ['mean'],
        'CREDIT_DAY_OVERDUE': ['max', 'mean'],
        'AMT_CREDIT_MAX_OVERDUE': ['mean'],
        'AMT_CREDIT_SUM': ['max', 'mean', 'sum'],
        'AMT_CREDIT_SUM_DEBT': ['max', 'mean', 'sum'],
        'AMT_CREDIT_SUM_OVERDUE': ['mean'],
        'AMT_CREDIT_SUM_LIMIT': ['mean', 'sum'],
        'AMT_ANNUITY': ['max', 'mean'],
        'CNT_CREDIT_PROLONG': ['sum'],
        'MONTHS_BALANCE_MIN': ['min'],
        'MONTHS_BALANCE_MAX': ['max'],
        'MONTHS_BALANCE_SIZE': ['mean', 'sum']
    }
    # Bureau and bureau_balance categorical features (all averaged)
    cat_aggregations = {cat: ['mean'] for cat in bureau_cat}
    cat_aggregations.update({cat + "_MEAN": ['mean'] for cat in bb_cat})

    all_aggregations = {**num_aggregations, **cat_aggregations}
    bureau_agg = _flatten(bureau.groupby('SK_ID_CURR').agg(all_aggregations), 'BURO_')

    # Active and closed credits - using only numerical aggregations
    status_subsets = [
        ('ACTIVE_', 'CREDIT_ACTIVE_Active'),
        ('CLOSED_', 'CREDIT_ACTIVE_Closed'),
    ]
    for prefix, flag_column in status_subsets:
        subset = bureau[bureau[flag_column] == 1]
        subset_agg = _flatten(subset.groupby('SK_ID_CURR').agg(num_aggregations), prefix)
        bureau_agg = bureau_agg.join(subset_agg, how='left', on='SK_ID_CURR')

    return bureau_agg


In [7]:
# Preprocess previous_applications.csv
def previous_applications(num_rows = None, nan_as_category = True):
    """Aggregate previous_application.csv to one row per SK_ID_CURR.

    Produces PREV_-prefixed aggregates over all previous applications plus
    APPROVED_- and REFUSED_-prefixed numeric aggregates over the rows whose
    NAME_CONTRACT_STATUS dummy is Approved / Refused.

    Parameters
    ----------
    num_rows : int or None
        Passed as ``nrows`` to ``pd.read_csv``.
    nan_as_category : bool
        Forwarded to ``one_hot_encoder``. (Previously this argument was ignored
        and True was always used; the default is True, so default calls are
        unchanged.)

    Returns
    -------
    pandas.DataFrame
        Aggregated features indexed by SK_ID_CURR.
    """
    prev = pd.read_csv(os.path.join(DATA_DIR, 'previous_application.csv'), nrows = num_rows)
    prev, cat_cols = one_hot_encoder(prev, nan_as_category)
    # The DAYS_* columns use 365243 as a placeholder: 365243 -> NaN.
    # Assign back rather than using a chained inplace replace, which does not
    # update the frame when pandas Copy-on-Write is active.
    for days_col in ['DAYS_FIRST_DRAWING', 'DAYS_FIRST_DUE', 'DAYS_LAST_DUE_1ST_VERSION',
                     'DAYS_LAST_DUE', 'DAYS_TERMINATION']:
        prev[days_col] = prev[days_col].replace(365243, np.nan)
    # Add feature: value asked / value received ratio
    # NOTE(review): rows with AMT_CREDIT == 0 would yield inf/NaN here - confirm whether they occur.
    prev['APP_CREDIT_PERC'] = prev['AMT_APPLICATION'] / prev['AMT_CREDIT']
    # Previous applications numeric features
    num_aggregations = {
        'AMT_ANNUITY': ['min', 'max', 'mean'],
        'AMT_APPLICATION': ['min', 'max', 'mean'],
        'AMT_CREDIT': ['min', 'max', 'mean'],
        'APP_CREDIT_PERC': ['min', 'max', 'mean', 'var'],
        'AMT_DOWN_PAYMENT': ['min', 'max', 'mean'],
        'AMT_GOODS_PRICE': ['min', 'max', 'mean'],
        'HOUR_APPR_PROCESS_START': ['min', 'max', 'mean'],
        'RATE_DOWN_PAYMENT': ['min', 'max', 'mean'],
        'DAYS_DECISION': ['min', 'max', 'mean'],
        'CNT_PAYMENT': ['mean', 'sum'],
    }
    # Previous applications categorical features (all averaged)
    cat_aggregations = {cat: ['mean'] for cat in cat_cols}

    prev_agg = prev.groupby('SK_ID_CURR').agg({**num_aggregations, **cat_aggregations})
    prev_agg.columns = pd.Index(['PREV_' + e[0] + "_" + e[1].upper() for e in prev_agg.columns.tolist()])
    # Approved and refused applications - only numerical features
    for prefix, flag_column in [('APPROVED_', 'NAME_CONTRACT_STATUS_Approved'),
                                ('REFUSED_', 'NAME_CONTRACT_STATUS_Refused')]:
        subset = prev[prev[flag_column] == 1]
        subset_agg = subset.groupby('SK_ID_CURR').agg(num_aggregations)
        subset_agg.columns = pd.Index([prefix + e[0] + "_" + e[1].upper() for e in subset_agg.columns.tolist()])
        prev_agg = prev_agg.join(subset_agg, how='left', on='SK_ID_CURR')
    return prev_agg


In [8]:
# Preprocess POS_CASH_balance.csv
def pos_cash(num_rows = None, nan_as_category = True):
    """Aggregate POS_CASH_balance.csv to one row per SK_ID_CURR.

    Parameters
    ----------
    num_rows : int or None
        Passed as ``nrows`` to ``pd.read_csv``.
    nan_as_category : bool
        Forwarded to ``one_hot_encoder``. (Previously this argument was ignored
        and True was always used; the default is True, so default calls are
        unchanged.)

    Returns
    -------
    pandas.DataFrame
        POS_-prefixed aggregates indexed by SK_ID_CURR, plus POS_COUNT, the
        number of rows per SK_ID_CURR.
    """
    pos = pd.read_csv(os.path.join(DATA_DIR, 'POS_CASH_balance.csv'), nrows = num_rows)
    pos, cat_cols = one_hot_encoder(pos, nan_as_category)
    # Features
    aggregations = {
        'MONTHS_BALANCE': ['max', 'mean', 'size'],
        'SK_DPD': ['max', 'mean'],
        'SK_DPD_DEF': ['max', 'mean']
    }
    # Dummy columns are averaged
    for cat in cat_cols:
        aggregations[cat] = ['mean']

    grouped = pos.groupby('SK_ID_CURR')
    pos_agg = grouped.agg(aggregations)
    pos_agg.columns = pd.Index(['POS_' + e[0] + "_" + e[1].upper() for e in pos_agg.columns.tolist()])
    # Count pos cash rows per client
    pos_agg['POS_COUNT'] = grouped.size()
    return pos_agg

In [9]:
# Preprocess installments_payments.csv
def installments_payments(num_rows = None, nan_as_category = True):
    """Aggregate installments_payments.csv to one row per SK_ID_CURR.

    Parameters
    ----------
    num_rows : int or None
        Passed as ``nrows`` to ``pd.read_csv``.
    nan_as_category : bool
        Forwarded to ``one_hot_encoder``. (Previously this argument was ignored
        and True was always used; the default is True, so default calls are
        unchanged.)

    Returns
    -------
    pandas.DataFrame
        INSTAL_-prefixed aggregates indexed by SK_ID_CURR, plus INSTAL_COUNT,
        the number of rows per SK_ID_CURR.
    """
    ins = pd.read_csv(os.path.join(DATA_DIR, 'installments_payments.csv'), nrows = num_rows)
    ins, cat_cols = one_hot_encoder(ins, nan_as_category)
    # Percentage and difference paid in each installment (amount paid and installment value)
    ins['PAYMENT_PERC'] = ins['AMT_PAYMENT'] / ins['AMT_INSTALMENT']
    ins['PAYMENT_DIFF'] = ins['AMT_INSTALMENT'] - ins['AMT_PAYMENT']
    # Days past due and days before due (no negative values).
    # Series.where keeps values where the condition holds and substitutes 0
    # elsewhere; NaN fails the comparison and so becomes 0, exactly as the
    # previous row-wise lambda did, but without a Python call per row.
    days_late = ins['DAYS_ENTRY_PAYMENT'] - ins['DAYS_INSTALMENT']
    days_early = ins['DAYS_INSTALMENT'] - ins['DAYS_ENTRY_PAYMENT']
    ins['DPD'] = days_late.where(days_late > 0, 0)
    ins['DBD'] = days_early.where(days_early > 0, 0)
    # Features: Perform aggregations
    aggregations = {
        'NUM_INSTALMENT_VERSION': ['nunique'],
        'DPD': ['max', 'mean', 'sum'],
        'DBD': ['max', 'mean', 'sum'],
        'PAYMENT_PERC': ['max', 'mean', 'sum', 'var'],
        'PAYMENT_DIFF': ['max', 'mean', 'sum', 'var'],
        'AMT_INSTALMENT': ['max', 'mean', 'sum'],
        'AMT_PAYMENT': ['min', 'max', 'mean', 'sum'],
        'DAYS_ENTRY_PAYMENT': ['max', 'mean', 'sum']
    }
    # Dummy columns (if any) are averaged
    for cat in cat_cols:
        aggregations[cat] = ['mean']
    grouped = ins.groupby('SK_ID_CURR')
    ins_agg = grouped.agg(aggregations)
    ins_agg.columns = pd.Index(['INSTAL_' + e[0] + "_" + e[1].upper() for e in ins_agg.columns.tolist()])
    # Count installment rows per client
    ins_agg['INSTAL_COUNT'] = grouped.size()
    return ins_agg

In [10]:
# Preprocess credit_card_balance.csv
def credit_card_balance(num_rows = None, nan_as_category = True):
    """Aggregate credit_card_balance.csv to one row per SK_ID_CURR.

    Every remaining column (after dropping SK_ID_PREV) is aggregated with
    min, max, mean, sum and var.

    Parameters
    ----------
    num_rows : int or None
        Passed as ``nrows`` to ``pd.read_csv``.
    nan_as_category : bool
        Forwarded to ``one_hot_encoder``. (Previously this argument was ignored
        and True was always used; the default is True, so default calls are
        unchanged.)

    Returns
    -------
    pandas.DataFrame
        CC_-prefixed aggregates indexed by SK_ID_CURR, plus CC_COUNT, the
        number of rows per SK_ID_CURR.
    """
    cc = pd.read_csv(os.path.join(DATA_DIR, 'credit_card_balance.csv'), nrows = num_rows)
    cc, _ = one_hot_encoder(cc, nan_as_category)
    # General aggregations; SK_ID_PREV is an identifier, so it is excluded
    cc = cc.drop(columns=['SK_ID_PREV'])
    grouped = cc.groupby('SK_ID_CURR')
    cc_agg = grouped.agg(['min', 'max', 'mean', 'sum', 'var'])
    cc_agg.columns = pd.Index(['CC_' + e[0] + "_" + e[1].upper() for e in cc_agg.columns.tolist()])
    # Count credit card rows per client
    cc_agg['CC_COUNT'] = grouped.size()

    return cc_agg

In [11]:
# Display/plot feature importance
def display_importances(feature_importance_df_):
    """Plot a horizontal bar chart of the 40 features with the highest mean importance.

    ``feature_importance_df_`` must have "feature" and "importance" columns.
    Features are ranked by their mean importance; all rows belonging to the top
    40 features are then passed to seaborn's barplot.
    """
    mean_importance = feature_importance_df_[["feature", "importance"]].groupby("feature").mean()
    ranked = mean_importance.sort_values(by="importance", ascending=False)
    top_features = ranked[:40].index
    is_top = feature_importance_df_.feature.isin(top_features)
    plot_data = feature_importance_df_.loc[is_top].sort_values(by="importance", ascending=False)
    plt.figure(figsize=(8, 10))
    sns.barplot(x="importance", y="feature", data=plot_data)
    plt.title('LightGBM Features (avg over folds)')
    plt.tight_layout()
    plt.show()


In [12]:
def feature_generation(num_rows = 10000, output_path = 'feature.pkl'):
    """Build the full feature table and pickle it.

    Starts from the combined application train/test frame and left-joins the
    per-client aggregates of every auxiliary table on SK_ID_CURR.

    Parameters
    ----------
    num_rows : int or None
        Row limit handed to every loader (each file is truncated to its own
        first ``num_rows`` rows). Defaults to 10000, the value that used to be
        hard-coded; pass None to read the complete files.
    output_path : str
        Where the resulting frame is pickled. Defaults to 'feature.pkl'.

    Returns
    -------
    pandas.DataFrame
        The joined feature frame (also written to ``output_path``).
    """
    df = application_train_test(num_rows)

    # (timer title, label printed before the shape, loader) for each auxiliary table
    stages = [
        ("Process bureau and bureau_balance", "Bureau df shape:", bureau_and_balance),
        ("Process previous_applications", "Previous applications df shape:", previous_applications),
        ("Process POS-CASH balance", "Pos-cash balance df shape:", pos_cash),
        ("Process installments payments", "Installments payments df shape:", installments_payments),
        ("Process credit card balance", "Credit card balance df shape:", credit_card_balance),
    ]
    for title, shape_label, load_aggregates in stages:
        with timer(title):
            aggregates = load_aggregates(num_rows)
            print(shape_label, aggregates.shape)
            df = df.join(aggregates, how='left', on='SK_ID_CURR')

    print('save feature data to pickle')
    df.to_pickle(output_path)
    return df

In [40]:
# LightGBM GBDT with KFold or Stratified KFold
# Parameters from Tilii kernel: https://www.kaggle.com/tilii7/olivier-lightgbm-parameters-by-bayesian-opt/code
def objective(params):
    """Hyperopt objective: cross-validated (1 - AUC) of an LGBMClassifier.

    Reads the module-level ``train_df`` and ``feats``, increments the
    module-level ``ITERATION`` counter, and appends one row
    ``[loss, params, ITERATION]`` to 'param_tuning_lightgbm.csv'.

    Parameters
    ----------
    params : dict
        Hyperparameters sampled from the search space. The dict passed in is
        not modified; an integer-cast copy is used and returned.

    Returns
    -------
    dict
        ``{'loss', 'params', 'iteration', 'status'}`` in the form hyperopt expects.
    """
    global ITERATION

    ITERATION += 1

    # Work on a copy so the caller's dict is left untouched, and cast once up
    # front the parameters that are sampled as floats but must be integers.
    params = dict(params)
    for parameter_name in ['num_leaves', 'subsample_for_bin', 'min_child_samples']:
        params[parameter_name] = int(params[parameter_name])
    # If the nested 'boosting_type' choice in the search space is re-enabled,
    # its 'boosting_type' / 'subsample' entries need to be unpacked here.

    # Cross validation model
    num_folds = 5
    stratified = False

    if stratified:
        folds = StratifiedKFold(n_splits= num_folds, shuffle=True, random_state=1001)
    else:
        folds = KFold(n_splits= num_folds, shuffle=True, random_state=1001)
    # Out-of-fold predictions for every training row
    oof_preds = np.zeros(train_df.shape[0])

    for train_idx, valid_idx in folds.split(train_df[feats], train_df['TARGET']):
        train_x, train_y = train_df[feats].iloc[train_idx], train_df['TARGET'].iloc[train_idx]
        valid_x, valid_y = train_df[feats].iloc[valid_idx], train_df['TARGET'].iloc[valid_idx]

        clf = LGBMClassifier(**params)

        # NOTE(review): params does not set n_estimators, and the captured logs
        # show every fit ending at round 100 ("Did not meet early stopping"),
        # so the 200-round patience never triggers - confirm this is intended.
        clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)],
            eval_metric= 'auc', verbose= 100, early_stopping_rounds= 200)

        oof_preds[valid_idx] = clf.predict_proba(valid_x, num_iteration=clf.best_iteration_)[:, 1]

    full_auc_score = roc_auc_score(train_df['TARGET'], oof_preds)
    loss = 1 - full_auc_score

    # Append this trial to the log; the context manager closes the file
    # (the handle used to be left open on every call). newline='' is what the
    # csv module asks for on files it writes to.
    with open('param_tuning_lightgbm.csv', 'a', newline='') as of_connection:
        csv.writer(of_connection).writerow([loss, params, ITERATION])

    return {'loss': loss, 'params': params, 'iteration': ITERATION, 'status': STATUS_OK}
    

In [41]:
# Output file name for a submission; not referenced elsewhere in the visible cells.
submission_file_name = "submission_kernel02.csv"
# Build the feature table and report the total elapsed time.
with timer("feature generation"):
    feature_generation()

Train samples: 10000, test samples: 10000
Bureau df shape: (2011, 108)
Process bureau and bureau_balance - done in 0s
Previous applications df shape: (9734, 242)
Process previous_applications - done in 1s
Pos-cash balance df shape: (9494, 15)
Process POS-CASH balance - done in 0s
Installments payments df shape: (8893, 26)
Process installments payments - done in 0s
Credit card balance df shape: (9520, 131)
Process credit card balance - done in 0s
save feature data to pickle
feature generation - done in 2s


In [42]:
# Define the search space
space = {
        'class_weight': hp.choice('class_weight', [None, 'balanced']),
#         'boosting_type': hp.choice('boosting_type', [{'boosting_type': 'gbdt', 'subsample': hp.uniform('gdbt_subsample', 0.5, 1)}, 
#                                                      {'boosting_type': 'dart', 'subsample': hp.uniform('dart_subsample', 0.5, 1)},
#                                                      {'boosting_type': 'goss', 'subsample': 1.0}]),
        # The three quniform parameters are sampled as floats; objective() casts them to int.
        'num_leaves': hp.quniform('num_leaves', 30, 150, 1),
        'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
        'subsample_for_bin': hp.quniform('subsample_for_bin', 20000, 300000, 20000),
        'min_child_samples': hp.quniform('min_child_samples', 20, 500, 5),
        'reg_alpha': hp.uniform('reg_alpha', 0.0, 1.0),
        'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
        # Label now matches the dict key (it was 'colsample_by_tree'), so every
        # entry uses the same name for its key and its hyperopt label.
        'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0)
            }

In [43]:
# Trial counter; objective() increments it once per evaluation.
# (A `global` statement at module level has no effect, so it was removed.)
ITERATION = 0

In [44]:
# Reload the feature table that feature_generation() pickled to 'feature.pkl'.
df = pd.read_pickle('feature.pkl')

In [45]:
# Rows with a TARGET value form the training set; rows without one form the test set.
train_df = df.loc[df['TARGET'].notnull()]
test_df = df.loc[df['TARGET'].isnull()]
print("Starting LightGBM. Train shape: {}, test shape: {}".format(train_df.shape, test_df.shape))
# Every column except the target, the ID columns and the leftover 'index' column is a model feature.
non_feature_columns = ['TARGET','SK_ID_CURR','SK_ID_BUREAU','SK_ID_PREV','index']
feats = [column for column in train_df.columns if column not in non_feature_columns]

Starting LightGBM. Train shape: (10000, 769), test shape: (10000, 769)


In [46]:
# Trials object passed to fmin below; its .results list is sorted by loss afterwards.
bayes_trials = Trials()

In [47]:
# Run 100 TPE evaluations of objective() over `space`, recording each trial in bayes_trials.
# NOTE(review): newer hyperopt releases may expect a numpy Generator
# (np.random.default_rng) for rstate instead of RandomState - confirm against the installed version.
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100, trials=bayes_trials, 
                rstate=np.random.RandomState(50))

Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.907782	valid_1's auc: 0.772653
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.907782	valid_1's auc: 0.772653
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.903359	valid_1's auc: 0.751253
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.903359	valid_1's auc: 0.751253
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.900153	valid_1's auc: 0.777178
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.900153	valid_1's auc: 0.777178
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.901167	valid_1's auc: 0.73364
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.901167	valid_1's auc: 0.73364
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.899183	valid_1's auc: 0.731487


[100]	training's auc: 0.996713	valid_1's auc: 0.765348
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.996713	valid_1's auc: 0.765348
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.995537	valid_1's auc: 0.734524
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.995537	valid_1's auc: 0.734524
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.994932	valid_1's auc: 0.722622
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.994932	valid_1's auc: 0.722622
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.860083	valid_1's auc: 0.775961
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.860083	valid_1's auc: 0.775961
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.862862	valid_1's auc: 0.755511
Did not meet early stopping. Best iteration is:
[100]	trainin

Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.965479	valid_1's auc: 0.743156
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.965479	valid_1's auc: 0.743156
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.965979	valid_1's auc: 0.749523
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.965979	valid_1's auc: 0.749523
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.963976	valid_1's auc: 0.756608
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.963976	valid_1's auc: 0.756608
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.965268	valid_1's auc: 0.726159
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.965268	valid_1's auc: 0.726159
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.961497	valid_1's auc: 0.72537

[100]	training's auc: 0.842205	valid_1's auc: 0.77869
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.842205	valid_1's auc: 0.77869
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.844997	valid_1's auc: 0.720904
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.844997	valid_1's auc: 0.720904
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.844965	valid_1's auc: 0.724797
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.844965	valid_1's auc: 0.724797
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.854447	valid_1's auc: 0.776773
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.854447	valid_1's auc: 0.776773
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.856963	valid_1's auc: 0.748161
Did not meet early stopping. Best iteration is:
[100]	training'

Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.881011	valid_1's auc: 0.77079
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.881011	valid_1's auc: 0.77079
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.88343	valid_1's auc: 0.751521
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.88343	valid_1's auc: 0.751521
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.876533	valid_1's auc: 0.781415
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.876533	valid_1's auc: 0.781415
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.879832	valid_1's auc: 0.735756
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.879832	valid_1's auc: 0.735756
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.878669	valid_1's auc: 0.742785
Di

[100]	training's auc: 0.973851	valid_1's auc: 0.778443
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.973851	valid_1's auc: 0.778443
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.972309	valid_1's auc: 0.739468
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.972309	valid_1's auc: 0.739468
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.970381	valid_1's auc: 0.746711
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.970381	valid_1's auc: 0.746711
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.966484	valid_1's auc: 0.763059
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.966484	valid_1's auc: 0.763059
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.965434	valid_1's auc: 0.757561
Did not meet early stopping. Best iteration is:
[100]	trainin

Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.911471	valid_1's auc: 0.774454
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.911471	valid_1's auc: 0.774454
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.912791	valid_1's auc: 0.751176
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.912791	valid_1's auc: 0.751176
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.906154	valid_1's auc: 0.781259
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.906154	valid_1's auc: 0.781259
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.907572	valid_1's auc: 0.732599
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.907572	valid_1's auc: 0.732599
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.90481	valid_1's auc: 0.733153

[100]	training's auc: 0.998831	valid_1's auc: 0.762466
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.998831	valid_1's auc: 0.762466
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.998485	valid_1's auc: 0.730989
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.998485	valid_1's auc: 0.730989
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.998269	valid_1's auc: 0.732756
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.998269	valid_1's auc: 0.732756
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.961553	valid_1's auc: 0.780295
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.961553	valid_1's auc: 0.780295
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.962363	valid_1's auc: 0.748039
Did not meet early stopping. Best iteration is:
[100]	trainin

Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.876425	valid_1's auc: 0.776002
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.876425	valid_1's auc: 0.776002
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.875432	valid_1's auc: 0.758469
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.875432	valid_1's auc: 0.758469
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.872048	valid_1's auc: 0.781822
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.872048	valid_1's auc: 0.781822
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.876054	valid_1's auc: 0.734818
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.876054	valid_1's auc: 0.734818
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.87267	valid_1's auc: 0.737208

[100]	training's auc: 0.942108	valid_1's auc: 0.77789
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.942108	valid_1's auc: 0.77789
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.941454	valid_1's auc: 0.729379
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.941454	valid_1's auc: 0.729379
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.941747	valid_1's auc: 0.741053
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.941747	valid_1's auc: 0.741053
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.973236	valid_1's auc: 0.760371
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.973236	valid_1's auc: 0.760371
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.972365	valid_1's auc: 0.761366
Did not meet early stopping. Best iteration is:
[100]	training'

Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.94813	valid_1's auc: 0.767182
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.94813	valid_1's auc: 0.767182
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.949743	valid_1's auc: 0.759179
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.949743	valid_1's auc: 0.759179
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.94575	valid_1's auc: 0.782131
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.94575	valid_1's auc: 0.782131
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.946463	valid_1's auc: 0.745233
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.946463	valid_1's auc: 0.745233
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.943899	valid_1's auc: 0.74396
Did

[100]	training's auc: 0.950127	valid_1's auc: 0.778811
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.950127	valid_1's auc: 0.778811
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.952207	valid_1's auc: 0.739748
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.952207	valid_1's auc: 0.739748
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.948479	valid_1's auc: 0.735616
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.948479	valid_1's auc: 0.735616
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.965749	valid_1's auc: 0.763125
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.965749	valid_1's auc: 0.763125
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.966062	valid_1's auc: 0.755975
Did not meet early stopping. Best iteration is:
[100]	trainin

Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.8895	valid_1's auc: 0.769453
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.8895	valid_1's auc: 0.769453
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.890569	valid_1's auc: 0.755387
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.890569	valid_1's auc: 0.755387
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.886059	valid_1's auc: 0.779114
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.886059	valid_1's auc: 0.779114
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.890843	valid_1's auc: 0.738272
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.890843	valid_1's auc: 0.738272
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.888944	valid_1's auc: 0.737379
Di

[100]	training's auc: 0.960968	valid_1's auc: 0.779465
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.960968	valid_1's auc: 0.779465
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.961636	valid_1's auc: 0.746652
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.961636	valid_1's auc: 0.746652
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.958694	valid_1's auc: 0.754164
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.958694	valid_1's auc: 0.754164
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.932418	valid_1's auc: 0.765299
Did not meet early stopping. Best iteration is:
[100]	training's auc: 0.932418	valid_1's auc: 0.765299
Training until validation scores don't improve for 200 rounds.
[100]	training's auc: 0.932264	valid_1's auc: 0.751396
Did not meet early stopping. Best iteration is:
[100]	trainin

In [48]:
# Order the recorded trial results by ascending loss (loss is 1 - AUC, so the best trial comes first).
bayes_trials_results = sorted(bayes_trials.results, key = lambda x: x['loss'])

In [None]:
# Show the two lowest-loss trials.
bayes_trials_results[:2]