In [9]:
import catboost as cb
import gc
import joblib
import json
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import seaborn as sns
import sys
sys.path.append("../")
import warnings
warnings.simplefilter("ignore")
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import roc_auc_score, fbeta_score, make_scorer
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

In [2]:
from utils.common import (
    sigmoid, pad_column_name
)
from utils.constants import *
from utils.eval_helpers import (
    plot_roc_curves, plot_feature_importance, 
    amex_metric, get_final_metric_df, amex_metric_np, lgb_amex_metric
)
from utils.eda_helpers import (
    plot_missing_proportion_barchart, 
    get_cols
)
from utils.extraction_helpers import read_file
from utils.feature_group import (
    CATEGORY_COLUMNS, CONTINUOUS_COLUMNS, NON_FEATURE_COLUMNS
)
from utils.feature_engineering_helpers import feature_gen_pipeline

In [3]:
from matplotlib.ticker import MaxNLocator
from matplotlib.colors import ListedColormap
from cycler import cycler
from IPython.display import display
from colorama import Fore, Back, Style
# Global matplotlib theme: blue axes background, '#ffd700' substituted for the first
# colour of the default cycle (the remaining default colours are kept), white text.
# The dict is fully built before update() runs, so the colour cycle read here is still
# the one in effect before this cell changes it.
plt.rcParams.update({
    'axes.facecolor': '#0057b8',  # blue
    'axes.prop_cycle': cycler(
        color=['#ffd700', *plt.rcParams['axes.prop_cycle'].by_key()['color'][1:]]
    ),
    'text.color': 'w',
})

In [4]:
%load_ext autoreload
%autoreload

### Read Data

In [5]:
%%time
risk_df = read_file(f"{INTERIM_DATA_PATH}/v6/train_parquet/train_risk.parquet")
spend_payment_df = read_file(f"{INTERIM_DATA_PATH}/v6/train_parquet/train_spend_payment.parquet")

Shape of data: (5531451, 31)
Shape of data: (5531451, 24)
CPU times: user 5.47 s, sys: 3.23 s, total: 8.7 s
Wall time: 3.95 s


In [6]:
# Load the training labels from the raw data directory; the "target" column of this
# frame is used as the model target further down.
labels = read_file(f"{RAW_DATA_PATH}/train_labels.csv")

Shape of data: (458913, 2)


In [7]:
%%time
train = pd.concat([
    spend_payment_df, 
    risk_df.loc[:, get_cols(risk_df, "R_")]
], axis=1)

CPU times: user 1.22 s, sys: 927 ms, total: 2.15 s
Wall time: 2.15 s


In [10]:
%%time
# Run the project's feature-generation pipeline on the combined statement-level frame.
# It returns two values: the aggregated feature table (train_agg) and keep_column,
# which is not used in the cells visible here.
# NOTE(review): confirm what keep_column contains in utils.feature_engineering_helpers.
train_agg, keep_column = feature_gen_pipeline(train)

Done insertion
Average done
Minimum done
Maximum done
Standard Deviation done
Last entry done
First entry done
Second last entry done
Third last entry done
MA2 for Recency 1 done
MA2 for Recency 2 done
MA2 for Recency 3 done
MA3 for Recency 1 done
MA3 for Recency 2 done
MA3 for least Recency done


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 47/47 [00:24<00:00,  1.92it/s]


CPU times: user 47.3 s, sys: 10.2 s, total: 57.6 s
Wall time: 57.9 s


In [12]:
# Preview the first three rows of the aggregated feature table.
train_agg.head(3)

Unnamed: 0_level_0,P_2_last,S_3_last,P_3_last,S_5_last,S_6_last,S_7_last,S_8_last,S_12_last,S_13_last,S_15_last,...,R_5_previous_sprint,R_5_acceleration,R_5_range,R_5_displacement,R_5_displacement_ratio,R_5_velocity,R_5_last_minus_avg,R_5_last_minus_midpoint,R_5_coef_var,R_5_trend_index
customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fbac11a8ed792feb62a,0.93,0.13,0.62,0.03,0,0.1,0.48,0.18,8.0,0.4,...,,,,,,,,,,
00000fd6641609c6ece5454664794f0340ad84dddce9a267a310b5ae68e9d8e5,0.88,0.16,0.57,0.04,0,0.2,0.4,0.19,2.0,0.4,...,,,,,,,,,,
00001b22f846c82c51f6e3958ccd81970162bae8b007e80662ef27519fcc18c1,0.88,,0.62,0.0,1,,,0.19,,,...,,,,,,,,,,


In [13]:
# len(features_to_drop)
# train_agg = train_agg.drop(columns=list(features_to_drop), errors="ignore")

In [14]:
# Align the labels to the row order of train_agg by customer_ID instead of trusting
# that the label CSV happens to be sorted exactly like the aggregated features.
# .loc raises a KeyError if any customer in train_agg has no label, which is preferable
# to silently training on misaligned targets.
target = labels.set_index("customer_ID").loc[train_agg.index, "target"].values

In [15]:
%%time
train_agg = train_agg.drop(columns=NON_FEATURE_COLUMNS + ["target"], errors="ignore")
gc.collect()

CPU times: user 5.09 s, sys: 11.2 s, total: 16.3 s
Wall time: 21.6 s


0

In [16]:
# Names of all columns stored with the pandas "category" dtype; passed to CatBoost as
# its categorical feature list.
cat_features = list(train_agg.select_dtypes(include="category").columns)

In [17]:
# Re-encode the categorical features as integer levels, with -1 standing in for missing
# values, and convert the result back to the category dtype.
# Whole-column assignment (df[cols] = ...) replaces the columns together with their
# dtype. `df.loc[:, cols] = ...` instead tries to write into the existing categorical
# columns in place on pandas >= 2.0, so the rebuilt categories (including the new -1
# level) are not reliably applied.
if cat_features:
    train_agg[cat_features] = (
        train_agg[cat_features]
        .astype(float)
        .fillna(-1)
        .astype(int)
        .astype("category")
    )

In [18]:
# Sanity check: the feature matrix and the target vector must have the same number of rows.
train_agg.shape, target.shape

((458913, 1364), (458913,))

### Train CatBoost

In [20]:
# Baseline CatBoost hyper-parameters used for the untuned cross-validation run below.
param = dict(
    objective="Logloss",
    colsample_bylevel=0.05,
    depth=6,
    boosting_type="Ordered",
    bootstrap_type="MVS",
    n_estimators=1000,
    l2_leaf_reg=3,
    min_data_in_leaf=1024,
    learning_rate=0.05,
    max_bin=127,
    used_ram_limit="16gb",
)

In [21]:
# Shuffled, stratified 5-fold splitter with a fixed seed so the folds are reproducible.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1020)

In [None]:
# Cross-validate the baseline `param` set: one CatBoost model per fold of `kf`, scored
# with amex_metric on both the training and the validation part of the fold.
train_score_list, val_score_list = [], []
# enumerate(start=1) numbers the folds directly from the splitter instead of zipping
# against a hard-coded range that has to be kept in sync with n_splits.
for fold, (idx_tr, idx_va) in enumerate(kf.split(train_agg, target), start=1):
    X_train, y_train = train_agg.iloc[idx_tr], target[idx_tr]
    X_val, y_val = train_agg.iloc[idx_va], target[idx_va]
    with warnings.catch_warnings():
        # Silence UserWarnings raised while constructing/fitting the model.
        warnings.filterwarnings('ignore', category=UserWarning)
        model = cb.CatBoostClassifier(**param)
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_val, y_val)],
            verbose=100,
            cat_features=cat_features,
            early_stopping_rounds=100
        )
    # Score on the raw formula values; amex_metric returns the overall score followed by
    # two components (printed in parentheses below).
    y_train_pred = model.predict(X_train, prediction_type='RawFormulaVal')
    train_score, train_g, train_t4 = amex_metric(y_train, y_train_pred)
    # Drop the references to the training slice before predicting on the validation
    # slice so the memory can be reclaimed by gc.collect() at the end of the fold.
    X_train = y_train = None
    y_val_pred = model.predict(X_val, prediction_type='RawFormulaVal')
    val_score, val_g, val_t4 = amex_metric(y_val, y_val_pred)
    X_val = y_val = None
    train_score_list.append(train_score)
    val_score_list.append(val_score)
    print(f"{Fore.BLUE}{Style.BRIGHT}Fold {fold} | Train Score = {train_score:.5f} ({train_g:.4f}, {train_t4:.4f})")
    print(f"{Fore.GREEN}{Style.BRIGHT}Fold {fold} | Val Score = {val_score:.5f} ({val_g:.4f}, {val_t4:.4f}){Style.RESET_ALL}")
    print(f"Clear cache {gc.collect()}")

0:	learn: 0.6404905	test: 0.6407026	best: 0.6407026 (0)	total: 237ms	remaining: 3m 57s
100:	learn: 0.2659042	test: 0.2709228	best: 0.2709228 (100)	total: 14.7s	remaining: 2m 10s
200:	learn: 0.2595988	test: 0.2657067	best: 0.2657067 (200)	total: 29.4s	remaining: 1m 57s
300:	learn: 0.2560411	test: 0.2630463	best: 0.2630463 (300)	total: 45s	remaining: 1m 44s
400:	learn: 0.2533009	test: 0.2614200	best: 0.2614200 (400)	total: 59.3s	remaining: 1m 28s
500:	learn: 0.2511526	test: 0.2604382	best: 0.2604382 (500)	total: 1m 15s	remaining: 1m 15s
600:	learn: 0.2493851	test: 0.2597818	best: 0.2597818 (600)	total: 1m 31s	remaining: 1m
700:	learn: 0.2478715	test: 0.2593069	best: 0.2593069 (700)	total: 1m 47s	remaining: 45.7s
800:	learn: 0.2463750	test: 0.2589566	best: 0.2589540 (799)	total: 2m 6s	remaining: 31.3s
900:	learn: 0.2450116	test: 0.2586375	best: 0.2586375 (900)	total: 2m 24s	remaining: 15.9s
999:	learn: 0.2436309	test: 0.2583618	best: 0.2583618 (999)	total: 2m 41s	remaining: 0us

bestTest 

### Tune CatBoost using Optuna (Stratified K-Fold)

In [53]:
def objective(trial):
    """Optuna objective: mean validation amex_metric of a stratified 5-fold CatBoost CV.

    Samples one CatBoost hyper-parameter set from ``trial``, fits one model per fold and
    returns the mean validation score over the folds that were evaluated.

    Side effects per fold:
      * If the fold's validation score beats the stored best for that fold, the train and
        validation scores in ``best_scores_json`` are updated, the dict is rewritten to
        ``{CURRENT_EXP_PATH}/best_scores.json`` and the fitted model is dumped to
        ``{CURRENT_EXP_PATH}/models/model{fold}.pkl``.
      * Otherwise, if the running mean train score is at least 0.02 above the mean of the
        stored best train scores, the trial is cut short and the mean validation score of
        the folds evaluated so far is returned.

    Relies on the notebook-level names ``train_agg``, ``target``, ``cat_features``,
    ``best_scores_json`` and ``CURRENT_EXP_PATH``.
    NOTE(review): ``best_scores_json`` is not defined in any visible cell; it must be a
    dict with "train" and "validation" sub-dicts keyed by the fold number as a string —
    confirm where it is initialised/loaded.

    Args:
        trial: the ``optuna`` trial used to sample hyper-parameters.

    Returns:
        Mean of the validation scores collected in this trial.
    """
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1020)

    param = {
        "objective": trial.suggest_categorical("objective", ["Logloss"]),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.03, 0.1, log=True),
        "depth": trial.suggest_int("depth", 4, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bernoulli", "MVS"]
        ),
        "n_estimators": trial.suggest_categorical("n_estimators", [1000, 1200, 1400]),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1, 10, log=True),
        "min_data_in_leaf": trial.suggest_categorical("min_data_in_leaf", [512, 1024, 2048]),
        "learning_rate": trial.suggest_categorical("learning_rate", [0.025, 0.05, 0.1]),
        "max_bin": trial.suggest_categorical("max_bin", [63, 127, 255]),
        "used_ram_limit": "12gb",
    }

    # `subsample` is only sampled (and passed to CatBoost) for Bernoulli bootstrap.
    if param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.6, 0.85)

    print(param)
    train_score_list, val_score_list = [], []
    # enumerate(start=1) numbers the folds from the splitter itself rather than from a
    # hard-coded range that has to match n_splits.
    for fold_no, (idx_tr, idx_va) in enumerate(kf.split(train_agg, target), start=1):
        # String key: best_scores_json is round-tripped through JSON, whose object keys
        # are always strings.
        fold = str(fold_no)
        X_train, y_train = train_agg.iloc[idx_tr], target[idx_tr]
        X_val, y_val = train_agg.iloc[idx_va], target[idx_va]
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=UserWarning)
            model = cb.CatBoostClassifier(**param)
            model.fit(
                X_train,
                y_train,
                eval_set=[(X_val, y_val)],
                verbose=0,
                cat_features=cat_features,
                early_stopping_rounds=100
            )
        y_train_pred = model.predict(X_train, prediction_type='RawFormulaVal')
        train_score, train_g, train_t4 = amex_metric(y_train, y_train_pred)
        # Release the training slice before predicting on the validation slice.
        X_train = y_train = None
        y_val_pred = model.predict(X_val, prediction_type='RawFormulaVal')
        val_score, val_g, val_t4 = amex_metric(y_val, y_val_pred)
        X_val = y_val = None
        train_score_list.append(train_score)
        val_score_list.append(val_score)
        if val_score > best_scores_json["validation"][fold]:
            # New best for this fold: persist both the scores and the fitted model.
            best_scores_json["train"][fold] = train_score
            best_scores_json["validation"][fold] = val_score
            with open(f'{CURRENT_EXP_PATH}/best_scores.json', "w") as outfile:
                json.dump(best_scores_json, outfile)
            joblib.dump(model, f'{CURRENT_EXP_PATH}/models/model{fold}.pkl')
        elif np.mean(train_score_list) >= np.mean(list(best_scores_json["train"].values())) + 0.02:
            # No validation gain while the train score runs well ahead of the stored
            # best train scores: treat it as overfitting and stop spending folds on it.
            print("Train score too high (overfitting), start a new trial")
            return np.mean(val_score_list)
        print(f"{Fore.BLUE}{Style.BRIGHT}Fold {fold} | Train Score = {train_score:.5f} ({train_g:.4f}, {train_t4:.4f})")
        print(f"{Fore.GREEN}{Style.BRIGHT}Fold {fold} | Val Score = {val_score:.5f} ({val_g:.4f}, {val_t4:.4f}){Style.RESET_ALL}")
        print(f"Clear cache {gc.collect()}")

    return np.mean(val_score_list)

In [54]:
# study = joblib.load(f"{CURRENT_EXP_PATH}/optuna_study.pkl")

In [57]:
# study = optuna.create_study(direction="maximize")

In [59]:
# Make the cell runnable on a fresh kernel: `study` is otherwise only defined by cells
# whose code is commented out. Resume the persisted study when it exists, else start a
# new maximisation study.
# NOTE: the pickle is loaded with joblib, so only load study files you created yourself.
if "study" not in globals():
    try:
        study = joblib.load(f"{CURRENT_EXP_PATH}/optuna_study.pkl")
    except FileNotFoundError:
        study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

{'objective': 'Logloss', 'colsample_bylevel': 0.06079765732099717, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 7.6044718629016215, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84184 (0.9419, 0.7417)
[32m[1mFold 1 | Val Score = 0.79243 (0.9219, 0.6629)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84106 (0.9414, 0.7407)
[32m[1mFold 2 | Val Score = 0.79242 (0.9233, 0.6616)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84232 (0.9417, 0.7429)
[32m[1mFold 3 | Val Score = 0.79439 (0.9242, 0.6646)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84181 (0.9416, 0.7420)
[32m[1mFold 4 | Val Score = 0.79802 (0.9246, 0.6714)[0m
Clear cache 0


[32m[I 2022-07-30 10:29:18,789][0m Trial 23 finished with value: 0.7944563728274476 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06079765732099717, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 7.6044718629016215, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.84108 (0.9411, 0.7411)
[32m[1mFold 5 | Val Score = 0.79502 (0.9254, 0.6647)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.06678481135488246, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.523346450477207, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.85918 (0.9482, 0.7702)
[32m[1mFold 1 | Val Score = 0.79167 (0.9219, 0.6615)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.85973 (0.9483, 0.7712)
[32m[1mFold 2 | Val Score = 0.79354 (0.9238, 0.6633)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.85960 (0.9480, 0.7712)
[32m[1mFold 3 | Val Score = 0.79511 (0.9243, 0.6659)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.86155 (0.9486, 0.7745)
[32m[1mFold 4 | Val Score = 0.79594 (0.9243, 0.6675)[0m
Clear cache 0


[32m[I 2022-07-30 11:00:49,370][0m Trial 24 finished with value: 0.794272749764656 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06678481135488246, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.523346450477207, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.85856 (0.9474, 0.7697)
[32m[1mFold 5 | Val Score = 0.79510 (0.9253, 0.6649)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.06047710022425621, 'depth': 5, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 3.2517951534674845, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.82766 (0.9366, 0.7187)
[32m[1mFold 1 | Val Score = 0.79261 (0.9216, 0.6636)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.82760 (0.9364, 0.7188)
[32m[1mFold 2 | Val Score = 0.79267 (0.9232, 0.6621)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.82670 (0.9361, 0.7173)
[32m[1mFold 3 | Val Score = 0.79450 (0.9240, 0.6650)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.82698 (0.9362, 0.7178)
[32m[1mFold 4 | Val Score = 0.79628 (0.9243, 0.6683)[0m
Clear cache 0


[32m[I 2022-07-30 11:24:51,893][0m Trial 25 finished with value: 0.7939775782040914 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06047710022425621, 'depth': 5, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 3.2517951534674845, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.82590 (0.9355, 0.7163)
[32m[1mFold 5 | Val Score = 0.79382 (0.9249, 0.6628)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.04908284819520433, 'depth': 8, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.434230248807744, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}


[32m[I 2022-07-30 11:30:36,238][0m Trial 26 finished with value: 0.7909718881330039 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.04908284819520433, 'depth': 8, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.434230248807744, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


Train score too high (overfitting), start a new trial
{'objective': 'Logloss', 'colsample_bylevel': 0.06570054164595929, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 8.137437799887632, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84124 (0.9416, 0.7409)
[32m[1mFold 1 | Val Score = 0.79141 (0.9221, 0.6607)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84096 (0.9415, 0.7404)
[32m[1mFold 2 | Val Score = 0.79307 (0.9232, 0.6629)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84252 (0.9416, 0.7434)
[32m[1mFold 3 | Val Score = 0.79429 (0.9241, 0.6645)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84215 (0.9417, 0.7426)
[32m[1mFold 4 | Val Score = 0.79702 (0.9245, 0.6696)[0m
Clear cache 0


[32m[I 2022-07-30 11:58:03,546][0m Trial 27 finished with value: 0.7942823266592958 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06570054164595929, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 8.137437799887632, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.83998 (0.9407, 0.7393)
[32m[1mFold 5 | Val Score = 0.79563 (0.9252, 0.6660)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.0837160779709254, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1000, 'l2_leaf_reg': 2.343770232125307, 'min_data_in_leaf': 1024, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84491 (0.9430, 0.7468)
[32m[1mFold 1 | Val Score = 0.79096 (0.9214, 0.6605)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84360 (0.9424, 0.7448)
[32m[1mFold 2 | Val Score = 0.79384 (0.9229, 0.6648)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84411 (0.9425, 0.7457)
[32m[1mFold 3 | Val Score = 0.79467 (0.9240, 0.6653)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84537 (0.9426, 0.7482)
[32m[1mFold 4 | Val Score = 0.79766 (0.9240, 0.6713)[0m
Clear cache 0


[32m[I 2022-07-30 12:24:43,439][0m Trial 28 finished with value: 0.7944391089288791 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.0837160779709254, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1000, 'l2_leaf_reg': 2.343770232125307, 'min_data_in_leaf': 1024, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.84201 (0.9416, 0.7424)
[32m[1mFold 5 | Val Score = 0.79507 (0.9248, 0.6653)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.04965290518703426, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'n_estimators': 1400, 'l2_leaf_reg': 3.465301482672618, 'min_data_in_leaf': 2048, 'learning_rate': 0.12, 'max_bin': 255, 'used_ram_limit': '12gb', 'subsample': 0.7391571541401726}
[34m[1mFold 1 | Train Score = 0.84434 (0.9424, 0.7463)
[32m[1mFold 1 | Val Score = 0.79152 (0.9212, 0.6619)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84453 (0.9423, 0.7468)
[32m[1mFold 2 | Val Score = 0.79059 (0.9224, 0.6588)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.83903 (0.9401, 0.7380)
[32m[1mFold 3 | Val Score = 0.79170 (0.9233, 0.6601)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.83623 (0.9393, 0.7332)
[32m[1mFold 4 | Val Score = 0.79576 (0.9235, 0.6680)[0m
Clear cache 0


[32m[I 2022-07-30 12:36:24,158][0m Trial 29 finished with value: 0.7932383610999546 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.04965290518703426, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'n_estimators': 1400, 'l2_leaf_reg': 3.465301482672618, 'min_data_in_leaf': 2048, 'learning_rate': 0.12, 'max_bin': 255, 'subsample': 0.7391571541401726}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.84400 (0.9417, 0.7463)
[32m[1mFold 5 | Val Score = 0.79662 (0.9246, 0.6686)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.05608579858755569, 'depth': 9, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.024499794239676, 'min_data_in_leaf': 2048, 'learning_rate': 0.025, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.83525 (0.9394, 0.7311)
[32m[1mFold 1 | Val Score = 0.79091 (0.9215, 0.6604)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.83446 (0.9391, 0.7298)
[32m[1mFold 2 | Val Score = 0.79241 (0.9227, 0.6621)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.83448 (0.9389, 0.7301)
[32m[1mFold 3 | Val Score = 0.79396 (0.9238, 0.6641)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.83534 (0.9392, 0.7315)
[32m[1mFold 4 | Val Score = 0.79686 (0.9242, 0.6695)[0m
Clear cache 0


[32m[I 2022-07-30 13:12:54,023][0m Trial 30 finished with value: 0.7940054720852323 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.05608579858755569, 'depth': 9, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.024499794239676, 'min_data_in_leaf': 2048, 'learning_rate': 0.025, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.83278 (0.9383, 0.7272)
[32m[1mFold 5 | Val Score = 0.79590 (0.9250, 0.6668)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.07116391415625412, 'depth': 9, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'n_estimators': 1400, 'l2_leaf_reg': 9.363798891875373, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 127, 'used_ram_limit': '12gb', 'subsample': 0.7797815693954372}


[32m[I 2022-07-30 13:16:32,349][0m Trial 31 finished with value: 0.7911730714704922 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.07116391415625412, 'depth': 9, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'n_estimators': 1400, 'l2_leaf_reg': 9.363798891875373, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 127, 'subsample': 0.7797815693954372}. Best is trial 17 with value: 0.7952103875641641.[0m


Train score too high (overfitting), start a new trial
{'objective': 'Logloss', 'colsample_bylevel': 0.039637123013675585, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 2.7715150269746607, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 127, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84167 (0.9418, 0.7415)
[32m[1mFold 1 | Val Score = 0.79187 (0.9218, 0.6620)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84117 (0.9415, 0.7408)
[32m[1mFold 2 | Val Score = 0.79254 (0.9231, 0.6620)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84114 (0.9414, 0.7409)
[32m[1mFold 3 | Val Score = 0.79572 (0.9242, 0.6672)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84133 (0.9413, 0.7414)
[32m[1mFold 4 | Val Score = 0.79617 (0.9245, 0.6679)[0m
Clear cache 0


[32m[I 2022-07-30 13:35:45,421][0m Trial 32 finished with value: 0.7943868309116611 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.039637123013675585, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 2.7715150269746607, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 127}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.83971 (0.9408, 0.7386)
[32m[1mFold 5 | Val Score = 0.79563 (0.9252, 0.6660)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.060974417323002454, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 7.515567484245174, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84232 (0.9421, 0.7426)
[32m[1mFold 1 | Val Score = 0.79124 (0.9219, 0.6606)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84123 (0.9415, 0.7409)
[32m[1mFold 2 | Val Score = 0.79106 (0.9234, 0.6587)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84169 (0.9416, 0.7417)
[32m[1mFold 3 | Val Score = 0.79518 (0.9242, 0.6661)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84133 (0.9416, 0.7411)
[32m[1mFold 4 | Val Score = 0.79888 (0.9246, 0.6732)[0m
Clear cache 0


[32m[I 2022-07-30 14:00:57,663][0m Trial 33 finished with value: 0.7942373268847405 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.060974417323002454, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 7.515567484245174, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.84032 (0.9408, 0.7398)
[32m[1mFold 5 | Val Score = 0.79482 (0.9253, 0.6644)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.05877618589761325, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 6.2418991237047035, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.85985 (0.9483, 0.7714)
[32m[1mFold 1 | Val Score = 0.79175 (0.9219, 0.6616)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.86004 (0.9480, 0.7721)
[32m[1mFold 2 | Val Score = 0.79185 (0.9233, 0.6604)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.85944 (0.9479, 0.7709)
[32m[1mFold 3 | Val Score = 0.79658 (0.9244, 0.6688)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.85858 (0.9473, 0.7698)
[32m[1mFold 4 | Val Score = 0.79555 (0.9244, 0.6667)[0m
Clear cache 0


[32m[I 2022-07-30 14:28:48,680][0m Trial 34 finished with value: 0.794303120892953 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.05877618589761325, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 6.2418991237047035, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.85687 (0.9469, 0.7669)
[32m[1mFold 5 | Val Score = 0.79578 (0.9254, 0.6662)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.062121687809993276, 'depth': 4, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1000, 'l2_leaf_reg': 8.062065161055633, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.80747 (0.9287, 0.6862)
[32m[1mFold 1 | Val Score = 0.78969 (0.9203, 0.6591)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.80668 (0.9285, 0.6848)
[32m[1mFold 2 | Val Score = 0.79026 (0.9218, 0.6587)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.80658 (0.9282, 0.6849)
[32m[1mFold 3 | Val Score = 0.79190 (0.9230, 0.6608)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.80662 (0.9282, 0.6850)
[32m[1mFold 4 | Val Score = 0.79531 (0.9233, 0.6674)[0m
Clear cache 0


[32m[I 2022-07-30 14:43:14,424][0m Trial 35 finished with value: 0.7917806109216808 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.062121687809993276, 'depth': 4, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1000, 'l2_leaf_reg': 8.062065161055633, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.80586 (0.9279, 0.6838)
[32m[1mFold 5 | Val Score = 0.79174 (0.9240, 0.6595)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.08047762169145241, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 3.9005756157948928, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84371 (0.9424, 0.7450)
[32m[1mFold 1 | Val Score = 0.79211 (0.9219, 0.6623)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84366 (0.9422, 0.7451)
[32m[1mFold 2 | Val Score = 0.79354 (0.9231, 0.6639)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84378 (0.9422, 0.7453)
[32m[1mFold 3 | Val Score = 0.79445 (0.9241, 0.6648)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84440 (0.9423, 0.7465)
[32m[1mFold 4 | Val Score = 0.79678 (0.9243, 0.6692)[0m
Clear cache 0


[32m[I 2022-07-30 15:14:15,492][0m Trial 36 finished with value: 0.7945737558981614 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.08047762169145241, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 3.9005756157948928, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.84289 (0.9417, 0.7441)
[32m[1mFold 5 | Val Score = 0.79599 (0.9253, 0.6667)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.08175173619297524, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 3.775548147107601, 'min_data_in_leaf': 1024, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.83212 (0.9380, 0.7262)
[32m[1mFold 1 | Val Score = 0.79286 (0.9214, 0.6643)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.83241 (0.9377, 0.7271)
[32m[1mFold 2 | Val Score = 0.79112 (0.9228, 0.6594)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.83128 (0.9372, 0.7253)
[32m[1mFold 3 | Val Score = 0.79240 (0.9236, 0.6612)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.83052 (0.9372, 0.7238)
[32m[1mFold 4 | Val Score = 0.79693 (0.9241, 0.6697)[0m
Clear cache 0


[32m[I 2022-07-30 15:30:27,141][0m Trial 37 finished with value: 0.7936453107776799 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.08175173619297524, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 3.775548147107601, 'min_data_in_leaf': 1024, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.83174 (0.9372, 0.7263)
[32m[1mFold 5 | Val Score = 0.79492 (0.9250, 0.6648)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.08480357641618297, 'depth': 8, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 3.9642360570080935, 'min_data_in_leaf': 2048, 'learning_rate': 0.025, 'max_bin': 255, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.82579 (0.9357, 0.7159)
[32m[1mFold 1 | Val Score = 0.79083 (0.9211, 0.6606)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.82541 (0.9355, 0.7153)
[32m[1mFold 2 | Val Score = 0.79092 (0.9224, 0.6594)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.82504 (0.9352, 0.7148)
[32m[1mFold 3 | Val Score = 0.79436 (0.9236, 0.6651)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.82546 (0.9353, 0.7156)
[32m[1mFold 4 | Val Score = 0.79529 (0.9239, 0.6667)[0m
Clear cache 0


[32m[I 2022-07-30 16:22:24,503][0m Trial 38 finished with value: 0.793411796209913 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.08480357641618297, 'depth': 8, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 3.9642360570080935, 'min_data_in_leaf': 2048, 'learning_rate': 0.025, 'max_bin': 255}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.82320 (0.9345, 0.7119)
[32m[1mFold 5 | Val Score = 0.79565 (0.9247, 0.6666)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.07367762695935444, 'depth': 7, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS', 'n_estimators': 1000, 'l2_leaf_reg': 2.6757070778120884, 'min_data_in_leaf': 1024, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.85351 (0.9455, 0.7615)
[32m[1mFold 1 | Val Score = 0.79013 (0.9213, 0.6589)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.85760 (0.9467, 0.7685)
[32m[1mFold 2 | Val Score = 0.79077 (0.9227, 0.6588)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.85523 (0.9457, 0.7647)
[32m[1mFold 3 | Val Score = 0.79387 (0.9237, 0.6640)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.85669 (0.9462, 0.7671)
[32m[1mFold 4 | Val Score = 0.79573 (0.9240, 0.6675)[0m
Clear cache 0


[32m[I 2022-07-30 16:36:26,617][0m Trial 39 finished with value: 0.7931127469142492 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.07367762695935444, 'depth': 7, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS', 'n_estimators': 1000, 'l2_leaf_reg': 2.6757070778120884, 'min_data_in_leaf': 1024, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.85646 (0.9459, 0.7670)
[32m[1mFold 5 | Val Score = 0.79506 (0.9248, 0.6653)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.07988368273714919, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.608556654802026, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84340 (0.9423, 0.7445)
[32m[1mFold 1 | Val Score = 0.79240 (0.9218, 0.6630)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84374 (0.9423, 0.7452)
[32m[1mFold 2 | Val Score = 0.79371 (0.9234, 0.6640)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84436 (0.9424, 0.7464)
[32m[1mFold 3 | Val Score = 0.79478 (0.9244, 0.6652)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84392 (0.9422, 0.7456)
[32m[1mFold 4 | Val Score = 0.79535 (0.9246, 0.6661)[0m
Clear cache 0


[32m[I 2022-07-30 17:07:16,820][0m Trial 40 finished with value: 0.7944552577197597 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.07988368273714919, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.608556654802026, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.84131 (0.9413, 0.7414)
[32m[1mFold 5 | Val Score = 0.79603 (0.9254, 0.6667)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.0989886016680892, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS', 'n_estimators': 1200, 'l2_leaf_reg': 1.8646738874871054, 'min_data_in_leaf': 2048, 'learning_rate': 0.025, 'max_bin': 255, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.80194 (0.9264, 0.6775)
[32m[1mFold 1 | Val Score = 0.78753 (0.9194, 0.6557)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.80185 (0.9261, 0.6776)
[32m[1mFold 2 | Val Score = 0.78815 (0.9207, 0.6556)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.80109 (0.9258, 0.6764)
[32m[1mFold 3 | Val Score = 0.79212 (0.9219, 0.6623)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.80130 (0.9257, 0.6769)
[32m[1mFold 4 | Val Score = 0.79289 (0.9222, 0.6636)[0m
Clear cache 0


[32m[I 2022-07-30 17:22:49,340][0m Trial 41 finished with value: 0.7904884780715635 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.0989886016680892, 'depth': 5, 'boosting_type': 'Plain', 'bootstrap_type': 'MVS', 'n_estimators': 1200, 'l2_leaf_reg': 1.8646738874871054, 'min_data_in_leaf': 2048, 'learning_rate': 0.025, 'max_bin': 255}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.80118 (0.9255, 0.6769)
[32m[1mFold 5 | Val Score = 0.79175 (0.9230, 0.6605)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.05616704665524644, 'depth': 4, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bernoulli', 'n_estimators': 1400, 'l2_leaf_reg': 5.645162332703834, 'min_data_in_leaf': 512, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb', 'subsample': 0.6782154199057993}
[34m[1mFold 1 | Train Score = 0.81002 (0.9298, 0.6902)
[32m[1mFold 1 | Val Score = 0.79075 (0.9207, 0.6608)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.80955 (0.9295, 0.6896)
[32m[1mFold 2 | Val Score = 0.78997 (0.9222, 0.6578)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.80899 (0.9292, 0.6888)
[32m[1mFold 3 | Val Score = 0.79353 (0.9230, 0.6640)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.80909 (0.9293, 0.6889)
[32m[1mFold 4 | Val Score = 0.79420 (0.9234, 0.6650)[0m
Clear cache 0


[32m[I 2022-07-30 17:39:47,130][0m Trial 42 finished with value: 0.7923990308642359 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.05616704665524644, 'depth': 4, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bernoulli', 'n_estimators': 1400, 'l2_leaf_reg': 5.645162332703834, 'min_data_in_leaf': 512, 'learning_rate': 0.07, 'max_bin': 63, 'subsample': 0.6782154199057993}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.80843 (0.9290, 0.6879)
[32m[1mFold 5 | Val Score = 0.79355 (0.9244, 0.6627)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.07023158801166186, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.556031536624793, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84354 (0.9426, 0.7445)
[32m[1mFold 1 | Val Score = 0.79298 (0.9220, 0.6640)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84338 (0.9420, 0.7447)
[32m[1mFold 2 | Val Score = 0.79320 (0.9233, 0.6631)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84270 (0.9423, 0.7431)
[32m[1mFold 3 | Val Score = 0.79507 (0.9244, 0.6657)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84372 (0.9423, 0.7452)
[32m[1mFold 4 | Val Score = 0.79585 (0.9242, 0.6675)[0m
Clear cache 0


[32m[I 2022-07-30 18:07:45,300][0m Trial 43 finished with value: 0.7945890354751909 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.07023158801166186, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.556031536624793, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.84075 (0.9411, 0.7404)
[32m[1mFold 5 | Val Score = 0.79585 (0.9254, 0.6663)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.0655429990281379, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.3702033870970105, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84346 (0.9425, 0.7444)
[32m[1mFold 1 | Val Score = 0.79208 (0.9217, 0.6624)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84326 (0.9421, 0.7444)
[32m[1mFold 2 | Val Score = 0.79343 (0.9233, 0.6636)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84099 (0.9414, 0.7405)
[32m[1mFold 3 | Val Score = 0.79606 (0.9244, 0.6678)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84218 (0.9420, 0.7424)
[32m[1mFold 4 | Val Score = 0.79671 (0.9244, 0.6690)[0m
Clear cache 0


[32m[I 2022-07-30 18:34:20,419][0m Trial 44 finished with value: 0.7948221291359064 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.0655429990281379, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.3702033870970105, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.84075 (0.9412, 0.7403)
[32m[1mFold 5 | Val Score = 0.79583 (0.9253, 0.6663)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.06979340031334746, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.567838891813107, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.85933 (0.9479, 0.7707)
[32m[1mFold 1 | Val Score = 0.79128 (0.9220, 0.6606)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.85958 (0.9482, 0.7710)
[32m[1mFold 2 | Val Score = 0.79445 (0.9234, 0.6655)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.86162 (0.9487, 0.7745)
[32m[1mFold 3 | Val Score = 0.79620 (0.9244, 0.6680)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.85974 (0.9479, 0.7716)
[32m[1mFold 4 | Val Score = 0.79765 (0.9246, 0.6707)[0m
Clear cache 0


[32m[I 2022-07-30 19:06:00,529][0m Trial 45 finished with value: 0.7947811644048677 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06979340031334746, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.567838891813107, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.85877 (0.9474, 0.7701)
[32m[1mFold 5 | Val Score = 0.79433 (0.9253, 0.6633)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.06442995370203353, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.010947575435621, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.85987 (0.9486, 0.7711)
[32m[1mFold 1 | Val Score = 0.78997 (0.9219, 0.6580)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.85708 (0.9473, 0.7668)
[32m[1mFold 2 | Val Score = 0.79252 (0.9231, 0.6620)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.86052 (0.9483, 0.7727)
[32m[1mFold 3 | Val Score = 0.79570 (0.9242, 0.6672)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.86111 (0.9484, 0.7738)
[32m[1mFold 4 | Val Score = 0.79756 (0.9247, 0.6704)[0m
Clear cache 0


[32m[I 2022-07-30 19:35:41,162][0m Trial 46 finished with value: 0.7943159026677569 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06442995370203353, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.010947575435621, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


[34m[1mFold 5 | Train Score = 0.85967 (0.9476, 0.7717)
[32m[1mFold 5 | Val Score = 0.79583 (0.9258, 0.6659)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.0570134937001418, 'depth': 8, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1000, 'l2_leaf_reg': 6.025731640557671, 'min_data_in_leaf': 1024, 'learning_rate': 0.12, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.85424 (0.9462, 0.7623)
[32m[1mFold 1 | Val Score = 0.78927 (0.9211, 0.6575)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.86562 (0.9502, 0.7810)
[32m[1mFold 2 | Val Score = 0.79037 (0.9225, 0.6582)[0m
Clear cache 0


[32m[I 2022-07-30 19:47:02,636][0m Trial 47 finished with value: 0.7909344709464196 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.0570134937001418, 'depth': 8, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1000, 'l2_leaf_reg': 6.025731640557671, 'min_data_in_leaf': 1024, 'learning_rate': 0.12, 'max_bin': 63}. Best is trial 17 with value: 0.7952103875641641.[0m


Train score too high (overfitting), start a new trial
{'objective': 'Logloss', 'colsample_bylevel': 0.06413298980628422, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.2290041321509095, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.85551 (0.9470, 0.7640)
[32m[1mFold 1 | Val Score = 0.79263 (0.9220, 0.6633)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.85988 (0.9481, 0.7716)
[32m[1mFold 2 | Val Score = 0.79343 (0.9233, 0.6636)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.86080 (0.9485, 0.7731)
[32m[1mFold 3 | Val Score = 0.79555 (0.9244, 0.6667)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.86082 (0.9486, 0.7730)
[32m[1mFold 4 | Val Score = 0.79749 (0.9245, 0.6704)[0m
Clear cache 0


[32m[I 2022-07-30 20:16:32,872][0m Trial 48 finished with value: 0.7952443121251143 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06413298980628422, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.2290041321509095, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 48 with value: 0.7952443121251143.[0m


[34m[1mFold 5 | Train Score = 0.85779 (0.9473, 0.7683)
[32m[1mFold 5 | Val Score = 0.79712 (0.9256, 0.6686)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.046381264317842424, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1200, 'l2_leaf_reg': 4.251454591452709, 'min_data_in_leaf': 512, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.85083 (0.9450, 0.7567)
[32m[1mFold 1 | Val Score = 0.79227 (0.9217, 0.6628)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84886 (0.9445, 0.7532)
[32m[1mFold 2 | Val Score = 0.79247 (0.9230, 0.6620)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.85078 (0.9450, 0.7565)
[32m[1mFold 3 | Val Score = 0.79433 (0.9241, 0.6645)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.85193 (0.9450, 0.7588)
[32m[1mFold 4 | Val Score = 0.79784 (0.9245, 0.6712)[0m
Clear cache 0


[32m[I 2022-07-30 20:36:46,714][0m Trial 49 finished with value: 0.7942939188499372 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.046381264317842424, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1200, 'l2_leaf_reg': 4.251454591452709, 'min_data_in_leaf': 512, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 48 with value: 0.7952443121251143.[0m


[34m[1mFold 5 | Train Score = 0.84979 (0.9444, 0.7552)
[32m[1mFold 5 | Val Score = 0.79455 (0.9252, 0.6639)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.05223923375100958, 'depth': 11, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 2.948491321381945, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 127, 'used_ram_limit': '12gb'}


[32m[I 2022-07-30 20:47:32,653][0m Trial 50 finished with value: 0.7907951759985923 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.05223923375100958, 'depth': 11, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 2.948491321381945, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 127}. Best is trial 48 with value: 0.7952443121251143.[0m


Train score too high (overfitting), start a new trial
{'objective': 'Logloss', 'colsample_bylevel': 0.07023946651028405, 'depth': 8, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bernoulli', 'n_estimators': 1400, 'l2_leaf_reg': 3.574750536251403, 'min_data_in_leaf': 2048, 'learning_rate': 0.12, 'max_bin': 63, 'used_ram_limit': '12gb', 'subsample': 0.6017678682672237}
[34m[1mFold 1 | Train Score = 0.86627 (0.9507, 0.7818)
[32m[1mFold 1 | Val Score = 0.79046 (0.9212, 0.6597)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.85899 (0.9481, 0.7699)
[32m[1mFold 2 | Val Score = 0.79097 (0.9220, 0.6599)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84463 (0.9427, 0.7466)
[32m[1mFold 3 | Val Score = 0.79180 (0.9232, 0.6604)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.86416 (0.9497, 0.7786)
[32m[1mFold 4 | Val Score = 0.79550 (0.9234, 0.6676)[0m
Clear cache 0


[32m[I 2022-07-30 21:09:01,752][0m Trial 51 finished with value: 0.792042845019161 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.07023946651028405, 'depth': 8, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bernoulli', 'n_estimators': 1400, 'l2_leaf_reg': 3.574750536251403, 'min_data_in_leaf': 2048, 'learning_rate': 0.12, 'max_bin': 63, 'subsample': 0.6017678682672237}. Best is trial 48 with value: 0.7952443121251143.[0m


[34m[1mFold 5 | Train Score = 0.85546 (0.9468, 0.7641)
[32m[1mFold 5 | Val Score = 0.79149 (0.9241, 0.6589)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.042751802624111254, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1200, 'l2_leaf_reg': 2.074222982786614, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 255, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84983 (0.9447, 0.7549)
[32m[1mFold 1 | Val Score = 0.78913 (0.9214, 0.6569)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.85007 (0.9446, 0.7556)
[32m[1mFold 2 | Val Score = 0.79269 (0.9229, 0.6625)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.85026 (0.9446, 0.7559)
[32m[1mFold 3 | Val Score = 0.79519 (0.9241, 0.6662)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84987 (0.9445, 0.7553)
[32m[1mFold 4 | Val Score = 0.79602 (0.9241, 0.6679)[0m
Clear cache 0


[32m[I 2022-07-30 21:31:13,346][0m Trial 52 finished with value: 0.7934968062672549 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.042751802624111254, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1200, 'l2_leaf_reg': 2.074222982786614, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 255}. Best is trial 48 with value: 0.7952443121251143.[0m


[34m[1mFold 5 | Train Score = 0.84784 (0.9434, 0.7523)
[32m[1mFold 5 | Val Score = 0.79445 (0.9250, 0.6639)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.06424643294980259, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.108272805150234, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.84267 (0.9424, 0.7430)
[32m[1mFold 1 | Val Score = 0.79171 (0.9218, 0.6617)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.84232 (0.9419, 0.7428)
[32m[1mFold 2 | Val Score = 0.79305 (0.9233, 0.6628)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.84319 (0.9420, 0.7444)
[32m[1mFold 3 | Val Score = 0.79578 (0.9244, 0.6672)[0m
Clear cache 0
[34m[1mFold 4 | Train Score = 0.84342 (0.9421, 0.7448)
[32m[1mFold 4 | Val Score = 0.79694 (0.9243, 0.6696)[0m
Clear cache 0


[32m[I 2022-07-30 21:58:19,335][0m Trial 53 finished with value: 0.794641883578542 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.06424643294980259, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 5.108272805150234, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}. Best is trial 48 with value: 0.7952443121251143.[0m


[34m[1mFold 5 | Train Score = 0.84165 (0.9415, 0.7418)
[32m[1mFold 5 | Val Score = 0.79574 (0.9253, 0.6662)[0m
Clear cache 0
{'objective': 'Logloss', 'colsample_bylevel': 0.06809959507321718, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.168814994770979, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63, 'used_ram_limit': '12gb'}
[34m[1mFold 1 | Train Score = 0.86072 (0.9485, 0.7729)
[32m[1mFold 1 | Val Score = 0.79144 (0.9216, 0.6612)[0m
Clear cache 0
[34m[1mFold 2 | Train Score = 0.85640 (0.9470, 0.7658)
[32m[1mFold 2 | Val Score = 0.79209 (0.9234, 0.6608)[0m
Clear cache 0
[34m[1mFold 3 | Train Score = 0.85363 (0.9458, 0.7614)
[32m[1mFold 3 | Val Score = 0.79385 (0.9240, 0.6637)[0m
Clear cache 0



KeyboardInterrupt



In [60]:
# Inspect the best trial recorded by the study; the search cell above was
# stopped by a KeyboardInterrupt, so this reflects the trials logged up to then.
study.best_trial

FrozenTrial(number=48, values=[0.7952443121251143], datetime_start=datetime.datetime(2022, 7, 30, 19, 47, 2, 641815), datetime_complete=datetime.datetime(2022, 7, 30, 20, 16, 32, 872054), params={'objective': 'Logloss', 'colsample_bylevel': 0.06413298980628422, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'MVS', 'n_estimators': 1400, 'l2_leaf_reg': 4.2290041321509095, 'min_data_in_leaf': 2048, 'learning_rate': 0.07, 'max_bin': 63}, distributions={'objective': CategoricalDistribution(choices=('Logloss',)), 'colsample_bylevel': LogUniformDistribution(high=0.1, low=0.03), 'depth': IntUniformDistribution(high=12, low=4, step=1), 'boosting_type': CategoricalDistribution(choices=('Ordered', 'Plain')), 'bootstrap_type': CategoricalDistribution(choices=('Bernoulli', 'MVS')), 'n_estimators': CategoricalDistribution(choices=(1000, 1200, 1400)), 'l2_leaf_reg': LogUniformDistribution(high=10.0, low=1.0), 'min_data_in_leaf': CategoricalDistribution(choices=(512, 1024, 2048)), 'learning

In [61]:
# Save the whole study object to optuna_study.pkl under CURRENT_EXP_PATH.
# NOTE(review): the file is a pickle — only load it back from a trusted location.
joblib.dump(study, f"{CURRENT_EXP_PATH}/optuna_study.pkl")

['../experiments/catboost_new_exp/optuna_study.pkl']

In [62]:
# Export the study's trials via trials_dataframe() and keep the result as study_df.
study_df = study.trials_dataframe()

In [11]:
# Disabled exploration: for each column returned by get_cols(study_df, "params"),
# draw a scatter plot of that column against the trial "value" column.
# NOTE(review): this cell is entirely commented out — re-enable it or delete it;
# it depends on study_df being defined first.
# for col in get_cols(study_df, "params"):
#     sns.scatterplot(data=study_df, x=study_df[col], y=study_df["value"])
#     plt.show()