In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, SelectionSlider

from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import f1_score

#from stumpy import stump, mstump

import lightgbm as lgbm
from lightgbm import LGBMClassifier

import optuna

from preprocessing import create_tensor, agg_over_months, min_max_scaling, standard_scaling
from model_lgbm import train_lgbm, train_best_lgbm, lgbm_plot_evals, lgbm_f1, double_soft_f1_loss
from training import cv_split, study_summary
from utils import save_pred


plt.rcParams["figure.figsize"] = (12, 6)
#np.random.seed(0)

# Setup

In [2]:
train_df = pd.read_csv("./data/train.csv", index_col=0).drop_duplicates()
test_df = pd.read_csv("./data/test_nolabels.csv", index_col=0)

class0_idx = train_df.loc[train_df.LABELS == 0].index
class1_idx = train_df.loc[train_df.LABELS == 1].index
y1_weight = train_df.LABELS.sum() / train_df.shape[0]
# y1_weight = 0.6757894386583703

In [3]:
# Constants to select pandas columns efficiently
# all months abbreviations: jan, feb, mar, etc.
MONTHS = [m.lower() for m in pd.date_range(0, freq="M", periods=12).strftime("%b").to_list()]

# return all columns based on MONTH: train_df[COL_BY_MONTH["jan"]]
COL_BY_MONTH = {}
for month in MONTHS:
    COL_BY_MONTH[month] = [col for col in train_df.columns if month in col]

# all features
FEATURES = [col for col in train_df.columns if col != "LABELS"]
# all features with the month stripped: S2_B2_
COL_BASE = list({col[:-3] for col in FEATURES})
# return all columns based on FEATURE (e.g., across months): train_df[COL_BY_FEATURE["S2_B2_"]]
# alternatively, you can select the same df using: train_df[COL_BY_FEATURE[FEATURES[0]]]
COL_BY_FEATURE = {}
for feature in COL_BASE:
    COL_BY_FEATURE[feature] = [col for col in train_df.columns if feature in col]

# Preprocessing

In [4]:
agg_train = agg_over_months(train_df, freq=6)
agg_test = agg_over_months(test_df, freq=6)
scaled_train, scaled_test = standard_scaling(train_df, test_df)

In [5]:
pca = PCA(n_components = 20)
train_x = scaled_train[[c for c in train_df.columns if c != "LABELS"]]
projected_train = pd.DataFrame(pca.fit_transform(train_x), columns=[f"comp{i}" for i in range(20)])
projected_train[~np.isfinite(projected_train)] = 0
projected_train["LABELS"] = train_df.reset_index().LABELS

In [8]:
projected_test = pd.DataFrame(pca.transform(test_df), columns=[f"comp{i}" for i in range(20)])
projected_test[~np.isfinite(projected_test)] = 0

## optuna optimization

In [9]:
def objective(trial, train_df):
    # hyperparameter grid for Optuna to explore
    hyperparams = {
        "class_weight": "balanced", 
        "device_type": trial.suggest_categorical("device_type", ["gpu"]),
        # structure
        "max_depth": trial.suggest_int("max_depth", 3, 12, step=1),
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=50),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 10000, step=100),
        # accuracy
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "n_estimators": trial.suggest_categorical("n_estimators", [500]),
        # overfitting
        #"max_bin": trial.suggest_int("max_bin", 200, 300),
        #"lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        #"lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        #"feature_fraction": trial.suggest_float("feature_fraction", 0.2, 0.95, step=0.1),
    }
    
    # split features and labels
    x_df = train_df.loc[:, ~train_df.columns.isin(["LABELS"])]
    y_df = train_df.loc[:, "LABELS"]
    # create cross-validation folds object
    kfold_cv = StratifiedKFold(n_splits=5, shuffle=True)
    
    # hold the best score of each fold
    cv_scores = []
    for idx, (train_idx, test_idx) in enumerate(kfold_cv.split(x_df, y_df)):
        # train-validation split for each fold
        x_train, x_test = x_df.iloc[train_idx], x_df.iloc[test_idx]
        y_train, y_test = y_df.iloc[train_idx], y_df.iloc[test_idx]
        
        # train classifier with optuna hyperparameters
        clf = LGBMClassifier(objective="binary", **hyperparams)
        clf.fit(
            x_train,
            y_train,
            eval_set=[(x_test, y_test)],
            eval_metric=["logloss", "double_soft_f1_loss"],
        )
        
        # store the best score to the CV score
        best_score = clf.best_score_["valid_0"]["binary_logloss"]#["double_soft_f1_loss"]
        cv_scores.append(best_score)
    
    # return the average best score across CV folds
    return np.mean(cv_scores)

In [10]:
# create optuna study
study = optuna.create_study(direction="minimize", study_name="lgbm_split")
optimize = lambda trial: objective(trial, projected_train)

[32m[I 2021-12-12 18:07:22,792][0m A new study created in memory with name: lgbm_split[0m


In [11]:
# launch optuna study
study.optimize(optimize, n_trials=20)



[1]	valid_0's binary_logloss: 0.67392
[2]	valid_0's binary_logloss: 0.659291
[3]	valid_0's binary_logloss: 0.646892
[4]	valid_0's binary_logloss: 0.638976
[5]	valid_0's binary_logloss: 0.629919
[6]	valid_0's binary_logloss: 0.623536
[7]	valid_0's binary_logloss: 0.61726
[8]	valid_0's binary_logloss: 0.61138
[9]	valid_0's binary_logloss: 0.607726
[10]	valid_0's binary_logloss: 0.604268
[11]	valid_0's binary_logloss: 0.601178
[12]	valid_0's binary_logloss: 0.597886
[13]	valid_0's binary_logloss: 0.595314
[14]	valid_0's binary_logloss: 0.592528
[15]	valid_0's binary_logloss: 0.589947
[16]	valid_0's binary_logloss: 0.588113
[17]	valid_0's binary_logloss: 0.586483
[18]	valid_0's binary_logloss: 0.584856
[19]	valid_0's binary_logloss: 0.583065
[20]	valid_0's binary_logloss: 0.581261
[21]	valid_0's binary_logloss: 0.579965
[22]	valid_0's binary_logloss: 0.578888
[23]	valid_0's binary_logloss: 0.577389
[24]	valid_0's binary_logloss: 0.576211
[25]	valid_0's binary_logloss: 0.574789
[26]	valid_0

[32m[I 2021-12-12 18:07:26,984][0m Trial 0 finished with value: 0.5544291907605169 and parameters: {'device_type': 'gpu', 'max_depth': 7, 'num_leaves': 670, 'min_data_in_leaf': 8200, 'learning_rate': 0.14272374805238366, 'n_estimators': 500, 'lambda_l1': 5, 'lambda_l2': 75, 'min_gain_to_split': 13.9303815139525}. Best is trial 0 with value: 0.5544291907605169.[0m


[279]	valid_0's binary_logloss: 0.55095
[280]	valid_0's binary_logloss: 0.55095
[281]	valid_0's binary_logloss: 0.55095
[282]	valid_0's binary_logloss: 0.55095
[283]	valid_0's binary_logloss: 0.55095
[284]	valid_0's binary_logloss: 0.55095
[285]	valid_0's binary_logloss: 0.55095
[286]	valid_0's binary_logloss: 0.55095
[287]	valid_0's binary_logloss: 0.55095
[288]	valid_0's binary_logloss: 0.55095
[289]	valid_0's binary_logloss: 0.55095
[290]	valid_0's binary_logloss: 0.55095
[291]	valid_0's binary_logloss: 0.55095
[292]	valid_0's binary_logloss: 0.55095
[293]	valid_0's binary_logloss: 0.55095
[294]	valid_0's binary_logloss: 0.55095
[295]	valid_0's binary_logloss: 0.55095
[296]	valid_0's binary_logloss: 0.55095
[297]	valid_0's binary_logloss: 0.55095
[298]	valid_0's binary_logloss: 0.55095
[299]	valid_0's binary_logloss: 0.55095
[300]	valid_0's binary_logloss: 0.55095
[301]	valid_0's binary_logloss: 0.55095
[302]	valid_0's binary_logloss: 0.55095
[303]	valid_0's binary_logloss: 0.55095




[1]	valid_0's binary_logloss: 0.663299
[2]	valid_0's binary_logloss: 0.64556
[3]	valid_0's binary_logloss: 0.631074
[4]	valid_0's binary_logloss: 0.622772
[5]	valid_0's binary_logloss: 0.614585
[6]	valid_0's binary_logloss: 0.607996
[7]	valid_0's binary_logloss: 0.602414
[8]	valid_0's binary_logloss: 0.598445
[9]	valid_0's binary_logloss: 0.594522
[10]	valid_0's binary_logloss: 0.592054
[11]	valid_0's binary_logloss: 0.589315
[12]	valid_0's binary_logloss: 0.58677
[13]	valid_0's binary_logloss: 0.584956
[14]	valid_0's binary_logloss: 0.582677
[15]	valid_0's binary_logloss: 0.580693
[16]	valid_0's binary_logloss: 0.579529
[17]	valid_0's binary_logloss: 0.577952
[18]	valid_0's binary_logloss: 0.576711
[19]	valid_0's binary_logloss: 0.575134
[20]	valid_0's binary_logloss: 0.573779
[21]	valid_0's binary_logloss: 0.572963
[22]	valid_0's binary_logloss: 0.572118
[23]	valid_0's binary_logloss: 0.571248
[24]	valid_0's binary_logloss: 0.570047
[25]	valid_0's binary_logloss: 0.56946
[26]	valid_0

[32m[I 2021-12-12 18:07:29,065][0m Trial 1 finished with value: 0.5395905039209569 and parameters: {'device_type': 'gpu', 'max_depth': 3, 'num_leaves': 470, 'min_data_in_leaf': 8100, 'learning_rate': 0.24669586462642773, 'n_estimators': 500, 'lambda_l1': 35, 'lambda_l2': 30, 'min_gain_to_split': 1.5008935023862153}. Best is trial 1 with value: 0.5395905039209569.[0m


[253]	valid_0's binary_logloss: 0.537017
[254]	valid_0's binary_logloss: 0.537017
[255]	valid_0's binary_logloss: 0.537017
[256]	valid_0's binary_logloss: 0.537017
[257]	valid_0's binary_logloss: 0.537017
[258]	valid_0's binary_logloss: 0.537017
[259]	valid_0's binary_logloss: 0.537017
[260]	valid_0's binary_logloss: 0.537017
[261]	valid_0's binary_logloss: 0.537017
[262]	valid_0's binary_logloss: 0.537017
[263]	valid_0's binary_logloss: 0.537017
[264]	valid_0's binary_logloss: 0.537017
[265]	valid_0's binary_logloss: 0.537017
[266]	valid_0's binary_logloss: 0.537017
[267]	valid_0's binary_logloss: 0.537017
[268]	valid_0's binary_logloss: 0.537017
[269]	valid_0's binary_logloss: 0.537017
[270]	valid_0's binary_logloss: 0.537017
[271]	valid_0's binary_logloss: 0.537017
[272]	valid_0's binary_logloss: 0.537017
[273]	valid_0's binary_logloss: 0.537017
[274]	valid_0's binary_logloss: 0.537017
[275]	valid_0's binary_logloss: 0.537017
[276]	valid_0's binary_logloss: 0.537017
[277]	valid_0's 



[1]	valid_0's binary_logloss: 0.649917
[2]	valid_0's binary_logloss: 0.624714
[3]	valid_0's binary_logloss: 0.608827
[4]	valid_0's binary_logloss: 0.596814
[5]	valid_0's binary_logloss: 0.589529
[6]	valid_0's binary_logloss: 0.581272
[7]	valid_0's binary_logloss: 0.576299
[8]	valid_0's binary_logloss: 0.571161
[9]	valid_0's binary_logloss: 0.566377
[10]	valid_0's binary_logloss: 0.563602
[11]	valid_0's binary_logloss: 0.561592
[12]	valid_0's binary_logloss: 0.557682
[13]	valid_0's binary_logloss: 0.555624
[14]	valid_0's binary_logloss: 0.553319
[15]	valid_0's binary_logloss: 0.552248
[16]	valid_0's binary_logloss: 0.549394
[17]	valid_0's binary_logloss: 0.550225
[18]	valid_0's binary_logloss: 0.549522
[19]	valid_0's binary_logloss: 0.549305
[20]	valid_0's binary_logloss: 0.549132
[21]	valid_0's binary_logloss: 0.548788
[22]	valid_0's binary_logloss: 0.549054
[23]	valid_0's binary_logloss: 0.549054
[24]	valid_0's binary_logloss: 0.549054
[25]	valid_0's binary_logloss: 0.549054
[26]	vali

[32m[I 2021-12-12 18:07:31,087][0m Trial 2 finished with value: 0.5417762336238878 and parameters: {'device_type': 'gpu', 'max_depth': 10, 'num_leaves': 1970, 'min_data_in_leaf': 2900, 'learning_rate': 0.29337375230890556, 'n_estimators': 500, 'lambda_l1': 100, 'lambda_l2': 25, 'min_gain_to_split': 8.30609179721309}. Best is trial 1 with value: 0.5395905039209569.[0m


[282]	valid_0's binary_logloss: 0.542888
[283]	valid_0's binary_logloss: 0.542888
[284]	valid_0's binary_logloss: 0.542888
[285]	valid_0's binary_logloss: 0.542888
[286]	valid_0's binary_logloss: 0.542888
[287]	valid_0's binary_logloss: 0.542888
[288]	valid_0's binary_logloss: 0.542888
[289]	valid_0's binary_logloss: 0.542888
[290]	valid_0's binary_logloss: 0.542888
[291]	valid_0's binary_logloss: 0.542888
[292]	valid_0's binary_logloss: 0.542888
[293]	valid_0's binary_logloss: 0.542888
[294]	valid_0's binary_logloss: 0.542888
[295]	valid_0's binary_logloss: 0.542888
[296]	valid_0's binary_logloss: 0.542888
[297]	valid_0's binary_logloss: 0.542888
[298]	valid_0's binary_logloss: 0.542888
[299]	valid_0's binary_logloss: 0.542888
[300]	valid_0's binary_logloss: 0.542888
[301]	valid_0's binary_logloss: 0.542888
[302]	valid_0's binary_logloss: 0.542888
[303]	valid_0's binary_logloss: 0.542888
[304]	valid_0's binary_logloss: 0.542888
[305]	valid_0's binary_logloss: 0.542888
[306]	valid_0's 



[1]	valid_0's binary_logloss: 0.655052
[2]	valid_0's binary_logloss: 0.63317
[3]	valid_0's binary_logloss: 0.618041
[4]	valid_0's binary_logloss: 0.60922
[5]	valid_0's binary_logloss: 0.600951
[6]	valid_0's binary_logloss: 0.592692
[7]	valid_0's binary_logloss: 0.58705
[8]	valid_0's binary_logloss: 0.582709
[9]	valid_0's binary_logloss: 0.578537
[10]	valid_0's binary_logloss: 0.575881
[11]	valid_0's binary_logloss: 0.573613
[12]	valid_0's binary_logloss: 0.571096
[13]	valid_0's binary_logloss: 0.570063
[14]	valid_0's binary_logloss: 0.567241
[15]	valid_0's binary_logloss: 0.565261
[16]	valid_0's binary_logloss: 0.564407
[17]	valid_0's binary_logloss: 0.562648
[18]	valid_0's binary_logloss: 0.561201
[19]	valid_0's binary_logloss: 0.559483
[20]	valid_0's binary_logloss: 0.558355
[21]	valid_0's binary_logloss: 0.557704
[22]	valid_0's binary_logloss: 0.557391
[23]	valid_0's binary_logloss: 0.55731
[24]	valid_0's binary_logloss: 0.557368
[25]	valid_0's binary_logloss: 0.557368
[26]	valid_0'

[32m[I 2021-12-12 18:07:32,960][0m Trial 3 finished with value: 0.5541956170495255 and parameters: {'device_type': 'gpu', 'max_depth': 7, 'num_leaves': 1970, 'min_data_in_leaf': 6000, 'learning_rate': 0.27538542888533446, 'n_estimators': 500, 'lambda_l1': 85, 'lambda_l2': 0, 'min_gain_to_split': 13.803734672662896}. Best is trial 1 with value: 0.5395905039209569.[0m


[357]	valid_0's binary_logloss: 0.554523
[358]	valid_0's binary_logloss: 0.554523
[359]	valid_0's binary_logloss: 0.554523
[360]	valid_0's binary_logloss: 0.554523
[361]	valid_0's binary_logloss: 0.554523
[362]	valid_0's binary_logloss: 0.554523
[363]	valid_0's binary_logloss: 0.554523
[364]	valid_0's binary_logloss: 0.554523
[365]	valid_0's binary_logloss: 0.554523
[366]	valid_0's binary_logloss: 0.554523
[367]	valid_0's binary_logloss: 0.554523
[368]	valid_0's binary_logloss: 0.554523
[369]	valid_0's binary_logloss: 0.554523
[370]	valid_0's binary_logloss: 0.554523
[371]	valid_0's binary_logloss: 0.554523
[372]	valid_0's binary_logloss: 0.554523
[373]	valid_0's binary_logloss: 0.554523
[374]	valid_0's binary_logloss: 0.554523
[375]	valid_0's binary_logloss: 0.554523
[376]	valid_0's binary_logloss: 0.554523
[377]	valid_0's binary_logloss: 0.554523
[378]	valid_0's binary_logloss: 0.554523
[379]	valid_0's binary_logloss: 0.554523
[380]	valid_0's binary_logloss: 0.554523
[381]	valid_0's 



[10]	valid_0's binary_logloss: 0.597755
[11]	valid_0's binary_logloss: 0.593433
[12]	valid_0's binary_logloss: 0.59002
[13]	valid_0's binary_logloss: 0.588058
[14]	valid_0's binary_logloss: 0.586089
[15]	valid_0's binary_logloss: 0.583595
[16]	valid_0's binary_logloss: 0.582118
[17]	valid_0's binary_logloss: 0.580311
[18]	valid_0's binary_logloss: 0.579245
[19]	valid_0's binary_logloss: 0.577584
[20]	valid_0's binary_logloss: 0.576452
[21]	valid_0's binary_logloss: 0.575347
[22]	valid_0's binary_logloss: 0.574575
[23]	valid_0's binary_logloss: 0.573803
[24]	valid_0's binary_logloss: 0.57228
[25]	valid_0's binary_logloss: 0.571171
[26]	valid_0's binary_logloss: 0.570096
[27]	valid_0's binary_logloss: 0.568573
[28]	valid_0's binary_logloss: 0.568019
[29]	valid_0's binary_logloss: 0.567306
[30]	valid_0's binary_logloss: 0.566553
[31]	valid_0's binary_logloss: 0.566123
[32]	valid_0's binary_logloss: 0.565861
[33]	valid_0's binary_logloss: 0.565173
[34]	valid_0's binary_logloss: 0.564511
[3

[32m[I 2021-12-12 18:07:35,573][0m Trial 4 finished with value: 0.519893592148326 and parameters: {'device_type': 'gpu', 'max_depth': 4, 'num_leaves': 2320, 'min_data_in_leaf': 9100, 'learning_rate': 0.21182897639839363, 'n_estimators': 500, 'lambda_l1': 10, 'lambda_l2': 10, 'min_gain_to_split': 0.568698567900976}. Best is trial 4 with value: 0.519893592148326.[0m


[255]	valid_0's binary_logloss: 0.524473
[256]	valid_0's binary_logloss: 0.524416
[257]	valid_0's binary_logloss: 0.524358
[258]	valid_0's binary_logloss: 0.524322
[259]	valid_0's binary_logloss: 0.52421
[260]	valid_0's binary_logloss: 0.524134
[261]	valid_0's binary_logloss: 0.524085
[262]	valid_0's binary_logloss: 0.523898
[263]	valid_0's binary_logloss: 0.523864
[264]	valid_0's binary_logloss: 0.523746
[265]	valid_0's binary_logloss: 0.523702
[266]	valid_0's binary_logloss: 0.523654
[267]	valid_0's binary_logloss: 0.523539
[268]	valid_0's binary_logloss: 0.523463
[269]	valid_0's binary_logloss: 0.523418
[270]	valid_0's binary_logloss: 0.523279
[271]	valid_0's binary_logloss: 0.523255
[272]	valid_0's binary_logloss: 0.523158
[273]	valid_0's binary_logloss: 0.523036
[274]	valid_0's binary_logloss: 0.523082
[275]	valid_0's binary_logloss: 0.523095
[276]	valid_0's binary_logloss: 0.523055
[277]	valid_0's binary_logloss: 0.522949
[278]	valid_0's binary_logloss: 0.522836
[279]	valid_0's b



[1]	valid_0's binary_logloss: 0.655207
[2]	valid_0's binary_logloss: 0.630848
[3]	valid_0's binary_logloss: 0.614482
[4]	valid_0's binary_logloss: 0.603682
[5]	valid_0's binary_logloss: 0.595842
[6]	valid_0's binary_logloss: 0.590208
[7]	valid_0's binary_logloss: 0.584338
[8]	valid_0's binary_logloss: 0.579379
[9]	valid_0's binary_logloss: 0.574932
[10]	valid_0's binary_logloss: 0.572009
[11]	valid_0's binary_logloss: 0.569509
[12]	valid_0's binary_logloss: 0.566705
[13]	valid_0's binary_logloss: 0.563737
[14]	valid_0's binary_logloss: 0.561142
[15]	valid_0's binary_logloss: 0.55939
[16]	valid_0's binary_logloss: 0.556839
[17]	valid_0's binary_logloss: 0.55526
[18]	valid_0's binary_logloss: 0.553175
[19]	valid_0's binary_logloss: 0.552026
[20]	valid_0's binary_logloss: 0.549686
[21]	valid_0's binary_logloss: 0.548993
[22]	valid_0's binary_logloss: 0.547824
[23]	valid_0's binary_logloss: 0.546013
[24]	valid_0's binary_logloss: 0.544843
[25]	valid_0's binary_logloss: 0.543756
[26]	valid_

[32m[I 2021-12-12 18:07:37,531][0m Trial 5 finished with value: 0.5259357388634295 and parameters: {'device_type': 'gpu', 'max_depth': 6, 'num_leaves': 1770, 'min_data_in_leaf': 4500, 'learning_rate': 0.24992558415372895, 'n_estimators': 500, 'lambda_l1': 15, 'lambda_l2': 10, 'min_gain_to_split': 5.051699623284221}. Best is trial 4 with value: 0.519893592148326.[0m


[248]	valid_0's binary_logloss: 0.519919
[249]	valid_0's binary_logloss: 0.519919
[250]	valid_0's binary_logloss: 0.519919
[251]	valid_0's binary_logloss: 0.519919
[252]	valid_0's binary_logloss: 0.519919
[253]	valid_0's binary_logloss: 0.519919
[254]	valid_0's binary_logloss: 0.519919
[255]	valid_0's binary_logloss: 0.519919
[256]	valid_0's binary_logloss: 0.519919
[257]	valid_0's binary_logloss: 0.519919
[258]	valid_0's binary_logloss: 0.519919
[259]	valid_0's binary_logloss: 0.519919
[260]	valid_0's binary_logloss: 0.519919
[261]	valid_0's binary_logloss: 0.519919
[262]	valid_0's binary_logloss: 0.519919
[263]	valid_0's binary_logloss: 0.519919
[264]	valid_0's binary_logloss: 0.519919
[265]	valid_0's binary_logloss: 0.519919
[266]	valid_0's binary_logloss: 0.519919
[267]	valid_0's binary_logloss: 0.519919
[268]	valid_0's binary_logloss: 0.519919
[269]	valid_0's binary_logloss: 0.519919
[270]	valid_0's binary_logloss: 0.519919
[271]	valid_0's binary_logloss: 0.519919
[272]	valid_0's 



[1]	valid_0's binary_logloss: 0.677341
[2]	valid_0's binary_logloss: 0.664457
[3]	valid_0's binary_logloss: 0.653412
[4]	valid_0's binary_logloss: 0.644023
[5]	valid_0's binary_logloss: 0.635417
[6]	valid_0's binary_logloss: 0.628951
[7]	valid_0's binary_logloss: 0.62291
[8]	valid_0's binary_logloss: 0.61727
[9]	valid_0's binary_logloss: 0.612393
[10]	valid_0's binary_logloss: 0.607712
[11]	valid_0's binary_logloss: 0.603605
[12]	valid_0's binary_logloss: 0.599642
[13]	valid_0's binary_logloss: 0.596771
[14]	valid_0's binary_logloss: 0.593443
[15]	valid_0's binary_logloss: 0.590521
[16]	valid_0's binary_logloss: 0.587347
[17]	valid_0's binary_logloss: 0.58502
[18]	valid_0's binary_logloss: 0.582586
[19]	valid_0's binary_logloss: 0.58052
[20]	valid_0's binary_logloss: 0.578354
[21]	valid_0's binary_logloss: 0.57643
[22]	valid_0's binary_logloss: 0.574268
[23]	valid_0's binary_logloss: 0.572288
[24]	valid_0's binary_logloss: 0.570407
[25]	valid_0's binary_logloss: 0.569172
[26]	valid_0's

[32m[I 2021-12-12 18:07:39,773][0m Trial 6 finished with value: 0.5409743842539679 and parameters: {'device_type': 'gpu', 'max_depth': 8, 'num_leaves': 1270, 'min_data_in_leaf': 2800, 'learning_rate': 0.09417066529589396, 'n_estimators': 500, 'lambda_l1': 90, 'lambda_l2': 100, 'min_gain_to_split': 9.144235722025917}. Best is trial 4 with value: 0.519893592148326.[0m



[188]	valid_0's binary_logloss: 0.53996
[189]	valid_0's binary_logloss: 0.53996
[190]	valid_0's binary_logloss: 0.53996
[191]	valid_0's binary_logloss: 0.53996
[192]	valid_0's binary_logloss: 0.53996
[193]	valid_0's binary_logloss: 0.53996
[194]	valid_0's binary_logloss: 0.53996
[195]	valid_0's binary_logloss: 0.53996
[196]	valid_0's binary_logloss: 0.53996
[197]	valid_0's binary_logloss: 0.53996
[198]	valid_0's binary_logloss: 0.53996
[199]	valid_0's binary_logloss: 0.53996
[200]	valid_0's binary_logloss: 0.53996
[201]	valid_0's binary_logloss: 0.53996
[202]	valid_0's binary_logloss: 0.53996
[203]	valid_0's binary_logloss: 0.53996
[204]	valid_0's binary_logloss: 0.53996
[205]	valid_0's binary_logloss: 0.53996
[206]	valid_0's binary_logloss: 0.53996
[207]	valid_0's binary_logloss: 0.53996
[208]	valid_0's binary_logloss: 0.53996
[209]	valid_0's binary_logloss: 0.53996
[210]	valid_0's binary_logloss: 0.53996
[211]	valid_0's binary_logloss: 0.53996
[212]	valid_0's binary_logloss: 0.53996



[1]	valid_0's binary_logloss: 0.65756
[2]	valid_0's binary_logloss: 0.635356
[3]	valid_0's binary_logloss: 0.61994
[4]	valid_0's binary_logloss: 0.610391
[5]	valid_0's binary_logloss: 0.601085
[6]	valid_0's binary_logloss: 0.594227
[7]	valid_0's binary_logloss: 0.58844
[8]	valid_0's binary_logloss: 0.58375
[9]	valid_0's binary_logloss: 0.579932
[10]	valid_0's binary_logloss: 0.577539
[11]	valid_0's binary_logloss: 0.575483
[12]	valid_0's binary_logloss: 0.572036
[13]	valid_0's binary_logloss: 0.570151
[14]	valid_0's binary_logloss: 0.567951
[15]	valid_0's binary_logloss: 0.565945
[16]	valid_0's binary_logloss: 0.563609
[17]	valid_0's binary_logloss: 0.562227
[18]	valid_0's binary_logloss: 0.559572
[19]	valid_0's binary_logloss: 0.558389
[20]	valid_0's binary_logloss: 0.557878
[21]	valid_0's binary_logloss: 0.557475
[22]	valid_0's binary_logloss: 0.556707
[23]	valid_0's binary_logloss: 0.556043
[24]	valid_0's binary_logloss: 0.555112
[25]	valid_0's binary_logloss: 0.55339
[26]	valid_0's

[32m[I 2021-12-12 18:07:41,748][0m Trial 7 finished with value: 0.5487054684428154 and parameters: {'device_type': 'gpu', 'max_depth': 10, 'num_leaves': 2170, 'min_data_in_leaf': 7000, 'learning_rate': 0.2732529233908443, 'n_estimators': 500, 'lambda_l1': 75, 'lambda_l2': 5, 'min_gain_to_split': 5.059030561554837}. Best is trial 4 with value: 0.519893592148326.[0m


[283]	valid_0's binary_logloss: 0.555393
[284]	valid_0's binary_logloss: 0.555393
[285]	valid_0's binary_logloss: 0.555393
[286]	valid_0's binary_logloss: 0.555393
[287]	valid_0's binary_logloss: 0.555393
[288]	valid_0's binary_logloss: 0.555393
[289]	valid_0's binary_logloss: 0.555393
[290]	valid_0's binary_logloss: 0.555393
[291]	valid_0's binary_logloss: 0.555393
[292]	valid_0's binary_logloss: 0.555393
[293]	valid_0's binary_logloss: 0.555393
[294]	valid_0's binary_logloss: 0.555393
[295]	valid_0's binary_logloss: 0.555393
[296]	valid_0's binary_logloss: 0.555393
[297]	valid_0's binary_logloss: 0.555393
[298]	valid_0's binary_logloss: 0.555393
[299]	valid_0's binary_logloss: 0.555393
[300]	valid_0's binary_logloss: 0.555393
[301]	valid_0's binary_logloss: 0.555393
[302]	valid_0's binary_logloss: 0.555393
[303]	valid_0's binary_logloss: 0.555393
[304]	valid_0's binary_logloss: 0.555393
[305]	valid_0's binary_logloss: 0.555393
[306]	valid_0's binary_logloss: 0.555393
[307]	valid_0's 



[1]	valid_0's binary_logloss: 0.676198
[2]	valid_0's binary_logloss: 0.663193
[3]	valid_0's binary_logloss: 0.652073
[4]	valid_0's binary_logloss: 0.643393
[5]	valid_0's binary_logloss: 0.634192
[6]	valid_0's binary_logloss: 0.628257
[7]	valid_0's binary_logloss: 0.622125
[8]	valid_0's binary_logloss: 0.617067
[9]	valid_0's binary_logloss: 0.613638
[10]	valid_0's binary_logloss: 0.609107
[11]	valid_0's binary_logloss: 0.605483
[12]	valid_0's binary_logloss: 0.603113
[13]	valid_0's binary_logloss: 0.600412
[14]	valid_0's binary_logloss: 0.59805
[15]	valid_0's binary_logloss: 0.595245
[16]	valid_0's binary_logloss: 0.593108
[17]	valid_0's binary_logloss: 0.591406
[18]	valid_0's binary_logloss: 0.589359
[19]	valid_0's binary_logloss: 0.587812
[20]	valid_0's binary_logloss: 0.585773
[21]	valid_0's binary_logloss: 0.584568
[22]	valid_0's binary_logloss: 0.582531
[23]	valid_0's binary_logloss: 0.581336
[24]	valid_0's binary_logloss: 0.579976
[25]	valid_0's binary_logloss: 0.579033
[26]	valid

[32m[I 2021-12-12 18:07:43,813][0m Trial 8 finished with value: 0.5539646608444675 and parameters: {'device_type': 'gpu', 'max_depth': 6, 'num_leaves': 1920, 'min_data_in_leaf': 8500, 'learning_rate': 0.12008160060210357, 'n_estimators': 500, 'lambda_l1': 20, 'lambda_l2': 70, 'min_gain_to_split': 9.247265196089941}. Best is trial 4 with value: 0.519893592148326.[0m


[245]	valid_0's binary_logloss: 0.554599
[246]	valid_0's binary_logloss: 0.554599
[247]	valid_0's binary_logloss: 0.554599
[248]	valid_0's binary_logloss: 0.554599
[249]	valid_0's binary_logloss: 0.554599
[250]	valid_0's binary_logloss: 0.554599
[251]	valid_0's binary_logloss: 0.554599
[252]	valid_0's binary_logloss: 0.554599
[253]	valid_0's binary_logloss: 0.554599
[254]	valid_0's binary_logloss: 0.554599
[255]	valid_0's binary_logloss: 0.554599
[256]	valid_0's binary_logloss: 0.554599
[257]	valid_0's binary_logloss: 0.554599
[258]	valid_0's binary_logloss: 0.554599
[259]	valid_0's binary_logloss: 0.554599
[260]	valid_0's binary_logloss: 0.554599
[261]	valid_0's binary_logloss: 0.554599
[262]	valid_0's binary_logloss: 0.554599
[263]	valid_0's binary_logloss: 0.554599
[264]	valid_0's binary_logloss: 0.554599
[265]	valid_0's binary_logloss: 0.554599
[266]	valid_0's binary_logloss: 0.554599
[267]	valid_0's binary_logloss: 0.554599
[268]	valid_0's binary_logloss: 0.554599
[269]	valid_0's 



[1]	valid_0's binary_logloss: 0.671011
[2]	valid_0's binary_logloss: 0.654467
[3]	valid_0's binary_logloss: 0.641563
[4]	valid_0's binary_logloss: 0.633824
[5]	valid_0's binary_logloss: 0.626313
[6]	valid_0's binary_logloss: 0.622086
[7]	valid_0's binary_logloss: 0.616813
[8]	valid_0's binary_logloss: 0.611451
[9]	valid_0's binary_logloss: 0.606923
[10]	valid_0's binary_logloss: 0.602616
[11]	valid_0's binary_logloss: 0.599494
[12]	valid_0's binary_logloss: 0.596997
[13]	valid_0's binary_logloss: 0.594615
[14]	valid_0's binary_logloss: 0.592119
[15]	valid_0's binary_logloss: 0.590484
[16]	valid_0's binary_logloss: 0.588812
[17]	valid_0's binary_logloss: 0.586704
[18]	valid_0's binary_logloss: 0.585245
[19]	valid_0's binary_logloss: 0.583571
[20]	valid_0's binary_logloss: 0.582042
[21]	valid_0's binary_logloss: 0.581253
[22]	valid_0's binary_logloss: 0.579698
[23]	valid_0's binary_logloss: 0.578752
[24]	valid_0's binary_logloss: 0.577579
[25]	valid_0's binary_logloss: 0.576956
[26]	vali

[32m[I 2021-12-12 18:07:46,273][0m Trial 9 finished with value: 0.5344837301125969 and parameters: {'device_type': 'gpu', 'max_depth': 3, 'num_leaves': 1120, 'min_data_in_leaf': 9000, 'learning_rate': 0.18536915496367537, 'n_estimators': 500, 'lambda_l1': 50, 'lambda_l2': 70, 'min_gain_to_split': 0.1560777949543346}. Best is trial 4 with value: 0.519893592148326.[0m


[462]	valid_0's binary_logloss: 0.535669
[463]	valid_0's binary_logloss: 0.535669
[464]	valid_0's binary_logloss: 0.535669
[465]	valid_0's binary_logloss: 0.535669
[466]	valid_0's binary_logloss: 0.535669
[467]	valid_0's binary_logloss: 0.535669
[468]	valid_0's binary_logloss: 0.535669
[469]	valid_0's binary_logloss: 0.535669
[470]	valid_0's binary_logloss: 0.535669
[471]	valid_0's binary_logloss: 0.535669
[472]	valid_0's binary_logloss: 0.535669
[473]	valid_0's binary_logloss: 0.535669
[474]	valid_0's binary_logloss: 0.535669
[475]	valid_0's binary_logloss: 0.535669
[476]	valid_0's binary_logloss: 0.535669
[477]	valid_0's binary_logloss: 0.535669
[478]	valid_0's binary_logloss: 0.535669
[479]	valid_0's binary_logloss: 0.535669
[480]	valid_0's binary_logloss: 0.535669
[481]	valid_0's binary_logloss: 0.535669
[482]	valid_0's binary_logloss: 0.535669
[483]	valid_0's binary_logloss: 0.535669
[484]	valid_0's binary_logloss: 0.535669
[485]	valid_0's binary_logloss: 0.535669
[486]	valid_0's 



[39]	valid_0's binary_logloss: 0.610639
[40]	valid_0's binary_logloss: 0.609489
[41]	valid_0's binary_logloss: 0.60859
[42]	valid_0's binary_logloss: 0.607503
[43]	valid_0's binary_logloss: 0.607028
[44]	valid_0's binary_logloss: 0.606046
[45]	valid_0's binary_logloss: 0.605342
[46]	valid_0's binary_logloss: 0.604891
[47]	valid_0's binary_logloss: 0.604062
[48]	valid_0's binary_logloss: 0.603352
[49]	valid_0's binary_logloss: 0.602581
[50]	valid_0's binary_logloss: 0.602052
[51]	valid_0's binary_logloss: 0.601211
[52]	valid_0's binary_logloss: 0.600448
[53]	valid_0's binary_logloss: 0.599899
[54]	valid_0's binary_logloss: 0.599236
[55]	valid_0's binary_logloss: 0.598603
[56]	valid_0's binary_logloss: 0.598014
[57]	valid_0's binary_logloss: 0.597554
[58]	valid_0's binary_logloss: 0.596846
[59]	valid_0's binary_logloss: 0.596459
[60]	valid_0's binary_logloss: 0.59585
[61]	valid_0's binary_logloss: 0.595311
[62]	valid_0's binary_logloss: 0.594913
[63]	valid_0's binary_logloss: 0.594427
[6

[32m[I 2021-12-12 18:07:49,269][0m Trial 10 finished with value: 0.5557662267864645 and parameters: {'device_type': 'gpu', 'max_depth': 5, 'num_leaves': 2970, 'min_data_in_leaf': 9800, 'learning_rate': 0.038077209556916805, 'n_estimators': 500, 'lambda_l1': 60, 'lambda_l2': 45, 'min_gain_to_split': 2.998348656915453}. Best is trial 4 with value: 0.519893592148326.[0m


[360]	valid_0's binary_logloss: 0.56114
[361]	valid_0's binary_logloss: 0.561095
[362]	valid_0's binary_logloss: 0.561083
[363]	valid_0's binary_logloss: 0.560983
[364]	valid_0's binary_logloss: 0.560928
[365]	valid_0's binary_logloss: 0.560912
[366]	valid_0's binary_logloss: 0.560842
[367]	valid_0's binary_logloss: 0.560804
[368]	valid_0's binary_logloss: 0.560797
[369]	valid_0's binary_logloss: 0.560744
[370]	valid_0's binary_logloss: 0.560676
[371]	valid_0's binary_logloss: 0.560676
[372]	valid_0's binary_logloss: 0.560672
[373]	valid_0's binary_logloss: 0.560632
[374]	valid_0's binary_logloss: 0.560617
[375]	valid_0's binary_logloss: 0.56058
[376]	valid_0's binary_logloss: 0.56057
[377]	valid_0's binary_logloss: 0.560566
[378]	valid_0's binary_logloss: 0.560514
[379]	valid_0's binary_logloss: 0.560502
[380]	valid_0's binary_logloss: 0.560492
[381]	valid_0's binary_logloss: 0.560481
[382]	valid_0's binary_logloss: 0.560447
[383]	valid_0's binary_logloss: 0.560352
[384]	valid_0's bin



[1]	valid_0's binary_logloss: 0.661156
[2]	valid_0's binary_logloss: 0.638568
[3]	valid_0's binary_logloss: 0.622098
[4]	valid_0's binary_logloss: 0.611375
[5]	valid_0's binary_logloss: 0.603439
[6]	valid_0's binary_logloss: 0.595843
[7]	valid_0's binary_logloss: 0.589795
[8]	valid_0's binary_logloss: 0.58478
[9]	valid_0's binary_logloss: 0.579844
[10]	valid_0's binary_logloss: 0.575356
[11]	valid_0's binary_logloss: 0.5725
[12]	valid_0's binary_logloss: 0.569083
[13]	valid_0's binary_logloss: 0.56625
[14]	valid_0's binary_logloss: 0.564118
[15]	valid_0's binary_logloss: 0.562126
[16]	valid_0's binary_logloss: 0.560593
[17]	valid_0's binary_logloss: 0.558765
[18]	valid_0's binary_logloss: 0.556694
[19]	valid_0's binary_logloss: 0.553924
[20]	valid_0's binary_logloss: 0.551864
[21]	valid_0's binary_logloss: 0.550684
[22]	valid_0's binary_logloss: 0.549319
[23]	valid_0's binary_logloss: 0.547534
[24]	valid_0's binary_logloss: 0.546135
[25]	valid_0's binary_logloss: 0.54536
[26]	valid_0's

[32m[I 2021-12-12 18:07:51,382][0m Trial 11 finished with value: 0.5232985950980118 and parameters: {'device_type': 'gpu', 'max_depth': 5, 'num_leaves': 2770, 'min_data_in_leaf': 4500, 'learning_rate': 0.2032488430007506, 'n_estimators': 500, 'lambda_l1': 0, 'lambda_l2': 20, 'min_gain_to_split': 4.996160855387339}. Best is trial 4 with value: 0.519893592148326.[0m


[190]	valid_0's binary_logloss: 0.527531
[191]	valid_0's binary_logloss: 0.527531
[192]	valid_0's binary_logloss: 0.527531
[193]	valid_0's binary_logloss: 0.527531
[194]	valid_0's binary_logloss: 0.527531
[195]	valid_0's binary_logloss: 0.527531
[196]	valid_0's binary_logloss: 0.527531
[197]	valid_0's binary_logloss: 0.527531
[198]	valid_0's binary_logloss: 0.527531
[199]	valid_0's binary_logloss: 0.527531
[200]	valid_0's binary_logloss: 0.527531
[201]	valid_0's binary_logloss: 0.527531
[202]	valid_0's binary_logloss: 0.527531
[203]	valid_0's binary_logloss: 0.527531
[204]	valid_0's binary_logloss: 0.527531
[205]	valid_0's binary_logloss: 0.527531
[206]	valid_0's binary_logloss: 0.527531
[207]	valid_0's binary_logloss: 0.527531
[208]	valid_0's binary_logloss: 0.527531
[209]	valid_0's binary_logloss: 0.527531
[210]	valid_0's binary_logloss: 0.527531
[211]	valid_0's binary_logloss: 0.527531
[212]	valid_0's binary_logloss: 0.527531
[213]	valid_0's binary_logloss: 0.527531
[214]	valid_0's 



[1]	valid_0's binary_logloss: 0.658149
[2]	valid_0's binary_logloss: 0.634927
[3]	valid_0's binary_logloss: 0.617942
[4]	valid_0's binary_logloss: 0.604497
[5]	valid_0's binary_logloss: 0.593803
[6]	valid_0's binary_logloss: 0.586089
[7]	valid_0's binary_logloss: 0.579765
[8]	valid_0's binary_logloss: 0.574801
[9]	valid_0's binary_logloss: 0.571332
[10]	valid_0's binary_logloss: 0.567597
[11]	valid_0's binary_logloss: 0.562323
[12]	valid_0's binary_logloss: 0.56067
[13]	valid_0's binary_logloss: 0.557987
[14]	valid_0's binary_logloss: 0.555361
[15]	valid_0's binary_logloss: 0.552932
[16]	valid_0's binary_logloss: 0.548576
[17]	valid_0's binary_logloss: 0.545388
[18]	valid_0's binary_logloss: 0.544028
[19]	valid_0's binary_logloss: 0.542282
[20]	valid_0's binary_logloss: 0.540534
[21]	valid_0's binary_logloss: 0.539435
[22]	valid_0's binary_logloss: 0.538549
[23]	valid_0's binary_logloss: 0.537693
[24]	valid_0's binary_logloss: 0.536063
[25]	valid_0's binary_logloss: 0.535603
[26]	valid

[32m[I 2021-12-12 18:07:53,505][0m Trial 12 finished with value: 0.5071252980236087 and parameters: {'device_type': 'gpu', 'max_depth': 4, 'num_leaves': 2770, 'min_data_in_leaf': 1200, 'learning_rate': 0.20208612810988877, 'n_estimators': 500, 'lambda_l1': 0, 'lambda_l2': 25, 'min_gain_to_split': 4.257034488884486}. Best is trial 12 with value: 0.5071252980236087.[0m


[196]	valid_0's binary_logloss: 0.507872
[197]	valid_0's binary_logloss: 0.507872
[198]	valid_0's binary_logloss: 0.507872
[199]	valid_0's binary_logloss: 0.507872
[200]	valid_0's binary_logloss: 0.507872
[201]	valid_0's binary_logloss: 0.507872
[202]	valid_0's binary_logloss: 0.507872
[203]	valid_0's binary_logloss: 0.507872
[204]	valid_0's binary_logloss: 0.507872
[205]	valid_0's binary_logloss: 0.507872
[206]	valid_0's binary_logloss: 0.507872
[207]	valid_0's binary_logloss: 0.507872
[208]	valid_0's binary_logloss: 0.507872
[209]	valid_0's binary_logloss: 0.507872
[210]	valid_0's binary_logloss: 0.507872
[211]	valid_0's binary_logloss: 0.507872
[212]	valid_0's binary_logloss: 0.507872
[213]	valid_0's binary_logloss: 0.507872
[214]	valid_0's binary_logloss: 0.507872
[215]	valid_0's binary_logloss: 0.507872
[216]	valid_0's binary_logloss: 0.507872
[217]	valid_0's binary_logloss: 0.507872
[218]	valid_0's binary_logloss: 0.507872
[219]	valid_0's binary_logloss: 0.507872
[220]	valid_0's 



[1]	valid_0's binary_logloss: 0.650915
[2]	valid_0's binary_logloss: 0.622178
[3]	valid_0's binary_logloss: 0.600003
[4]	valid_0's binary_logloss: 0.583326
[5]	valid_0's binary_logloss: 0.569101
[6]	valid_0's binary_logloss: 0.5581
[7]	valid_0's binary_logloss: 0.548831
[8]	valid_0's binary_logloss: 0.54099
[9]	valid_0's binary_logloss: 0.534416
[10]	valid_0's binary_logloss: 0.528899
[11]	valid_0's binary_logloss: 0.523472
[12]	valid_0's binary_logloss: 0.5191
[13]	valid_0's binary_logloss: 0.515507
[14]	valid_0's binary_logloss: 0.511607
[15]	valid_0's binary_logloss: 0.509109
[16]	valid_0's binary_logloss: 0.506376
[17]	valid_0's binary_logloss: 0.50445
[18]	valid_0's binary_logloss: 0.502531
[19]	valid_0's binary_logloss: 0.501001
[20]	valid_0's binary_logloss: 0.498755
[21]	valid_0's binary_logloss: 0.497225
[22]	valid_0's binary_logloss: 0.495856
[23]	valid_0's binary_logloss: 0.494619
[24]	valid_0's binary_logloss: 0.493268
[25]	valid_0's binary_logloss: 0.492626
[26]	valid_0's 

[32m[I 2021-12-12 18:07:56,550][0m Trial 13 finished with value: 0.4916794108004551 and parameters: {'device_type': 'gpu', 'max_depth': 12, 'num_leaves': 2520, 'min_data_in_leaf': 300, 'learning_rate': 0.2011884316999342, 'n_estimators': 500, 'lambda_l1': 30, 'lambda_l2': 35, 'min_gain_to_split': 2.7591039026962063}. Best is trial 13 with value: 0.4916794108004551.[0m


[318]	valid_0's binary_logloss: 0.48595
[319]	valid_0's binary_logloss: 0.48595
[320]	valid_0's binary_logloss: 0.48595
[321]	valid_0's binary_logloss: 0.48595
[322]	valid_0's binary_logloss: 0.48595
[323]	valid_0's binary_logloss: 0.48595
[324]	valid_0's binary_logloss: 0.48595
[325]	valid_0's binary_logloss: 0.48595
[326]	valid_0's binary_logloss: 0.48595
[327]	valid_0's binary_logloss: 0.48595
[328]	valid_0's binary_logloss: 0.48595
[329]	valid_0's binary_logloss: 0.48595
[330]	valid_0's binary_logloss: 0.48595
[331]	valid_0's binary_logloss: 0.48595
[332]	valid_0's binary_logloss: 0.48595
[333]	valid_0's binary_logloss: 0.48595
[334]	valid_0's binary_logloss: 0.48595
[335]	valid_0's binary_logloss: 0.48595
[336]	valid_0's binary_logloss: 0.48595
[337]	valid_0's binary_logloss: 0.48595
[338]	valid_0's binary_logloss: 0.48595
[339]	valid_0's binary_logloss: 0.48595
[340]	valid_0's binary_logloss: 0.48595
[341]	valid_0's binary_logloss: 0.48595
[342]	valid_0's binary_logloss: 0.48595




[1]	valid_0's binary_logloss: 0.659617
[2]	valid_0's binary_logloss: 0.634828
[3]	valid_0's binary_logloss: 0.61661
[4]	valid_0's binary_logloss: 0.60282
[5]	valid_0's binary_logloss: 0.589782
[6]	valid_0's binary_logloss: 0.580481
[7]	valid_0's binary_logloss: 0.572713
[8]	valid_0's binary_logloss: 0.566336
[9]	valid_0's binary_logloss: 0.560028
[10]	valid_0's binary_logloss: 0.554285
[11]	valid_0's binary_logloss: 0.549873
[12]	valid_0's binary_logloss: 0.545035
[13]	valid_0's binary_logloss: 0.541532
[14]	valid_0's binary_logloss: 0.538579
[15]	valid_0's binary_logloss: 0.535182
[16]	valid_0's binary_logloss: 0.532932
[17]	valid_0's binary_logloss: 0.530585
[18]	valid_0's binary_logloss: 0.528753
[19]	valid_0's binary_logloss: 0.527049
[20]	valid_0's binary_logloss: 0.524645
[21]	valid_0's binary_logloss: 0.523312
[22]	valid_0's binary_logloss: 0.521211
[23]	valid_0's binary_logloss: 0.519594
[24]	valid_0's binary_logloss: 0.518271
[25]	valid_0's binary_logloss: 0.517113
[26]	valid_

[32m[I 2021-12-12 18:07:59,462][0m Trial 14 finished with value: 0.4999887727084653 and parameters: {'device_type': 'gpu', 'max_depth': 12, 'num_leaves': 2620, 'min_data_in_leaf': 700, 'learning_rate': 0.17020435275294882, 'n_estimators': 500, 'lambda_l1': 35, 'lambda_l2': 45, 'min_gain_to_split': 3.0728648586764695}. Best is trial 13 with value: 0.4916794108004551.[0m


[410]	valid_0's binary_logloss: 0.49766
[411]	valid_0's binary_logloss: 0.49766
[412]	valid_0's binary_logloss: 0.49766
[413]	valid_0's binary_logloss: 0.49766
[414]	valid_0's binary_logloss: 0.49766
[415]	valid_0's binary_logloss: 0.49766
[416]	valid_0's binary_logloss: 0.49766
[417]	valid_0's binary_logloss: 0.49766
[418]	valid_0's binary_logloss: 0.49766
[419]	valid_0's binary_logloss: 0.49766
[420]	valid_0's binary_logloss: 0.49766
[421]	valid_0's binary_logloss: 0.49766
[422]	valid_0's binary_logloss: 0.49766
[423]	valid_0's binary_logloss: 0.49766
[424]	valid_0's binary_logloss: 0.49766
[425]	valid_0's binary_logloss: 0.49766
[426]	valid_0's binary_logloss: 0.49766
[427]	valid_0's binary_logloss: 0.49766
[428]	valid_0's binary_logloss: 0.49766
[429]	valid_0's binary_logloss: 0.49766
[430]	valid_0's binary_logloss: 0.49766
[431]	valid_0's binary_logloss: 0.49766
[432]	valid_0's binary_logloss: 0.49766
[433]	valid_0's binary_logloss: 0.49766
[434]	valid_0's binary_logloss: 0.49766




[5]	valid_0's binary_logloss: 0.593179
[6]	valid_0's binary_logloss: 0.582454
[7]	valid_0's binary_logloss: 0.574724
[8]	valid_0's binary_logloss: 0.567639
[9]	valid_0's binary_logloss: 0.562547
[10]	valid_0's binary_logloss: 0.557424
[11]	valid_0's binary_logloss: 0.552888
[12]	valid_0's binary_logloss: 0.547874
[13]	valid_0's binary_logloss: 0.544629
[14]	valid_0's binary_logloss: 0.540959
[15]	valid_0's binary_logloss: 0.5383
[16]	valid_0's binary_logloss: 0.535035
[17]	valid_0's binary_logloss: 0.532011
[18]	valid_0's binary_logloss: 0.530166
[19]	valid_0's binary_logloss: 0.528208
[20]	valid_0's binary_logloss: 0.526045
[21]	valid_0's binary_logloss: 0.524065
[22]	valid_0's binary_logloss: 0.522031
[23]	valid_0's binary_logloss: 0.51996
[24]	valid_0's binary_logloss: 0.517953
[25]	valid_0's binary_logloss: 0.516197
[26]	valid_0's binary_logloss: 0.514684
[27]	valid_0's binary_logloss: 0.513215
[28]	valid_0's binary_logloss: 0.511702
[29]	valid_0's binary_logloss: 0.510835
[30]	val

[32m[I 2021-12-12 18:08:01,957][0m Trial 15 finished with value: 0.5017148084216796 and parameters: {'device_type': 'gpu', 'max_depth': 12, 'num_leaves': 20, 'min_data_in_leaf': 800, 'learning_rate': 0.16171082872885792, 'n_estimators': 500, 'lambda_l1': 35, 'lambda_l2': 45, 'min_gain_to_split': 2.4780502572104197}. Best is trial 13 with value: 0.4916794108004551.[0m


[430]	valid_0's binary_logloss: 0.506076
[431]	valid_0's binary_logloss: 0.506076
[432]	valid_0's binary_logloss: 0.506076
[433]	valid_0's binary_logloss: 0.506076
[434]	valid_0's binary_logloss: 0.506076
[435]	valid_0's binary_logloss: 0.506076
[436]	valid_0's binary_logloss: 0.506076
[437]	valid_0's binary_logloss: 0.506076
[438]	valid_0's binary_logloss: 0.506076
[439]	valid_0's binary_logloss: 0.506076
[440]	valid_0's binary_logloss: 0.506076
[441]	valid_0's binary_logloss: 0.506076
[442]	valid_0's binary_logloss: 0.506076
[443]	valid_0's binary_logloss: 0.506076
[444]	valid_0's binary_logloss: 0.506076
[445]	valid_0's binary_logloss: 0.506076
[446]	valid_0's binary_logloss: 0.506076
[447]	valid_0's binary_logloss: 0.506076
[448]	valid_0's binary_logloss: 0.506076
[449]	valid_0's binary_logloss: 0.506076
[450]	valid_0's binary_logloss: 0.506076
[451]	valid_0's binary_logloss: 0.506076
[452]	valid_0's binary_logloss: 0.506076
[453]	valid_0's binary_logloss: 0.506076
[454]	valid_0's 



[2]	valid_0's binary_logloss: 0.661916
[3]	valid_0's binary_logloss: 0.649328
[4]	valid_0's binary_logloss: 0.638312
[5]	valid_0's binary_logloss: 0.628286
[6]	valid_0's binary_logloss: 0.619261
[7]	valid_0's binary_logloss: 0.611133
[8]	valid_0's binary_logloss: 0.60347
[9]	valid_0's binary_logloss: 0.596897
[10]	valid_0's binary_logloss: 0.590848
[11]	valid_0's binary_logloss: 0.585305
[12]	valid_0's binary_logloss: 0.579835
[13]	valid_0's binary_logloss: 0.575422
[14]	valid_0's binary_logloss: 0.571209
[15]	valid_0's binary_logloss: 0.566907
[16]	valid_0's binary_logloss: 0.563369
[17]	valid_0's binary_logloss: 0.559901
[18]	valid_0's binary_logloss: 0.556774
[19]	valid_0's binary_logloss: 0.553522
[20]	valid_0's binary_logloss: 0.551006
[21]	valid_0's binary_logloss: 0.548625
[22]	valid_0's binary_logloss: 0.546109
[23]	valid_0's binary_logloss: 0.543523
[24]	valid_0's binary_logloss: 0.541369
[25]	valid_0's binary_logloss: 0.539458
[26]	valid_0's binary_logloss: 0.537423
[27]	vali

[32m[I 2021-12-12 18:08:05,686][0m Trial 16 finished with value: 0.5051315070226481 and parameters: {'device_type': 'gpu', 'max_depth': 12, 'num_leaves': 2520, 'min_data_in_leaf': 200, 'learning_rate': 0.07598352464478855, 'n_estimators': 500, 'lambda_l1': 30, 'lambda_l2': 55, 'min_gain_to_split': 6.568559750262673}. Best is trial 13 with value: 0.4916794108004551.[0m


[442]	valid_0's binary_logloss: 0.507529
[443]	valid_0's binary_logloss: 0.507529
[444]	valid_0's binary_logloss: 0.507529
[445]	valid_0's binary_logloss: 0.507529
[446]	valid_0's binary_logloss: 0.507529
[447]	valid_0's binary_logloss: 0.507529
[448]	valid_0's binary_logloss: 0.507529
[449]	valid_0's binary_logloss: 0.507529
[450]	valid_0's binary_logloss: 0.507529
[451]	valid_0's binary_logloss: 0.507529
[452]	valid_0's binary_logloss: 0.507529
[453]	valid_0's binary_logloss: 0.507529
[454]	valid_0's binary_logloss: 0.507529
[455]	valid_0's binary_logloss: 0.507529
[456]	valid_0's binary_logloss: 0.507529
[457]	valid_0's binary_logloss: 0.507529
[458]	valid_0's binary_logloss: 0.507529
[459]	valid_0's binary_logloss: 0.507529
[460]	valid_0's binary_logloss: 0.507529
[461]	valid_0's binary_logloss: 0.507529
[462]	valid_0's binary_logloss: 0.507529
[463]	valid_0's binary_logloss: 0.507529
[464]	valid_0's binary_logloss: 0.507529
[465]	valid_0's binary_logloss: 0.507529
[466]	valid_0's 



[2]	valid_0's binary_logloss: 0.642559
[3]	valid_0's binary_logloss: 0.625023
[4]	valid_0's binary_logloss: 0.613223
[5]	valid_0's binary_logloss: 0.603212
[6]	valid_0's binary_logloss: 0.594145
[7]	valid_0's binary_logloss: 0.587321
[8]	valid_0's binary_logloss: 0.581155
[9]	valid_0's binary_logloss: 0.575334
[10]	valid_0's binary_logloss: 0.570967
[11]	valid_0's binary_logloss: 0.567296
[12]	valid_0's binary_logloss: 0.562726
[13]	valid_0's binary_logloss: 0.559853
[14]	valid_0's binary_logloss: 0.556366
[15]	valid_0's binary_logloss: 0.554153
[16]	valid_0's binary_logloss: 0.550795
[17]	valid_0's binary_logloss: 0.549432
[18]	valid_0's binary_logloss: 0.547506
[19]	valid_0's binary_logloss: 0.545282
[20]	valid_0's binary_logloss: 0.543482
[21]	valid_0's binary_logloss: 0.541758
[22]	valid_0's binary_logloss: 0.54101
[23]	valid_0's binary_logloss: 0.539174
[24]	valid_0's binary_logloss: 0.538432
[25]	valid_0's binary_logloss: 0.537089
[26]	valid_0's binary_logloss: 0.535624
[27]	vali

[32m[I 2021-12-12 18:08:07,832][0m Trial 17 finished with value: 0.5339088573595764 and parameters: {'device_type': 'gpu', 'max_depth': 10, 'num_leaves': 1520, 'min_data_in_leaf': 2200, 'learning_rate': 0.16309084234127275, 'n_estimators': 500, 'lambda_l1': 55, 'lambda_l2': 35, 'min_gain_to_split': 11.608260811927831}. Best is trial 13 with value: 0.4916794108004551.[0m


[238]	valid_0's binary_logloss: 0.534846
[239]	valid_0's binary_logloss: 0.534846
[240]	valid_0's binary_logloss: 0.534846
[241]	valid_0's binary_logloss: 0.534846
[242]	valid_0's binary_logloss: 0.534846
[243]	valid_0's binary_logloss: 0.534846
[244]	valid_0's binary_logloss: 0.534846
[245]	valid_0's binary_logloss: 0.534846
[246]	valid_0's binary_logloss: 0.534846
[247]	valid_0's binary_logloss: 0.534846
[248]	valid_0's binary_logloss: 0.534846
[249]	valid_0's binary_logloss: 0.534846
[250]	valid_0's binary_logloss: 0.534846
[251]	valid_0's binary_logloss: 0.534846
[252]	valid_0's binary_logloss: 0.534846
[253]	valid_0's binary_logloss: 0.534846
[254]	valid_0's binary_logloss: 0.534846
[255]	valid_0's binary_logloss: 0.534846
[256]	valid_0's binary_logloss: 0.534846
[257]	valid_0's binary_logloss: 0.534846
[258]	valid_0's binary_logloss: 0.534846
[259]	valid_0's binary_logloss: 0.534846
[260]	valid_0's binary_logloss: 0.534846
[261]	valid_0's binary_logloss: 0.534846
[262]	valid_0's 



[1]	valid_0's binary_logloss: 0.653583
[2]	valid_0's binary_logloss: 0.6271
[3]	valid_0's binary_logloss: 0.608036
[4]	valid_0's binary_logloss: 0.594636
[5]	valid_0's binary_logloss: 0.58375
[6]	valid_0's binary_logloss: 0.57549
[7]	valid_0's binary_logloss: 0.567552
[8]	valid_0's binary_logloss: 0.560959
[9]	valid_0's binary_logloss: 0.554651
[10]	valid_0's binary_logloss: 0.549768
[11]	valid_0's binary_logloss: 0.546189
[12]	valid_0's binary_logloss: 0.541845
[13]	valid_0's binary_logloss: 0.539293
[14]	valid_0's binary_logloss: 0.536439
[15]	valid_0's binary_logloss: 0.534719
[16]	valid_0's binary_logloss: 0.532459
[17]	valid_0's binary_logloss: 0.530168
[18]	valid_0's binary_logloss: 0.527965
[19]	valid_0's binary_logloss: 0.526386
[20]	valid_0's binary_logloss: 0.525455
[21]	valid_0's binary_logloss: 0.524432
[22]	valid_0's binary_logloss: 0.522998
[23]	valid_0's binary_logloss: 0.522238
[24]	valid_0's binary_logloss: 0.520802
[25]	valid_0's binary_logloss: 0.520047
[26]	valid_0'

[32m[I 2021-12-12 18:08:10,171][0m Trial 18 finished with value: 0.5222869598181756 and parameters: {'device_type': 'gpu', 'max_depth': 11, 'num_leaves': 2470, 'min_data_in_leaf': 1800, 'learning_rate': 0.23037128906089233, 'n_estimators': 500, 'lambda_l1': 45, 'lambda_l2': 55, 'min_gain_to_split': 6.72111367726975}. Best is trial 13 with value: 0.4916794108004551.[0m


[218]	valid_0's binary_logloss: 0.516274
[219]	valid_0's binary_logloss: 0.516274
[220]	valid_0's binary_logloss: 0.516274
[221]	valid_0's binary_logloss: 0.516274
[222]	valid_0's binary_logloss: 0.516274
[223]	valid_0's binary_logloss: 0.516274
[224]	valid_0's binary_logloss: 0.516274
[225]	valid_0's binary_logloss: 0.516274
[226]	valid_0's binary_logloss: 0.516274
[227]	valid_0's binary_logloss: 0.516274
[228]	valid_0's binary_logloss: 0.516274
[229]	valid_0's binary_logloss: 0.516274
[230]	valid_0's binary_logloss: 0.516274
[231]	valid_0's binary_logloss: 0.516274
[232]	valid_0's binary_logloss: 0.516274
[233]	valid_0's binary_logloss: 0.516274
[234]	valid_0's binary_logloss: 0.516274
[235]	valid_0's binary_logloss: 0.516274
[236]	valid_0's binary_logloss: 0.516274
[237]	valid_0's binary_logloss: 0.516274
[238]	valid_0's binary_logloss: 0.516274
[239]	valid_0's binary_logloss: 0.516274
[240]	valid_0's binary_logloss: 0.516274
[241]	valid_0's binary_logloss: 0.516274
[242]	valid_0's 



[1]	valid_0's binary_logloss: 0.690985
[2]	valid_0's binary_logloss: 0.688872
[3]	valid_0's binary_logloss: 0.686808
[4]	valid_0's binary_logloss: 0.684786
[5]	valid_0's binary_logloss: 0.682814
[6]	valid_0's binary_logloss: 0.680878
[7]	valid_0's binary_logloss: 0.678984
[8]	valid_0's binary_logloss: 0.67714
[9]	valid_0's binary_logloss: 0.675349
[10]	valid_0's binary_logloss: 0.673572
[11]	valid_0's binary_logloss: 0.671845
[12]	valid_0's binary_logloss: 0.670144
[13]	valid_0's binary_logloss: 0.668502
[14]	valid_0's binary_logloss: 0.666871
[15]	valid_0's binary_logloss: 0.665275
[16]	valid_0's binary_logloss: 0.663714
[17]	valid_0's binary_logloss: 0.662182
[18]	valid_0's binary_logloss: 0.660704
[19]	valid_0's binary_logloss: 0.659238
[20]	valid_0's binary_logloss: 0.6578
[21]	valid_0's binary_logloss: 0.656392
[22]	valid_0's binary_logloss: 0.655034
[23]	valid_0's binary_logloss: 0.653689
[24]	valid_0's binary_logloss: 0.652395
[25]	valid_0's binary_logloss: 0.651128
[26]	valid_0

[32m[I 2021-12-12 18:08:15,887][0m Trial 19 finished with value: 0.5328984738477942 and parameters: {'device_type': 'gpu', 'max_depth': 9, 'num_leaves': 2720, 'min_data_in_leaf': 3600, 'learning_rate': 0.011612212124944205, 'n_estimators': 500, 'lambda_l1': 25, 'lambda_l2': 40, 'min_gain_to_split': 2.9894426838370376}. Best is trial 13 with value: 0.4916794108004551.[0m


[462]	valid_0's binary_logloss: 0.529424
[463]	valid_0's binary_logloss: 0.529357
[464]	valid_0's binary_logloss: 0.529291
[465]	valid_0's binary_logloss: 0.529203
[466]	valid_0's binary_logloss: 0.529149
[467]	valid_0's binary_logloss: 0.529085
[468]	valid_0's binary_logloss: 0.528989
[469]	valid_0's binary_logloss: 0.528951
[470]	valid_0's binary_logloss: 0.528904
[471]	valid_0's binary_logloss: 0.52885
[472]	valid_0's binary_logloss: 0.528809
[473]	valid_0's binary_logloss: 0.528739
[474]	valid_0's binary_logloss: 0.528647
[475]	valid_0's binary_logloss: 0.528561
[476]	valid_0's binary_logloss: 0.528482
[477]	valid_0's binary_logloss: 0.528438
[478]	valid_0's binary_logloss: 0.528377
[479]	valid_0's binary_logloss: 0.528299
[480]	valid_0's binary_logloss: 0.528215
[481]	valid_0's binary_logloss: 0.528158
[482]	valid_0's binary_logloss: 0.528098
[483]	valid_0's binary_logloss: 0.528063
[484]	valid_0's binary_logloss: 0.527989
[485]	valid_0's binary_logloss: 0.527921
[486]	valid_0's b

In [12]:
study_summary(study)

Study:  lgbm_split

Number of finished trials: 20
Best trial:
  Validation score: 0.4916794108004551
  Params: 
    device_type: gpu
    max_depth: 12
    num_leaves: 2520
    min_data_in_leaf: 300
    learning_rate: 0.2011884316999342
    n_estimators: 500
    lambda_l1: 30
    lambda_l2: 35
    min_gain_to_split: 2.7591039026962063


In [13]:
# pass the optuna best parameters as hyperparameters
best_clf = train_best_lgbm(projected_train, study.best_params)

In [14]:
best_clf.__dict__

{'boosting_type': 'gbdt',
 'objective': 'binary',
 'num_leaves': 2520,
 'max_depth': 12,
 'learning_rate': 0.2011884316999342,
 'n_estimators': 500,
 'subsample_for_bin': 200000,
 'min_split_gain': 0.0,
 'min_child_weight': 0.001,
 'min_child_samples': 20,
 'subsample': 1.0,
 'subsample_freq': 0,
 'colsample_bytree': 1.0,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'random_state': None,
 'n_jobs': -1,
 'silent': 'warn',
 'importance_type': 'split',
 '_Booster': <lightgbm.basic.Booster at 0x1e36012d520>,
 '_evals_result': None,
 '_best_score': defaultdict(collections.OrderedDict, {}),
 '_best_iteration': None,
 '_other_params': {'device_type': 'gpu',
  'min_data_in_leaf': 300,
  'lambda_l1': 30,
  'lambda_l2': 35,
  'min_gain_to_split': 2.7591039026962063},
 '_objective': 'binary',
 'class_weight': None,
 '_class_weight': None,
 '_class_map': {0.0: 0, 1.0: 1},
 '_n_features': 20,
 '_n_features_in': 20,
 '_classes': array([0., 1.]),
 '_n_classes': 2,
 'device_type': 'gpu',
 'min_data_in_leaf

In [15]:
def standard_scaling(train_df, test_df):
    standard_scaler = StandardScaler()
    feature_cols = [col for col in train_df.columns if col != "LABELS"]
    
    train_df[feature_cols] = standard_scaler.fit_transform(train_df[feature_cols])
    test_df[feature_cols] = standard_scaler.transform(test_df[feature_cols])

    return train_df, test_df

In [16]:
agg_test = agg_over_months(test_df, freq=3)
scaled_train, scaled_test = standard_scaling(agg_train, agg_test)

KeyError: "None of [Index(['NDVI_06_mean', 'NDVI_612_mean', 'S2_B4_06_mean', 'S2_B4_612_mean',\n       'S2_B2_06_mean', 'S2_B2_612_mean', 'S2_B12_06_mean', 'S2_B12_612_mean',\n       'ERA5_total_precipitation_06_mean', 'ERA5_total_precipitation_612_mean',\n       'topo_elevation_06_mean', 'topo_elevation_612_mean', 'S2_B11_06_mean',\n       'S2_B11_612_mean', 'S2_B9_06_mean', 'S2_B9_612_mean', 'S1_VV_06_mean',\n       'S1_VV_612_mean', 'S1_VH_06_mean', 'S1_VH_612_mean', 'S2_B6_06_mean',\n       'S2_B6_612_mean', 'S2_B7_06_mean', 'S2_B7_612_mean',\n       'ERA5_temperature_2m_06_mean', 'ERA5_temperature_2m_612_mean',\n       'topo_slope_06_mean', 'topo_slope_612_mean', 'S2_B5_06_mean',\n       'S2_B5_612_mean', 'S2_B8A_06_mean', 'S2_B8A_612_mean', 'S2_B3_06_mean',\n       'S2_B3_612_mean', 'S2_B8_06_mean', 'S2_B8_612_mean'],\n      dtype='object')] are in the [columns]"

In [69]:
preds = best_clf.predict(agg_test)

# Save prediction

In [71]:
save_pred(projected_test, preds, "7")