In [1]:
import numpy as np
import pandas as pd
import os
import pickle
import gc

import ydata_profiling as pdp

import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder

from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import lightgbm as lgb

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
df_train = pd.read_csv('train.csv')
print(df_train.shape)
df_train.head()

(891, 12)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
df_train.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [3]:
#ベースライン作成
#目的変数と説明変数の設定
x_train, y_train, id_train = df_train[['Pclass', 'Fare']], df_train[['Survived']], df_train[['PassengerId']]
id_train.head()

Unnamed: 0,PassengerId
0,1
1,2
2,3
3,4
4,5


In [17]:
#ホールドアウト検証
x_tr, x_va, y_tr, y_va = train_test_split(x_train, y_train, test_size=0.2, shuffle=True, stratify=y_train, random_state=123)
print(x_tr.shape, y_tr.shape)
print(x_va.shape, y_va.shape)
print('y_train:{:.3f}, y_tr:{:.3f}, y_va:{:.3f}'.format(
    y_train['Survived'].mean(),
    y_tr['Survived'].mean(),
    y_va['Survived'].mean(),
))

(712, 2) (712, 1)
(179, 2) (179, 1)
y_train:0.384, y_tr:0.383, y_va:0.385


In [20]:
#ハイパーパラメータ

params = {
    'boosting_type' : 'gbdt',
    'objective' : 'binary',
    'metric' : 'auc',
    'learning_rate' : 0.1,
    'num_leaves' : 16,
    'n_estimators' : 100000,
    'random_state' : 123,
    'importance_type' : 'gain',
}

model = lgb.LGBMClassifier(**params)
model.fit(x_tr,
          y_tr,
          eval_set=[(x_tr, y_tr), (x_va, y_va)],
          callbacks=[lgb.early_stopping(stopping_rounds=100)],
         )

[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000077 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 120
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[35]	training's auc: 0.81342	valid_1's auc: 0.761265


In [23]:
y_tr_pred = model.predict(x_tr)
y_va_pred = model.predict(x_va)
metric_tr = accuracy_score(y_tr, y_tr_pred)
metric_va = accuracy_score(y_va, y_va_pred)
print(f'tr:{metric_tr},va:{metric_va}')

tr:0.7514044943820225,va:0.7262569832402235


In [25]:
imp = pd.DataFrame({'col':x_train.columns, 'imp':model.feature_importances_, 'nfold':nfold})
imp.sort_values('imp',ascending=False, ignore_index=True)

Unnamed: 0,col,imp
0,Fare,903.440373
1,Pclass,229.457186


In [16]:
params = {
    'boosting_type' : 'gbdt',
    'objective' : 'binary',
    'metric' : 'auc',
    'learning_rate' : 0.2,
    'num_leaves' : 16,
    'n_estimators' : 100000,
    'random_state' : 123,
    'importance_type' : 'gain',
    'bagging_freq' : 1,
    'seed' : 123,
}



#クロスバリデーション

def train_cv(params):
    metrics = []
    imp = pd.DataFrame()

    cv = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=123).split(x_train, y_train))

    for nfold in np.arange(5):
        print('-'*20,nfold,'-'*20)
        idx_tr, idx_va = cv[nfold][0],cv[nfold][1]
        x_tr, y_tr = x_train.loc[idx_tr,:], y_train.loc[idx_tr,:]
        x_va, y_va = x_train.loc[idx_va,:], y_train.loc[idx_va,:]
        print(x_tr.shape, y_tr.shape)
        print(x_va.shape, y_va.shape)
        print('y_train:{:.3f}, y_tr:{:.3f}, y_va:{:.3f}'.format(
            y_train['Survived'].mean(),
            y_tr['Survived'].mean(),
            y_va['Survived'].mean(),
            ))   

        model = lgb.LGBMClassifier(**params)
        model.fit(x_tr,
              y_tr,
              eval_set=[(x_tr, y_tr), (x_va, y_va)],
              callbacks=[lgb.early_stopping(stopping_rounds=100)],
             )
        
        y_tr_pred = model.predict(x_tr)
        y_va_pred = model.predict(x_va)
        metric_tr = accuracy_score(y_tr, y_tr_pred)
        metric_va = accuracy_score(y_va, y_va_pred)
        print(f'tr:{metric_tr},va:{metric_va}')
        metrics.append([nfold, metric_tr, metric_va])
        
        _imp = pd.DataFrame({'col':input_x.columns, 'imp':model.feature_importances_, 'nfold':nfold})
        imp = pd.concat([imp,_imp], axis=0, ignore_index=True)

    print('-'*20,'result','-'*20)
    metrics = np.array(metrics)
    print(metrics)

    print('[cv]tr:{:.2f}+-{:.2f}, va:{:.2f}+-{:.2f}'.format(
        metrics[:,1].mean(), metrics[:,1].std(),
        metrics[:,2].mean(), metrics[:,2].std(),
    ))

    imp = imp.groupby('col')['imp'].agg(['mean','std'])
    imp.columns = ['imp', 'imp_std']
    imp = imp.reset_index(drop=False)

    print('Done')

    return imp, metrics

In [7]:
imp, metrics  = train_cv(x_train,y_train,id_train,params,n_splits=5)

-------------------- 0 --------------------
(712, 2) (712, 1)
(179, 2) (179, 1)
y_train:0.384, y_tr:0.383, y_va:0.385
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 123
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[12]	training's auc: 0.793779	valid_1's auc: 0.740382
tr:0.7205056179775281,va:0.6759776536312849
-------------------- 1 --------------------
(713, 2) (713, 1)
(178, 2) (178, 1)
y_train:0.384, y_tr:0.384, y_va:0.382
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info

In [17]:
x_train, y_train, id_train = df_train[['Pclass', 'Fare','Age']], df_train[['Survived']], df_train[['PassengerId']]
print(x_train.shape, y_train.shape, id_train.shape)

(891, 3) (891, 1) (891, 1)


In [18]:
imp, metrics  = train_cv(x_train,y_train,id_train,params,n_splits=5)

-------------------- 0 --------------------
(712, 3) (712, 1)
(179, 3) (179, 1)
y_train:0.384, y_tr:0.383, y_va:0.385
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000069 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[69]	training's auc: 0.913235	valid_1's auc: 0.72892
tr:0.824438202247191,va:0.7039106145251397
-------------------- 1 --------------------
(713, 3) (713, 1)
(178, 3) (178, 1)
y_train:0.384, y_tr:0.384, y_va:0.382
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] 

In [20]:
imp.sort_values('imp', ascending=False, ignore_index=True)

Unnamed: 0,col,imp,imp_std
0,Fare,547.621958,270.958097
1,Age,436.497719,247.807858
2,Pclass,299.843845,103.344422


In [23]:
#データの確認
df_train.describe(include='all').T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
PassengerId,891.0,,,,446.0,257.353842,1.0,223.5,446.0,668.5,891.0
Survived,891.0,,,,0.383838,0.486592,0.0,0.0,0.0,1.0,1.0
Pclass,891.0,,,,2.308642,0.836071,1.0,2.0,3.0,3.0,3.0
Name,891.0,891.0,"Braund, Mr. Owen Harris",1.0,,,,,,,
Sex,891.0,2.0,male,577.0,,,,,,,
Age,714.0,,,,29.699118,14.526497,0.42,20.125,28.0,38.0,80.0
SibSp,891.0,,,,0.523008,1.102743,0.0,0.0,0.0,1.0,8.0
Parch,891.0,,,,0.381594,0.806057,0.0,0.0,0.0,0.0,6.0
Ticket,891.0,681.0,347082,7.0,,,,,,,
Fare,891.0,,,,32.204208,49.693429,0.0,7.9104,14.4542,31.0,512.3292


In [None]:
pdp.ProfileReport(df_train)

In [31]:
#欠損値の補完 平均値
df_train['Age']=df_train['Age'].fillna(df_train['Age'].mean())
df_train['Age'].isnull().sum()

0

In [32]:
#欠損値の補完　空白埋め
df_train['Cabin']=df_train['Cabin'].fillna('')
df_train['Embarked']=df_train['Embarked'].fillna('')
df_train['Cabin'].isnull().sum()

0

In [34]:
#標準化
std = StandardScaler()
std.fit(df_train[['Fare']])
df_train['Fare_standard']=std.transform(df_train[['Fare']])
df_train[['Fare', 'Fare_standard']].head()

Unnamed: 0,Fare,Fare_standard
0,7.25,-0.502445
1,71.2833,0.786845
2,7.925,-0.488854
3,53.1,0.42073
4,8.05,-0.486337


In [36]:
#label encoding
le_embarked = LabelEncoder()
le_embarked.fit(df_train['Embarked'])
df_train['Embarked_le'] = le_embarked.transform(df_train['Embarked'])
df_train[['Embarked','Embarked_le']].head()

Unnamed: 0,Embarked,Embarked_le
0,S,2
1,C,0
2,S,2
3,S,2
4,S,2


In [37]:
df_train['Family'] = df_train['SibSp'] + df_train['Parch']
df_train[['SibSp','Parch', 'Family']].head()

Unnamed: 0,SibSp,Parch,Family
0,1,0,1
1,1,0,1
2,0,0,0
3,1,0,1
4,0,0,0


In [39]:
#性別ごとの運賃
df_train['mean_Fare_by_Sex'] = df_train.groupby('Sex')['Fare'].transform('mean')
df_train[['Sex', 'mean_Fare_by_Sex']].head()

Unnamed: 0,Sex,mean_Fare_by_Sex
0,male,25.523893
1,female,44.479818
2,female,44.479818
3,female,44.479818
4,male,25.523893


In [42]:
#性別ごとの港
df_train['count_Sex_x_Embarked'] = df_train.groupby(['Sex', 'Embarked'])['PassengerId'].transform('count')
df_train[['Sex', 'Embarked', 'count_Sex_x_Embarked']].head()

Unnamed: 0,Sex,Embarked,count_Sex_x_Embarked
0,male,S,441.0
1,female,C,73.0
2,female,S,203.0
3,female,S,203.0
4,male,S,441.0


In [41]:
#ハイパーパラメータの自動調整
import optuna

In [43]:
params_base = {
    'boosting_type' : 'gbdt',
    'objective' : 'binary',
    'metric' : 'auc',
    'learning_rate' : 0.2,
    'num_leaves' : 16,
    'n_estimators' : 100000,
    'random_state' : 123,
    'importance_type' : 'gain',
    'bagging_freq' : 1,
    'seed' : 123,
}



#クロスバリデーション

def objective(trial):

    params_tuning = {
        'num_leaves' : trial.suggest_int('num_leaves', 8, 256),
        'min_data_in_leaf' : trial.suggest_int('min_data_in_leaf', 5, 200),
        'min_sum_hession_in_leaf' : trial.suggest_float('min_sum_hession_in_leaf', 1e-5, 1e-2, log=True),
        'feature_fraction' : trial.suggest_float('feature_fraction', 0.5, 1.0),
        'bagging_fraction' : trial.suggest_float('bagging_fraction', 0.5, 1.0),
        'lambda_l1' : trial.suggest_float('lambda_l1', 1e-2, 1e2, log=True),
        'lambda_l2' : trial.suggest_float('lambda_l2', 1e-2, 1e2, log=True),
    }
    params_tuning.update(params_base)
    
    list_metrics = []
    imp = pd.DataFrame()

    cv = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=123).split(x_train, y_train))

    for nfold in np.arange(5):
        idx_tr, idx_va = cv[nfold][0],cv[nfold][1]
        x_tr, y_tr = x_train.loc[idx_tr,:], y_train.loc[idx_tr,:]
        x_va, y_va = x_train.loc[idx_va,:], y_train.loc[idx_va,:]   

        model = lgb.LGBMClassifier(**params_tuning)
        model.fit(x_tr,
                  y_tr,
                  eval_set=[(x_tr, y_tr), (x_va, y_va)],
                  callbacks=[lgb.early_stopping(stopping_rounds=100)],
                 )
        
        y_va_pred = model.predict_proba(x_va)[:,1]
        metric_va = accuracy_score(y_va, np.where(y_va_pred>=0.5, 1, 0))
        list_metrics.append(metric_va)

    metrics = np.mean(list_metrics)

    return metrics

In [44]:
sampler = optuna.samplers.TPESampler(seed=123)
study = optuna.create_study(sampler=sampler, direction='maximize')
study.optimize(objective, n_trials=30)

[I 2024-05-30 19:20:31,200] A new study created in memory with name: no-name-da8d76f4-ed79-407f-9316-c7ba7461a96f


[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000055 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[320]	training's auc: 0.832954	valid_1's auc: 0.740909
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM]

[I 2024-05-30 19:20:32,113] Trial 0 finished with value: 0.7059632163705982 and parameters: {'num_leaves': 181, 'min_data_in_leaf': 61, 'min_sum_hession_in_leaf': 4.792414358623587e-05, 'feature_fraction': 0.7756573845414456, 'bagging_fraction': 0.8597344848927815, 'lambda_l1': 0.492522233779106, 'lambda_l2': 83.76388146302445}. Best is trial 0 with value: 0.7059632163705982.


[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000048 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[103]	training's auc: 0.788666	valid_1's auc: 0.775761
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM]

[I 2024-05-30 19:20:32,655] Trial 1 finished with value: 0.7082041303119704 and parameters: {'num_leaves': 178, 'min_data_in_leaf': 99, 'min_sum_hession_in_leaf': 0.00015009027543233888, 'feature_fraction': 0.6715890080754348, 'bagging_fraction': 0.8645248536920208, 'lambda_l1': 0.567922374174008, 'lambda_l2': 0.01732652966363563}. Best is trial 1 with value: 0.7082041303119704.


[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000048 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[6]	training's auc: 0.726382	valid_1's auc: 0.779019
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000050 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [

[I 2024-05-30 19:20:33,229] Trial 2 finished with value: 0.7126796811248509 and parameters: {'num_leaves': 107, 'min_data_in_leaf': 149, 'min_sum_hession_in_leaf': 3.52756635172055e-05, 'feature_fraction': 0.5877258780737462, 'bagging_fraction': 0.7657756869209191, 'lambda_l1': 1.3406343673102123, 'lambda_l2': 3.4482904089131434}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[26]	training's auc: 0.729033	valid_1's auc: 0.757479
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000341 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Light

[I 2024-05-30 19:20:33,504] Trial 3 finished with value: 0.6991965350574352 and parameters: {'num_leaves': 219, 'min_data_in_leaf': 146, 'min_sum_hession_in_leaf': 0.0006808799287054756, 'feature_fraction': 0.8612216912851107, 'bagging_fraction': 0.6614794569265892, 'lambda_l1': 0.2799978022399009, 'lambda_l2': 0.08185645330667264}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000050 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[50]	training's auc: 0.757902	valid_1's auc: 0.724198
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000013 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data poin

[I 2024-05-30 19:20:33,957] Trial 4 finished with value: 0.7025484903646977 and parameters: {'num_leaves': 81, 'min_data_in_leaf': 128, 'min_sum_hession_in_leaf': 1.889360449174926e-05, 'feature_fraction': 0.7168505863397641, 'bagging_fraction': 0.7154313816648219, 'lambda_l1': 0.9434967110751797, 'lambda_l2': 0.5050346330980694}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[138]	training's auc: 0.787394	valid_1's auc: 0.732353
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[61]	training's auc: 0.767226	valid_1's auc: 0.723596
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000036 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] 

[I 2024-05-30 19:20:34,369] Trial 5 finished with value: 0.7070930889460799 and parameters: {'num_leaves': 85, 'min_data_in_leaf': 88, 'min_sum_hession_in_leaf': 0.004788147156768277, 'feature_fraction': 0.9720800091019398, 'bagging_fraction': 0.7509183379421682, 'lambda_l1': 3.1319282717196035, 'lambda_l2': 0.029005047452739414}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[81]	training's auc: 0.795579	valid_1's auc: 0.74639
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[6]	training's auc: 0.739542	valid_1's auc: 0.725668
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [In

[I 2024-05-30 19:20:34,560] Trial 6 finished with value: 0.6161634548992531 and parameters: {'num_leaves': 87, 'min_data_in_leaf': 86, 'min_sum_hession_in_leaf': 0.003971252247766701, 'feature_fraction': 0.6252276826982534, 'bagging_fraction': 0.7415171321313522, 'lambda_l1': 87.54657140659076, 'lambda_l2': 1.1965765212602313}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000747 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's auc: 0.5	valid_1's auc: 0.5
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 189
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [bin

[I 2024-05-30 19:20:35,208] Trial 7 finished with value: 0.6868746469148201 and parameters: {'num_leaves': 160, 'min_data_in_leaf': 28, 'min_sum_hession_in_leaf': 0.0030131614432849746, 'feature_fraction': 0.8015300642054637, 'bagging_fraction': 0.7725340032332324, 'lambda_l1': 0.23499322154972468, 'lambda_l2': 0.1646202117975735}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[245]	training's auc: 0.951769	valid_1's auc: 0.756283
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[6]	training's auc: 0.790851	valid_1's auc: 0.772437
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000036 seconds.
You can set `force_col_wise=true` to re

[I 2024-05-30 19:20:35,595] Trial 8 finished with value: 0.6936915447868934 and parameters: {'num_leaves': 111, 'min_data_in_leaf': 138, 'min_sum_hession_in_leaf': 0.00423029374725911, 'feature_fraction': 0.7552111687390055, 'bagging_fraction': 0.8346568914811361, 'lambda_l1': 2.206714812711709, 'lambda_l2': 3.1594683442464033}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[447]	training's auc: 0.804915	valid_1's auc: 0.746524
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[103]	training's auc: 0.777634	valid_1's auc: 0.71758
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] 

[I 2024-05-30 19:20:35,842] Trial 9 finished with value: 0.6991965350574352 and parameters: {'num_leaves': 175, 'min_data_in_leaf': 170, 'min_sum_hession_in_leaf': 1.7765808030254076e-05, 'feature_fraction': 0.8818414207216692, 'bagging_fraction': 0.6218331872684371, 'lambda_l1': 0.05982625838323253, 'lambda_l2': 1.9490717640641542}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[40]	training's auc: 0.73267	valid_1's auc: 0.724866
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 189
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [

[I 2024-05-30 19:20:36,260] Trial 10 finished with value: 0.6835791852363317 and parameters: {'num_leaves': 32, 'min_data_in_leaf': 200, 'min_sum_hession_in_leaf': 0.00015678703303272353, 'feature_fraction': 0.5040305717020104, 'bagging_fraction': 0.9940542446575643, 'lambda_l1': 0.010612397212799442, 'lambda_l2': 18.289897792948263}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[136]	training's auc: 0.763763	valid_1's auc: 0.713102
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000042 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[26]	training's auc: 0.722548	valid_1's auc: 0.758676
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000050 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] 

[I 2024-05-30 19:20:36,759] Trial 11 finished with value: 0.6959073504488105 and parameters: {'num_leaves': 244, 'min_data_in_leaf': 113, 'min_sum_hession_in_leaf': 0.0001312578895795087, 'feature_fraction': 0.6008920237848668, 'bagging_fraction': 0.9504403697987084, 'lambda_l1': 15.104886297750244, 'lambda_l2': 0.010211649165953135}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[103]	training's auc: 0.743249	valid_1's auc: 0.696123
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000042 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[24]	training's auc: 0.724879	valid_1's auc: 0.76559
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000058 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [

[I 2024-05-30 19:20:37,423] Trial 12 finished with value: 0.6802083987194778 and parameters: {'num_leaves': 129, 'min_data_in_leaf': 169, 'min_sum_hession_in_leaf': 0.0003677187333491435, 'feature_fraction': 0.6476756823987893, 'bagging_fraction': 0.5595408581248555, 'lambda_l1': 8.37591328058755, 'lambda_l2': 5.843388630792373}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[183]	training's auc: 0.715447	valid_1's auc: 0.760803
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000058 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[56]	training's auc: 0.858966	valid_1's auc: 0.747826
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM]

[I 2024-05-30 19:20:38,663] Trial 13 finished with value: 0.6812378381771389 and parameters: {'num_leaves': 8, 'min_data_in_leaf': 55, 'min_sum_hession_in_leaf': 6.010209410036105e-05, 'feature_fraction': 0.5283014454251121, 'bagging_fraction': 0.873103599069803, 'lambda_l1': 0.07159033236171834, 'lambda_l2': 0.38586024213288816}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000067 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[2]	training's auc: 0.732651	valid_1's auc: 0.765723
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [

[I 2024-05-30 19:20:39,512] Trial 14 finished with value: 0.7004331178206014 and parameters: {'num_leaves': 204, 'min_data_in_leaf': 7, 'min_sum_hession_in_leaf': 1.0140499957190339e-05, 'feature_fraction': 0.6817107174591786, 'bagging_fraction': 0.9012240246746928, 'lambda_l1': 4.550632384498536, 'lambda_l2': 14.542595759507835}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[66]	training's auc: 0.807816	valid_1's auc: 0.725134
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000052 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[48]	training's auc: 0.794043	valid_1's auc: 0.772969
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [

[I 2024-05-30 19:20:40,156] Trial 15 finished with value: 0.7014562802083987 and parameters: {'num_leaves': 143, 'min_data_in_leaf': 104, 'min_sum_hession_in_leaf': 0.001215432268892957, 'feature_fraction': 0.5846213889611341, 'bagging_fraction': 0.8005029713330536, 'lambda_l1': 0.0839815781870575, 'lambda_l2': 0.06689775001425292}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[217]	training's auc: 0.844583	valid_1's auc: 0.729813
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[5]	training's auc: 0.726336	valid_1's auc: 0.785467
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [

[I 2024-05-30 19:20:40,393] Trial 16 finished with value: 0.6161634548992531 and parameters: {'num_leaves': 55, 'min_data_in_leaf': 164, 'min_sum_hession_in_leaf': 5.952272896500019e-05, 'feature_fraction': 0.5662963895325848, 'bagging_fraction': 0.511566178836625, 'lambda_l1': 29.117862651347092, 'lambda_l2': 0.01033528363848504}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[32]	training's auc: 0.692458	valid_1's auc: 0.712253
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000050 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[10]	training's auc: 0.695164	valid_1's auc: 0.69639
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 189
[LightGBM] [I

[I 2024-05-30 19:20:41,166] Trial 17 finished with value: 0.6903207582700396 and parameters: {'num_leaves': 123, 'min_data_in_leaf': 71, 'min_sum_hession_in_leaf': 0.00018247080939916779, 'feature_fraction': 0.6824969103765309, 'bagging_fraction': 0.6714967114221069, 'lambda_l1': 1.3188755803207954, 'lambda_l2': 69.44137871429486}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[86]	training's auc: 0.760964	valid_1's auc: 0.774365
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000061 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[11]	training's auc: 0.722918	valid_1's auc: 0.726943
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000036 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [

[I 2024-05-30 19:20:41,666] Trial 18 finished with value: 0.6879856882807106 and parameters: {'num_leaves': 197, 'min_data_in_leaf': 196, 'min_sum_hession_in_leaf': 3.376492032672694e-05, 'feature_fraction': 0.6656375863866135, 'bagging_fraction': 0.920086044126635, 'lambda_l1': 0.01137793236283924, 'lambda_l2': 0.4726182082966624}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[121]	training's auc: 0.760957	valid_1's auc: 0.722326
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[24]	training's auc: 0.721757	valid_1's auc: 0.758543
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] 

[I 2024-05-30 19:20:42,208] Trial 19 finished with value: 0.7003201305630531 and parameters: {'num_leaves': 255, 'min_data_in_leaf': 121, 'min_sum_hession_in_leaf': 0.0003708408360071203, 'feature_fraction': 0.5296770331716041, 'bagging_fraction': 0.8131325158472615, 'lambda_l1': 0.6420140723743026, 'lambda_l2': 7.063364364495365}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[17]	training's auc: 0.72925	valid_1's auc: 0.761933
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[12]	training's auc: 0.718303	valid_1's auc: 0.72892
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [In

[I 2024-05-30 19:20:42,650] Trial 20 finished with value: 0.7036846400100433 and parameters: {'num_leaves': 152, 'min_data_in_leaf': 149, 'min_sum_hession_in_leaf': 9.910221884637917e-05, 'feature_fraction': 0.7191139014183056, 'bagging_fraction': 0.6977974704856518, 'lambda_l1': 0.17854993286950835, 'lambda_l2': 26.431339754051077}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[199]	training's auc: 0.755291	valid_1's auc: 0.763795
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[90]	training's auc: 0.808819	valid_1's auc: 0.725231
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000059 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] 

[I 2024-05-30 19:20:42,959] Trial 21 finished with value: 0.6992279204067542 and parameters: {'num_leaves': 95, 'min_data_in_leaf': 86, 'min_sum_hession_in_leaf': 0.008748025832898375, 'feature_fraction': 0.9936228322884221, 'bagging_fraction': 0.7685670728543237, 'lambda_l1': 2.6344972857854265, 'lambda_l2': 0.02947208283602}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[22]	training's auc: 0.772235	valid_1's auc: 0.74004
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 189
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[147]	training's auc: 0.82325	valid_1's auc: 0.755214
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `forc

[I 2024-05-30 19:20:43,278] Trial 22 finished with value: 0.7093528340970434 and parameters: {'num_leaves': 59, 'min_data_in_leaf': 89, 'min_sum_hession_in_leaf': 0.0012134589356560493, 'feature_fraction': 0.9698628772354745, 'bagging_fraction': 0.8006705378425563, 'lambda_l1': 4.215681874192988, 'lambda_l2': 0.024815794952186165}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.382889 -> initscore=-0.477303
[LightGBM] [Info] Start training from score -0.477303
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[40]	training's auc: 0.772386	valid_1's auc: 0.781213
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] 

[I 2024-05-30 19:20:43,663] Trial 23 finished with value: 0.6970183918147009 and parameters: {'num_leaves': 49, 'min_data_in_leaf': 45, 'min_sum_hession_in_leaf': 0.0012039676213803127, 'feature_fraction': 0.9366816593422715, 'bagging_fraction': 0.839470254441214, 'lambda_l1': 7.09505481948791, 'lambda_l2': 0.025784265762980575}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[48]	training's auc: 0.776565	valid_1's auc: 0.786398
[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[331]	training's auc: 0.857585	valid_1's auc: 0.737352


[I 2024-05-30 19:20:44,039] Trial 24 finished with value: 0.7104450442533425 and parameters: {'num_leaves': 67, 'min_data_in_leaf': 103, 'min_sum_hession_in_leaf': 0.0007958826711101101, 'feature_fraction': 0.8726524286002304, 'bagging_fraction': 0.801598680138682, 'lambda_l1': 1.426694782037866, 'lambda_l2': 0.15562933607555865}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[23]	training's auc: 0.765422	valid_1's auc: 0.736965
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 189
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] 

[I 2024-05-30 19:20:44,469] Trial 25 finished with value: 0.7014813884878539 and parameters: {'num_leaves': 62, 'min_data_in_leaf': 127, 'min_sum_hession_in_leaf': 0.001493923488117155, 'feature_fraction': 0.8979932290605772, 'bagging_fraction': 0.7968084689118976, 'lambda_l1': 1.6147333376563207, 'lambda_l2': 0.1979495469977724}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[281]	training's auc: 0.809271	valid_1's auc: 0.749666
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[113]	training's auc: 0.79698	valid_1's auc: 0.701537
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] 

[I 2024-05-30 19:20:44,824] Trial 26 finished with value: 0.6756135835791852 and parameters: {'num_leaves': 27, 'min_data_in_leaf': 77, 'min_sum_hession_in_leaf': 0.0006647075277842634, 'feature_fraction': 0.8157093336213497, 'bagging_fraction': 0.7264713078346889, 'lambda_l1': 20.653784759596807, 'lambda_l2': 0.059371029602087316}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[466]	training's auc: 0.738236	valid_1's auc: 0.686096
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[37]	training's auc: 0.729711	valid_1's auc: 0.683422
[LightGBM] [Info] Number of positive: 273, number of negative: 440
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] 

[I 2024-05-30 19:20:45,125] Trial 27 finished with value: 0.6947335383842822 and parameters: {'num_leaves': 107, 'min_data_in_leaf': 102, 'min_sum_hession_in_leaf': 0.002075728555798761, 'feature_fraction': 0.9336514100354826, 'bagging_fraction': 0.6817102653553969, 'lambda_l1': 6.01372581918132, 'lambda_l2': 0.18101756169937222}. Best is trial 2 with value: 0.7126796811248509.


Early stopping, best iteration is:
[163]	training's auc: 0.774717	valid_1's auc: 0.72332
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[23]	training's auc: 0.747053	valid_1's auc: 0.745455
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to re

[I 2024-05-30 19:20:45,307] Trial 28 finished with value: 0.6161634548992531 and parameters: {'num_leaves': 72, 'min_data_in_leaf': 184, 'min_sum_hession_in_leaf': 0.0006087327507705954, 'feature_fraction': 0.84731952840838, 'bagging_fraction': 0.6254957342799081, 'lambda_l1': 50.59746961823806, 'lambda_l2': 0.7308491614347951}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 273, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 185
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383427 -> initscore=-0.475028
[LightGBM] [Info] Start training from score -0.475028
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's auc: 0.5	valid_1's auc: 0.5
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [bin

[I 2024-05-30 19:20:45,575] Trial 29 finished with value: 0.698141987320319 and parameters: {'num_leaves': 38, 'min_data_in_leaf': 61, 'min_sum_hession_in_leaf': 0.000314095451385855, 'feature_fraction': 0.9250849383092855, 'bagging_fraction': 0.7831631986242769, 'lambda_l1': 12.244675589947384, 'lambda_l2': 0.1260073800737225}. Best is trial 2 with value: 0.7126796811248509.


[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.384292 -> initscore=-0.471371
[LightGBM] [Info] Start training from score -0.471371
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[42]	training's auc: 0.751588	valid_1's auc: 0.740241
[LightGBM] [Info] Number of positive: 274, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 189
[LightGBM] [Info] Number of data points in the train set: 713, number of used features: 3
[LightGBM] 

In [47]:
trial = study.best_trial
print('acc(best)={:.4f}'.format(trial.value))
display(trial.params)

acc(best)=0.7127


{'num_leaves': 107,
 'min_data_in_leaf': 149,
 'min_sum_hession_in_leaf': 3.52756635172055e-05,
 'feature_fraction': 0.5877258780737462,
 'bagging_fraction': 0.7657756869209191,
 'lambda_l1': 1.3406343673102123,
 'lambda_l2': 3.4482904089131434}

In [50]:
params_best = trial.params
params_best.update(params_base)
display(params_best)

{'num_leaves': 16,
 'min_data_in_leaf': 149,
 'min_sum_hession_in_leaf': 3.52756635172055e-05,
 'feature_fraction': 0.5877258780737462,
 'bagging_fraction': 0.7657756869209191,
 'lambda_l1': 1.3406343673102123,
 'lambda_l2': 3.4482904089131434,
 'boosting_type': 'gbdt',
 'objective': 'binary',
 'metric': 'auc',
 'learning_rate': 0.2,
 'n_estimators': 100000,
 'random_state': 123,
 'importance_type': 'gain',
 'bagging_freq': 1,
 'seed': 123}